| author | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:03:47 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-07-26 19:04:23 +0000 |
| commit | 7fa27ce4a07f19b07799a767fc29416f3b625afb | |
| tree | 27825c83636c4de341eb09a74f49f5d38a15d165 /clang/lib/CodeGen | |
| parent | e3b557809604d036af6e00c60f012c2025b59a5e | |
Diffstat (limited to 'clang/lib/CodeGen')
98 files changed, 18508 insertions, 16048 deletions
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
new file mode 100644
index 000000000000..1b56cf7c596d
--- /dev/null
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -0,0 +1,231 @@
+//===- ABIInfo.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfo.h"
+#include "ABIInfoImpl.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+// Pin the vtable to this file.
+ABIInfo::~ABIInfo() = default;
+
+CGCXXABI &ABIInfo::getCXXABI() const { return CGT.getCXXABI(); }
+
+ASTContext &ABIInfo::getContext() const { return CGT.getContext(); }
+
+llvm::LLVMContext &ABIInfo::getVMContext() const {
+  return CGT.getLLVMContext();
+}
+
+const llvm::DataLayout &ABIInfo::getDataLayout() const {
+  return CGT.getDataLayout();
+}
+
+const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); }
+
+const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
+  return CGT.getCodeGenOpts();
+}
+
+bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
+
+bool ABIInfo::isOHOSFamily() const {
+  return getTarget().getTriple().isOHOSFamily();
+}
+
+Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                             QualType Ty) const {
+  return Address::invalid();
+}
+
+bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  return false;
+}
+
+bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                uint64_t Members) const {
+  return false;
+}
+
+bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
+  // For compatibility with GCC, ignore empty bitfields in C++ mode.
+  return getContext().getLangOpts().CPlusPlus;
+}
+
+bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                     uint64_t &Members) const {
+  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+    uint64_t NElements = AT->getSize().getZExtValue();
+    if (NElements == 0)
+      return false;
+    if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
+      return false;
+    Members *= NElements;
+  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->hasFlexibleArrayMember())
+      return false;
+
+    Members = 0;
+
+    // If this is a C++ record, check the properties of the record such as
+    // bases and ABI specific restrictions
+    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+      if (!getCXXABI().isPermittedToBeHomogeneousAggregate(CXXRD))
+        return false;
+
+      for (const auto &I : CXXRD->bases()) {
+        // Ignore empty records.
+        if (isEmptyRecord(getContext(), I.getType(), true))
+          continue;
+
+        uint64_t FldMembers;
+        if (!isHomogeneousAggregate(I.getType(), Base, FldMembers))
+          return false;
+
+        Members += FldMembers;
+      }
+    }
+
+    for (const auto *FD : RD->fields()) {
+      // Ignore (non-zero arrays of) empty records.
+      QualType FT = FD->getType();
+      while (const ConstantArrayType *AT =
+                 getContext().getAsConstantArrayType(FT)) {
+        if (AT->getSize().getZExtValue() == 0)
+          return false;
+        FT = AT->getElementType();
+      }
+      if (isEmptyRecord(getContext(), FT, true))
+        continue;
+
+      if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() &&
+          FD->isZeroLengthBitField(getContext()))
+        continue;
+
+      uint64_t FldMembers;
+      if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
+        return false;
+
+      Members = (RD->isUnion() ?
+                 std::max(Members, FldMembers) : Members + FldMembers);
+    }
+
+    if (!Base)
+      return false;
+
+    // Ensure there is no padding.
+    if (getContext().getTypeSize(Base) * Members !=
+        getContext().getTypeSize(Ty))
+      return false;
+  } else {
+    Members = 1;
+    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
+      Members = 2;
+      Ty = CT->getElementType();
+    }
+
+    // Most ABIs only support float, double, and some vector type widths.
+    if (!isHomogeneousAggregateBaseType(Ty))
+      return false;
+
+    // The base type must be the same for all members.  Types that
+    // agree in both total size and mode (float vs. vector) are
+    // treated as being equivalent here.
+    const Type *TyPtr = Ty.getTypePtr();
+    if (!Base) {
+      Base = TyPtr;
+      // If it's a non-power-of-2 vector, its size is already a power-of-2,
+      // so make sure to widen it explicitly.
+      if (const VectorType *VT = Base->getAs<VectorType>()) {
+        QualType EltTy = VT->getElementType();
+        unsigned NumElements =
+            getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
+        Base = getContext()
+                   .getVectorType(EltTy, NumElements, VT->getVectorKind())
+                   .getTypePtr();
+      }
+    }
+
+    if (Base->isVectorType() != TyPtr->isVectorType() ||
+        getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
+      return false;
+  }
+  return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
+}
+
+bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
+  if (getContext().isPromotableIntegerType(Ty))
+    return true;
+
+  if (const auto *EIT = Ty->getAs<BitIntType>())
+    if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
+      return true;
+
+  return false;
+}
+
+ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal,
+                                            bool Realign,
+                                            llvm::Type *Padding) const {
+  return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal,
+                                 Realign, Padding);
+}
+
+ABIArgInfo ABIInfo::getNaturalAlignIndirectInReg(QualType Ty,
+                                                 bool Realign) const {
+  return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
+                                      /*ByVal*/ false, Realign);
+}
+
+// Pin the vtable to this file.
+SwiftABIInfo::~SwiftABIInfo() = default;
+
+/// Does the given lowering require more than the given number of
+/// registers when expanded?
+///
+/// This is intended to be the basis of a reasonable basic implementation
+/// of should{Pass,Return}Indirectly.
+///
+/// For most targets, a limit of four total registers is reasonable; this
+/// limits the amount of code required in order to move around the value
+/// in case it wasn't produced immediately prior to the call by the caller
+/// (or wasn't produced in exactly the right registers) or isn't used
+/// immediately within the callee.  But some targets may need to further
+/// limit the register count due to an inability to support that many
+/// return registers.
+bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
+                                    unsigned maxAllRegisters) const {
+  unsigned intCount = 0, fpCount = 0;
+  for (llvm::Type *type : scalarTypes) {
+    if (type->isPointerTy()) {
+      intCount++;
+    } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
+      auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
+      intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
+    } else {
+      assert(type->isVectorTy() || type->isFloatingPointTy());
+      fpCount++;
+    }
+  }
+
+  return (intCount + fpCount > maxAllRegisters);
+}
+
+bool SwiftABIInfo::shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+                                        bool AsReturnValue) const {
+  return occupiesMoreThan(ComponentTys, /*total=*/4);
+}
+
+bool SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+                                     unsigned NumElts) const {
+  // The default implementation of this assumes that the target guarantees
+  // 128-bit SIMD support but nothing more.
+  return (VectorSize.getQuantity() > 8 && VectorSize.getQuantity() <= 16);
+}
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 755d2aaa7beb..b9a5ef6e4366 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -15,130 +15,134 @@
 #include "llvm/IR/Type.h"
 
 namespace llvm {
-  class Value;
-  class LLVMContext;
-  class DataLayout;
-  class Type;
-}
+class Value;
+class LLVMContext;
+class DataLayout;
+class Type;
+} // namespace llvm
 
 namespace clang {
-  class ASTContext;
-  class CodeGenOptions;
-  class TargetInfo;
+class ASTContext;
+class CodeGenOptions;
+class TargetInfo;
 
 namespace CodeGen {
-  class ABIArgInfo;
-  class Address;
-  class CGCXXABI;
-  class CGFunctionInfo;
-  class CodeGenFunction;
-  class CodeGenTypes;
+class ABIArgInfo;
+class Address;
+class CGCXXABI;
+class CGFunctionInfo;
+class CodeGenFunction;
+class CodeGenTypes;
 
-  // FIXME: All of this stuff should be part of the target interface
-  // somehow. It is currently here because it is not clear how to factor
-  // the targets to support this, since the Targets currently live in a
-  // layer below types n'stuff.
+// FIXME: All of this stuff should be part of the target interface
+// somehow. It is currently here because it is not clear how to factor
+// the targets to support this, since the Targets currently live in a
+// layer below types n'stuff.
 
+/// ABIInfo - Target specific hooks for defining how a type should be
+/// passed or returned from functions.
+class ABIInfo {
+protected:
+  CodeGen::CodeGenTypes &CGT;
+  llvm::CallingConv::ID RuntimeCC;
 
-  /// ABIInfo - Target specific hooks for defining how a type should be
-  /// passed or returned from functions.
-  class ABIInfo {
-  protected:
-    CodeGen::CodeGenTypes &CGT;
-    llvm::CallingConv::ID RuntimeCC;
-  public:
-    ABIInfo(CodeGen::CodeGenTypes &cgt)
-      : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {}
+public:
+  ABIInfo(CodeGen::CodeGenTypes &cgt)
+      : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {}
 
-    virtual ~ABIInfo();
+  virtual ~ABIInfo();
 
-    virtual bool allowBFloatArgsAndRet() const { return false; }
+  virtual bool allowBFloatArgsAndRet() const { return false; }
 
-    CodeGen::CGCXXABI &getCXXABI() const;
-    ASTContext &getContext() const;
-    llvm::LLVMContext &getVMContext() const;
-    const llvm::DataLayout &getDataLayout() const;
-    const TargetInfo &getTarget() const;
-    const CodeGenOptions &getCodeGenOpts() const;
+  CodeGen::CGCXXABI &getCXXABI() const;
+  ASTContext &getContext() const;
+  llvm::LLVMContext &getVMContext() const;
+  const llvm::DataLayout &getDataLayout() const;
+  const TargetInfo &getTarget() const;
+  const CodeGenOptions &getCodeGenOpts() const;
 
-    /// Return the calling convention to use for system runtime
-    /// functions.
-    llvm::CallingConv::ID getRuntimeCC() const {
-      return RuntimeCC;
-    }
+  /// Return the calling convention to use for system runtime
+  /// functions.
+  llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; }
 
-    virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0;
+  virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0;
 
-    /// EmitVAArg - Emit the target dependent code to load a value of
-    /// \arg Ty from the va_list pointed to by \arg VAListAddr.
+  /// EmitVAArg - Emit the target dependent code to load a value of
+  /// \arg Ty from the va_list pointed to by \arg VAListAddr.
 
-    // FIXME: This is a gaping layering violation if we wanted to drop
-    // the ABI information any lower than CodeGen. Of course, for
-    // VAArg handling it has to be at this level; there is no way to
-    // abstract this out.
-    virtual CodeGen::Address EmitVAArg(CodeGen::CodeGenFunction &CGF,
-                                       CodeGen::Address VAListAddr,
-                                       QualType Ty) const = 0;
+  // FIXME: This is a gaping layering violation if we wanted to drop
+  // the ABI information any lower than CodeGen. Of course, for
+  // VAArg handling it has to be at this level; there is no way to
+  // abstract this out.
+  virtual CodeGen::Address EmitVAArg(CodeGen::CodeGenFunction &CGF,
+                                     CodeGen::Address VAListAddr,
+                                     QualType Ty) const = 0;
+
+  bool isAndroid() const;
+  bool isOHOSFamily() const;
 
-    bool isAndroid() const;
+  /// Emit the target dependent code to load a value of
+  /// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr.
+  virtual CodeGen::Address EmitMSVAArg(CodeGen::CodeGenFunction &CGF,
+                                       CodeGen::Address VAListAddr,
+                                       QualType Ty) const;
 
-    /// Emit the target dependent code to load a value of
-    /// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr.
-    virtual CodeGen::Address EmitMSVAArg(CodeGen::CodeGenFunction &CGF,
-                                         CodeGen::Address VAListAddr,
-                                         QualType Ty) const;
+  virtual bool isHomogeneousAggregateBaseType(QualType Ty) const;
 
-    virtual bool isHomogeneousAggregateBaseType(QualType Ty) const;
+  virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                 uint64_t Members) const;
+  virtual bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const;
 
-    virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
-                                                   uint64_t Members) const;
-    virtual bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const;
 
+  /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
+  /// aggregate.  Base is set to the base element type, and Members is set
+  /// to the number of base elements.
+  bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                              uint64_t &Members) const;
 
-    bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                uint64_t &Members) const;
+  // Implement the Type::IsPromotableIntegerType for ABI specific needs. The
+  // only difference is that this considers bit-precise integer types as well.
+  bool isPromotableIntegerTypeForABI(QualType Ty) const;
 
-    // Implement the Type::IsPromotableIntegerType for ABI specific needs. The
-    // only difference is that this considers bit-precise integer types as well.
-    bool isPromotableIntegerTypeForABI(QualType Ty) const;
+  /// A convenience method to return an indirect ABIArgInfo with an
+  /// expected alignment equal to the ABI alignment of the given type.
+  CodeGen::ABIArgInfo
+  getNaturalAlignIndirect(QualType Ty, bool ByVal = true, bool Realign = false,
+                          llvm::Type *Padding = nullptr) const;
 
-    /// A convenience method to return an indirect ABIArgInfo with an
-    /// expected alignment equal to the ABI alignment of the given type.
-    CodeGen::ABIArgInfo
-    getNaturalAlignIndirect(QualType Ty, bool ByVal = true,
-                            bool Realign = false,
-                            llvm::Type *Padding = nullptr) const;
+  CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty,
+                                                   bool Realign = false) const;
+};
 
-    CodeGen::ABIArgInfo
-    getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const;
-  };
+/// Target specific hooks for defining how a type should be passed or returned
+/// from functions with one of the Swift calling conventions.
+class SwiftABIInfo {
+protected:
+  CodeGenTypes &CGT;
+  bool SwiftErrorInRegister;
 
-  /// Target specific hooks for defining how a type should be passed or returned
-  /// from functions with one of the Swift calling conventions.
-  class SwiftABIInfo {
-  protected:
-    CodeGenTypes &CGT;
-    bool SwiftErrorInRegister;
+  bool occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
+                        unsigned maxAllRegisters) const;
 
-  public:
-    SwiftABIInfo(CodeGen::CodeGenTypes &CGT, bool SwiftErrorInRegister)
-        : CGT(CGT), SwiftErrorInRegister(SwiftErrorInRegister) {}
+public:
+  SwiftABIInfo(CodeGen::CodeGenTypes &CGT, bool SwiftErrorInRegister)
+      : CGT(CGT), SwiftErrorInRegister(SwiftErrorInRegister) {}
 
-    virtual ~SwiftABIInfo();
+  virtual ~SwiftABIInfo();
 
-    /// Returns true if an aggregate which expands to the given type sequence
-    /// should be passed / returned indirectly.
-    virtual bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
-                                      bool AsReturnValue) const;
+  /// Returns true if an aggregate which expands to the given type sequence
+  /// should be passed / returned indirectly.
+  virtual bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+                                    bool AsReturnValue) const;
 
-    /// Returns true if the given vector type is legal from Swift's calling
-    /// convention perspective.
-    virtual bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
-                                   unsigned NumElts) const;
+  /// Returns true if the given vector type is legal from Swift's calling
+  /// convention perspective.
+  virtual bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
+                                 unsigned NumElts) const;
 
-    /// Returns true if swifterror is lowered to a register by the target ABI.
-    bool isSwiftErrorInRegister() const { return SwiftErrorInRegister; };
-  };
-} // end namespace CodeGen
-} // end namespace clang
+  /// Returns true if swifterror is lowered to a register by the target ABI.
+  bool isSwiftErrorInRegister() const { return SwiftErrorInRegister; };
+};
+} // end namespace CodeGen
+} // end namespace clang
 
 #endif
diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp
new file mode 100644
index 000000000000..7c30cecfdb9b
--- /dev/null
+++ b/clang/lib/CodeGen/ABIInfoImpl.cpp
@@ -0,0 +1,452 @@
+//===- ABIInfoImpl.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+// Pin the vtable to this file.
+DefaultABIInfo::~DefaultABIInfo() = default;
+
+ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  if (isAggregateTypeForABI(Ty)) {
+    // Records with non-trivial destructors/copy-constructors should not be
+    // passed by value.
+    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+    return getNaturalAlignIndirect(Ty);
+  }
+
+  // Treat an enum type as its underlying type.
+  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+    Ty = EnumTy->getDecl()->getIntegerType();
+
+  ASTContext &Context = getContext();
+  if (const auto *EIT = Ty->getAs<BitIntType>())
+    if (EIT->getNumBits() >
+        Context.getTypeSize(Context.getTargetInfo().hasInt128Type()
+                                ? Context.Int128Ty
+                                : Context.LongLongTy))
+      return getNaturalAlignIndirect(Ty);
+
+  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+                                            : ABIArgInfo::getDirect());
+}
+
+ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
+  if (RetTy->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  if (isAggregateTypeForABI(RetTy))
+    return getNaturalAlignIndirect(RetTy);
+
+  // Treat an enum type as its underlying type.
+  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+    RetTy = EnumTy->getDecl()->getIntegerType();
+
+  if (const auto *EIT = RetTy->getAs<BitIntType>())
+    if (EIT->getNumBits() >
+        getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type()
+                                     ? getContext().Int128Ty
+                                     : getContext().LongLongTy))
+      return getNaturalAlignIndirect(RetTy);
+
+  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+                                               : ABIArgInfo::getDirect());
+}
+
+void DefaultABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  for (auto &I : FI.arguments())
+    I.info = classifyArgumentType(I.type);
+}
+
+Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                  QualType Ty) const {
+  return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
+}
+
+ABIArgInfo CodeGen::coerceToIntArray(QualType Ty, ASTContext &Context,
+                                     llvm::LLVMContext &LLVMContext) {
+  // Alignment and Size are measured in bits.
+  const uint64_t Size = Context.getTypeSize(Ty);
+  const uint64_t Alignment = Context.getTypeAlign(Ty);
+  llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment);
+  const uint64_t NumElements = (Size + Alignment - 1) / Alignment;
+  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
+}
+
+void CodeGen::AssignToArrayRange(CodeGen::CGBuilderTy &Builder,
+                                 llvm::Value *Array, llvm::Value *Value,
+                                 unsigned FirstIndex, unsigned LastIndex) {
+  // Alternatively, we could emit this as a loop in the source.
+  for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
+    llvm::Value *Cell =
+        Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
+    Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
+  }
+}
+
+bool CodeGen::isAggregateTypeForABI(QualType T) {
+  return !CodeGenFunction::hasScalarEvaluationKind(T) ||
+         T->isMemberFunctionPointerType();
+}
+
+llvm::Type *CodeGen::getVAListElementType(CodeGenFunction &CGF) {
+  return CGF.ConvertTypeForMem(
+      CGF.getContext().getBuiltinVaListType()->getPointeeType());
+}
+
+CGCXXABI::RecordArgABI CodeGen::getRecordArgABI(const RecordType *RT,
+                                                CGCXXABI &CXXABI) {
+  const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
+  if (!RD) {
+    if (!RT->getDecl()->canPassInRegisters())
+      return CGCXXABI::RAA_Indirect;
+    return CGCXXABI::RAA_Default;
+  }
+  return CXXABI.getRecordArgABI(RD);
+}
+
+CGCXXABI::RecordArgABI CodeGen::getRecordArgABI(QualType T, CGCXXABI &CXXABI) {
+  const RecordType *RT = T->getAs<RecordType>();
+  if (!RT)
+    return CGCXXABI::RAA_Default;
+  return getRecordArgABI(RT, CXXABI);
+}
+
+bool CodeGen::classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
+                                 const ABIInfo &Info) {
+  QualType Ty = FI.getReturnType();
+
+  if (const auto *RT = Ty->getAs<RecordType>())
+    if (!isa<CXXRecordDecl>(RT->getDecl()) &&
+        !RT->getDecl()->canPassInRegisters()) {
+      FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty);
+      return true;
+    }
+
+  return CXXABI.classifyReturnType(FI);
+}
+
+QualType CodeGen::useFirstFieldIfTransparentUnion(QualType Ty) {
+  if (const RecordType *UT = Ty->getAsUnionType()) {
+    const RecordDecl *UD = UT->getDecl();
+    if (UD->hasAttr<TransparentUnionAttr>()) {
+      assert(!UD->field_empty() && "sema created an empty transparent union");
+      return UD->field_begin()->getType();
+    }
+  }
+  return Ty;
+}
+
+llvm::Value *CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
+                                                    llvm::Value *Ptr,
+                                                    CharUnits Align) {
+  // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align;
+  llvm::Value *RoundUp = CGF.Builder.CreateConstInBoundsGEP1_32(
+      CGF.Builder.getInt8Ty(), Ptr, Align.getQuantity() - 1);
+  return CGF.Builder.CreateIntrinsic(
+      llvm::Intrinsic::ptrmask, {CGF.AllocaInt8PtrTy, CGF.IntPtrTy},
+      {RoundUp, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())},
+      nullptr, Ptr->getName() + ".aligned");
+}
+
+Address
+CodeGen::emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                llvm::Type *DirectTy, CharUnits DirectSize,
+                                CharUnits DirectAlign, CharUnits SlotSize,
+                                bool AllowHigherAlign, bool ForceRightAdjust) {
+  // Cast the element type to i8* if necessary.  Some platforms define
+  // va_list as a struct containing an i8* instead of just an i8*.
+  if (VAListAddr.getElementType() != CGF.Int8PtrTy)
+    VAListAddr = VAListAddr.withElementType(CGF.Int8PtrTy);
+
+  llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur");
+
+  // If the CC aligns values higher than the slot size, do so if needed.
+  Address Addr = Address::invalid();
+  if (AllowHigherAlign && DirectAlign > SlotSize) {
+    Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign),
+                   CGF.Int8Ty, DirectAlign);
+  } else {
+    Addr = Address(Ptr, CGF.Int8Ty, SlotSize);
+  }
+
+  // Advance the pointer past the argument, then store that back.
+  CharUnits FullDirectSize = DirectSize.alignTo(SlotSize);
+  Address NextPtr =
+      CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next");
+  CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
+
+  // If the argument is smaller than a slot, and this is a big-endian
+  // target, the argument will be right-adjusted in its slot.
+  if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
+      (!DirectTy->isStructTy() || ForceRightAdjust)) {
+    Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
+  }
+
+  return Addr.withElementType(DirectTy);
+}
+
+Address CodeGen::emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                  QualType ValueTy, bool IsIndirect,
+                                  TypeInfoChars ValueInfo,
+                                  CharUnits SlotSizeAndAlign,
+                                  bool AllowHigherAlign,
+                                  bool ForceRightAdjust) {
+  // The size and alignment of the value that was passed directly.
+  CharUnits DirectSize, DirectAlign;
+  if (IsIndirect) {
+    DirectSize = CGF.getPointerSize();
+    DirectAlign = CGF.getPointerAlign();
+  } else {
+    DirectSize = ValueInfo.Width;
+    DirectAlign = ValueInfo.Align;
+  }
+
+  // Cast the address we've calculated to the right type.
+  llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy), *ElementTy = DirectTy;
+  if (IsIndirect) {
+    unsigned AllocaAS = CGF.CGM.getDataLayout().getAllocaAddrSpace();
+    DirectTy = llvm::PointerType::get(CGF.getLLVMContext(), AllocaAS);
+  }
+
+  Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize,
+                                        DirectAlign, SlotSizeAndAlign,
+                                        AllowHigherAlign, ForceRightAdjust);
+
+  if (IsIndirect) {
+    Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align);
+  }
+
+  return Addr;
+}
+
+Address CodeGen::emitMergePHI(CodeGenFunction &CGF, Address Addr1,
+                              llvm::BasicBlock *Block1, Address Addr2,
+                              llvm::BasicBlock *Block2,
+                              const llvm::Twine &Name) {
+  assert(Addr1.getType() == Addr2.getType());
+  llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
+  PHI->addIncoming(Addr1.getPointer(), Block1);
+  PHI->addIncoming(Addr2.getPointer(), Block2);
+  CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment());
+  return Address(PHI, Addr1.getElementType(), Align);
+}
+
+bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD,
+                           bool AllowArrays) {
+  if (FD->isUnnamedBitfield())
+    return true;
+
+  QualType FT = FD->getType();
+
+  // Constant arrays of empty records count as empty, strip them off.
+  // Constant arrays of zero length always count as empty.
+  bool WasArray = false;
+  if (AllowArrays)
+    while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
+      if (AT->getSize() == 0)
+        return true;
+      FT = AT->getElementType();
+      // The [[no_unique_address]] special case below does not apply to
+      // arrays of C++ empty records, so we need to remember this fact.
+      WasArray = true;
+    }
+
+  const RecordType *RT = FT->getAs<RecordType>();
+  if (!RT)
+    return false;
+
+  // C++ record fields are never empty, at least in the Itanium ABI.
+  //
+  // FIXME: We should use a predicate for whether this behavior is true in the
+  // current ABI.
+  //
+  // The exception to the above rule are fields marked with the
+  // [[no_unique_address]] attribute (since C++20).  Those do count as empty
+  // according to the Itanium ABI.  The exception applies only to records,
+  // not arrays of records, so we must also check whether we stripped off an
+  // array type above.
+  if (isa<CXXRecordDecl>(RT->getDecl()) &&
+      (WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
+    return false;
+
+  return isEmptyRecord(Context, FT, AllowArrays);
+}
+
+bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
+  const RecordType *RT = T->getAs<RecordType>();
+  if (!RT)
+    return false;
+  const RecordDecl *RD = RT->getDecl();
+  if (RD->hasFlexibleArrayMember())
+    return false;
+
+  // If this is a C++ record, check the bases first.
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+    for (const auto &I : CXXRD->bases())
+      if (!isEmptyRecord(Context, I.getType(), true))
+        return false;
+
+  for (const auto *I : RD->fields())
+    if (!isEmptyField(Context, I, AllowArrays))
+      return false;
+  return true;
+}
+
+const Type *CodeGen::isSingleElementStruct(QualType T, ASTContext &Context) {
+  const RecordType *RT = T->getAs<RecordType>();
+  if (!RT)
+    return nullptr;
+
+  const RecordDecl *RD = RT->getDecl();
+  if (RD->hasFlexibleArrayMember())
+    return nullptr;
+
+  const Type *Found = nullptr;
+
+  // If this is a C++ record, check the bases first.
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+    for (const auto &I : CXXRD->bases()) {
+      // Ignore empty records.
+      if (isEmptyRecord(Context, I.getType(), true))
+        continue;
+
+      // If we already found an element then this isn't a single-element struct.
+      if (Found)
+        return nullptr;
+
+      // If this is non-empty and not a single element struct, the composite
+      // cannot be a single element struct.
+      Found = isSingleElementStruct(I.getType(), Context);
+      if (!Found)
+        return nullptr;
+    }
+  }
+
+  // Check for single element.
+  for (const auto *FD : RD->fields()) {
+    QualType FT = FD->getType();
+
+    // Ignore empty fields.
+    if (isEmptyField(Context, FD, true))
+      continue;
+
+    // If we already found an element then this isn't a single-element
+    // struct.
+    if (Found)
+      return nullptr;
+
+    // Treat single element arrays as the element.
+    while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
+      if (AT->getSize().getZExtValue() != 1)
+        break;
+      FT = AT->getElementType();
+    }
+
+    if (!isAggregateTypeForABI(FT)) {
+      Found = FT.getTypePtr();
+    } else {
+      Found = isSingleElementStruct(FT, Context);
+      if (!Found)
+        return nullptr;
+    }
+  }
+
+  // We don't consider a struct a single-element struct if it has
+  // padding beyond the element type.
+  if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
+    return nullptr;
+
+  return Found;
+}
+
+Address CodeGen::EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr,
+                                QualType Ty, const ABIArgInfo &AI) {
+  // This default implementation defers to the llvm backend's va_arg
+  // instruction. It can handle only passing arguments directly
+  // (typically only handled in the backend for primitive types), or
+  // aggregates passed indirectly by pointer (NOTE: if the "byval"
+  // flag has ABI impact in the callee, this implementation cannot
+  // work.)
+
+  // Only a few cases are covered here at the moment -- those needed
+  // by the default abi.
+  llvm::Value *Val;
+
+  if (AI.isIndirect()) {
+    assert(!AI.getPaddingType() &&
+           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
+    assert(
+        !AI.getIndirectRealign() &&
+        "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");
+
+    auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
+    CharUnits TyAlignForABI = TyInfo.Align;
+
+    llvm::Type *ElementTy = CGF.ConvertTypeForMem(Ty);
+    llvm::Type *BaseTy = llvm::PointerType::getUnqual(ElementTy);
+    llvm::Value *Addr =
+        CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
+    return Address(Addr, ElementTy, TyAlignForABI);
+  } else {
+    assert((AI.isDirect() || AI.isExtend()) &&
+           "Unexpected ArgInfo Kind in generic VAArg emitter!");
+
+    assert(!AI.getInReg() &&
+           "Unexpected InReg seen in arginfo in generic VAArg emitter!");
+    assert(!AI.getPaddingType() &&
+           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
+    assert(!AI.getDirectOffset() &&
+           "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
+    assert(!AI.getCoerceToType() &&
+           "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");
+
+    Address Temp = CGF.CreateMemTemp(Ty, "varet");
+    Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(),
+                                  CGF.ConvertTypeForMem(Ty));
+    CGF.Builder.CreateStore(Val, Temp);
+    return Temp;
+  }
+}
+
+bool CodeGen::isSIMDVectorType(ASTContext &Context, QualType Ty) {
+  return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
+}
+
+bool CodeGen::isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) {
+  const RecordType *RT = Ty->getAs<RecordType>();
+  if (!RT)
+    return false;
+  const RecordDecl *RD = RT->getDecl();
+
+  // If this is a C++ record, check the bases first.
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+    for (const auto &I : CXXRD->bases())
+      if (!isRecordWithSIMDVectorType(Context, I.getType()))
+        return false;
+
+  for (const auto *i : RD->fields()) {
+    QualType FT = i->getType();
+
+    if (isSIMDVectorType(Context, FT))
+      return true;
+
+    if (isRecordWithSIMDVectorType(Context, FT))
+      return true;
+  }
+
+  return false;
+}
diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h
new file mode 100644
index 000000000000..5f0cc289af68
--- /dev/null
+++ b/clang/lib/CodeGen/ABIInfoImpl.h
@@ -0,0 +1,152 @@
+//===- ABIInfoImpl.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFOIMPL_H
+#define LLVM_CLANG_LIB_CODEGEN_ABIINFOIMPL_H
+
+#include "ABIInfo.h"
+#include "CGCXXABI.h"
+
+namespace clang::CodeGen {
+
+/// DefaultABIInfo - The default implementation for ABI specific
+/// details. This implementation provides information which results in
+/// self-consistent and sensible LLVM IR generation, but does not
+/// conform to any particular ABI.
+class DefaultABIInfo : public ABIInfo {
+public:
+  DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
+
+  virtual ~DefaultABIInfo();
+
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+
+  void computeInfo(CGFunctionInfo &FI) const override;
+
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+};
+
+// Helper for coercing an aggregate argument or return value into an integer
+// array of the same size (including padding) and alignment. This alternate
+// coercion happens only for the RenderScript ABI and can be removed after
+// runtimes that rely on it are no longer supported.
+//
+// RenderScript assumes that the size of the argument / return value in the IR
+// is the same as the size of the corresponding qualified type. This helper
+// coerces the aggregate type into an array of the same size (including
+// padding). This coercion is used in lieu of expansion of struct members or
+// other canonical coercions that return a coerced-type of larger size.
+//
+// Ty - The argument / return value type
+// Context - The associated ASTContext
+// LLVMContext - The associated LLVMContext
+ABIArgInfo coerceToIntArray(QualType Ty, ASTContext &Context,
+                            llvm::LLVMContext &LLVMContext);
+
+void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array,
+                        llvm::Value *Value, unsigned FirstIndex,
+                        unsigned LastIndex);
+
+bool isAggregateTypeForABI(QualType T);
+
+llvm::Type *getVAListElementType(CodeGenFunction &CGF);
+
+CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI);
+
+CGCXXABI::RecordArgABI getRecordArgABI(QualType T, CGCXXABI &CXXABI);
+
+bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
+                        const ABIInfo &Info);
+
+/// Pass transparent unions as if they were the type of the first element. Sema
+/// should ensure that all elements of the union have the same "machine type".
+QualType useFirstFieldIfTransparentUnion(QualType Ty);
+
+// Dynamically round a pointer up to a multiple of the given alignment.
+llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
+                                           llvm::Value *Ptr, CharUnits Align);
+
+/// Emit va_arg for a platform using the common void* representation,
+/// where arguments are simply emitted in an array of slots on the stack.
+///
+/// This version implements the core direct-value passing rules.
+///
+/// \param SlotSize - The size and alignment of a stack slot.
+///   Each argument will be allocated to a multiple of this number of
+///   slots, and all the slots will be aligned to this value.
+/// \param AllowHigherAlign - The slot alignment is not a cap;
+///   an argument type with an alignment greater than the slot size
+///   will be emitted on a higher-alignment address, potentially
+///   leaving one or more empty slots behind as padding.  If this
+///   is false, the returned address might be less-aligned than
+///   DirectAlign.
+/// \param ForceRightAdjust - Default is false. On big-endian platform and
+///   if the argument is smaller than a slot, set this flag will force
+///   right-adjust the argument in its slot irrespective of the type.
+Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                               llvm::Type *DirectTy, CharUnits DirectSize,
+                               CharUnits DirectAlign, CharUnits SlotSize,
+                               bool AllowHigherAlign,
+                               bool ForceRightAdjust = false);
+
+/// Emit va_arg for a platform using the common void* representation,
+/// where arguments are simply emitted in an array of slots on the stack.
+///
+/// \param IsIndirect - Values of this type are passed indirectly.
+/// \param ValueInfo - The size and alignment of this type, generally
+///   computed with getContext().getTypeInfoInChars(ValueTy).
+/// \param SlotSizeAndAlign - The size and alignment of a stack slot.
+///   Each argument will be allocated to a multiple of this number of
+///   slots, and all the slots will be aligned to this value.
+/// \param AllowHigherAlign - The slot alignment is not a cap;
+///   an argument type with an alignment greater than the slot size
+///   will be emitted on a higher-alignment address, potentially
+///   leaving one or more empty slots behind as padding.
+/// \param ForceRightAdjust - Default is false. On big-endian platform and
+///   if the argument is smaller than a slot, set this flag will force
+///   right-adjust the argument in its slot irrespective of the type.
+Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                         QualType ValueTy, bool IsIndirect,
+                         TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign,
+                         bool AllowHigherAlign, bool ForceRightAdjust = false);
+
+Address emitMergePHI(CodeGenFunction &CGF, Address Addr1,
+                     llvm::BasicBlock *Block1, Address Addr2,
+                     llvm::BasicBlock *Block2, const llvm::Twine &Name = "");
+
+/// isEmptyField - Return true iff a the field is "empty", that is it
+/// is an unnamed bit-field or an (array of) empty record(s).
+bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays);
+
+/// isEmptyRecord - Return true iff a structure contains only empty
+/// fields. Note that a structure with a flexible array member is not
+/// considered empty.
+bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
+
+/// isSingleElementStruct - Determine if a structure is a "single
+/// element struct", i.e. it has exactly one non-empty field or
+/// exactly one field which is itself a single element
+/// struct. Structures with flexible array members are never
+/// considered single element structs.
+///
+/// \return The field declaration for the single non-empty field, if
+/// it exists.
+const Type *isSingleElementStruct(QualType T, ASTContext &Context);
+
+Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
+                       const ABIArgInfo &AI);
+
+bool isSIMDVectorType(ASTContext &Context, QualType Ty);
+
+bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty);
+
+} // namespace clang::CodeGen
+
+#endif // LLVM_CLANG_LIB_CODEGEN_ABIINFOIMPL_H
diff --git a/clang/lib/CodeGen/Address.h b/clang/lib/CodeGen/Address.h
index bddeac1d6dcb..cf48df8f5e73 100644
--- a/clang/lib/CodeGen/Address.h
+++ b/clang/lib/CodeGen/Address.h
@@ -22,77 +22,35 @@
 namespace clang {
 namespace CodeGen {
 
-// We try to save some space by using 6 bits over two PointerIntPairs to store
-// the alignment. However, some arches don't support 3 bits in a PointerIntPair
-// so we fallback to storing the alignment separately.
-template <typename T, bool = alignof(llvm::Value *) >= 8> class AddressImpl {};
-
-template <typename T> class AddressImpl<T, false> {
-  llvm::Value *Pointer;
-  llvm::Type *ElementType;
-  CharUnits Alignment;
-
-public:
-  AddressImpl(llvm::Value *Pointer, llvm::Type *ElementType,
-              CharUnits Alignment)
-      : Pointer(Pointer), ElementType(ElementType), Alignment(Alignment) {}
-  llvm::Value *getPointer() const { return Pointer; }
-  llvm::Type *getElementType() const { return ElementType; }
-  CharUnits getAlignment() const { return Alignment; }
-};
-
-template <typename T> class AddressImpl<T, true> {
-  // Int portion stores upper 3 bits of the log of the alignment.
-  llvm::PointerIntPair<llvm::Value *, 3, unsigned> Pointer;
-  // Int portion stores lower 3 bits of the log of the alignment.
-  llvm::PointerIntPair<llvm::Type *, 3, unsigned> ElementType;
-
-public:
-  AddressImpl(llvm::Value *Pointer, llvm::Type *ElementType,
-              CharUnits Alignment)
-      : Pointer(Pointer), ElementType(ElementType) {
-    if (Alignment.isZero())
-      return;
-    // Currently the max supported alignment is much less than 1 << 63 and is
-    // guaranteed to be a power of 2, so we can store the log of the alignment
-    // into 6 bits.
-    assert(Alignment.isPowerOfTwo() && "Alignment cannot be zero");
-    auto AlignLog = llvm::Log2_64(Alignment.getQuantity());
-    assert(AlignLog < (1 << 6) && "cannot fit alignment into 6 bits");
-    this->Pointer.setInt(AlignLog >> 3);
-    this->ElementType.setInt(AlignLog & 7);
-  }
-  llvm::Value *getPointer() const { return Pointer.getPointer(); }
-  llvm::Type *getElementType() const { return ElementType.getPointer(); }
-  CharUnits getAlignment() const {
-    unsigned AlignLog = (Pointer.getInt() << 3) | ElementType.getInt();
-    return CharUnits::fromQuantity(CharUnits::QuantityType(1) << AlignLog);
-  }
-};
+// Indicates whether a pointer is known not to be null.
+enum KnownNonNull_t { NotKnownNonNull, KnownNonNull };
 
 /// An aligned address.
 class Address {
-  AddressImpl<void> A;
+  llvm::PointerIntPair<llvm::Value *, 1, bool> PointerAndKnownNonNull;
+  llvm::Type *ElementType;
+  CharUnits Alignment;
 
 protected:
-  Address(std::nullptr_t) : A(nullptr, nullptr, CharUnits::Zero()) {}
+  Address(std::nullptr_t) : ElementType(nullptr) {}
 
 public:
-  Address(llvm::Value *Pointer, llvm::Type *ElementType, CharUnits Alignment)
-      : A(Pointer, ElementType, Alignment) {
+  Address(llvm::Value *Pointer, llvm::Type *ElementType, CharUnits Alignment,
+          KnownNonNull_t IsKnownNonNull = NotKnownNonNull)
+      : PointerAndKnownNonNull(Pointer, IsKnownNonNull),
+        ElementType(ElementType), Alignment(Alignment) {
     assert(Pointer != nullptr && "Pointer cannot be null");
     assert(ElementType != nullptr && "Element type cannot be null");
-    assert(llvm::cast<llvm::PointerType>(Pointer->getType())
-               ->isOpaqueOrPointeeTypeMatches(ElementType) &&
-           "Incorrect pointer element type");
   }
 
   static Address invalid() { return Address(nullptr); }
-  bool isValid() const { return A.getPointer() != nullptr; }
+  bool isValid() const {
+    return PointerAndKnownNonNull.getPointer() != nullptr;
+  }
 
   llvm::Value *getPointer() const {
     assert(isValid());
-    return A.getPointer();
+    return PointerAndKnownNonNull.getPointer();
   }
 
   /// Return the type of the pointer value.
@@ -103,7 +61,7 @@ public:
   /// Return the type of the values stored in this address.
   llvm::Type *getElementType() const {
     assert(isValid());
-    return A.getElementType();
+    return ElementType;
  }
 
   /// Return the address space that this address resides in.
@@ -119,19 +77,41 @@ public:
   /// Return the alignment of this pointer.
   CharUnits getAlignment() const {
     assert(isValid());
-    return A.getAlignment();
+    return Alignment;
   }
 
   /// Return address with different pointer, but same element type and
   /// alignment.
-  Address withPointer(llvm::Value *NewPointer) const {
-    return Address(NewPointer, getElementType(), getAlignment());
+  Address withPointer(llvm::Value *NewPointer,
+                      KnownNonNull_t IsKnownNonNull) const {
+    return Address(NewPointer, getElementType(), getAlignment(),
+                   IsKnownNonNull);
   }
 
   /// Return address with different alignment, but same pointer and element
   /// type.
   Address withAlignment(CharUnits NewAlignment) const {
-    return Address(getPointer(), getElementType(), NewAlignment);
+    return Address(getPointer(), getElementType(), NewAlignment,
+                   isKnownNonNull());
+  }
+
+  /// Return address with different element type, but same pointer and
+  /// alignment.
+  Address withElementType(llvm::Type *ElemTy) const {
+    return Address(getPointer(), ElemTy, getAlignment(), isKnownNonNull());
+  }
+
+  /// Whether the pointer is known not to be null.
+  KnownNonNull_t isKnownNonNull() const {
+    assert(isValid());
+    return (KnownNonNull_t)PointerAndKnownNonNull.getInt();
+  }
+
+  /// Set the non-null bit.
+  Address setKnownNonNull() {
+    assert(isValid());
+    PointerAndKnownNonNull.setInt(true);
+    return *this;
   }
 };
 
@@ -153,10 +133,8 @@ public:
     return llvm::cast<llvm::Constant>(Address::getPointer());
   }
 
-  ConstantAddress getElementBitCast(llvm::Type *ElemTy) const {
-    llvm::Constant *BitCast = llvm::ConstantExpr::getBitCast(
-        getPointer(), ElemTy->getPointerTo(getAddressSpace()));
-    return ConstantAddress(BitCast, ElemTy, getAlignment());
+  ConstantAddress withElementType(llvm::Type *ElemTy) const {
+    return ConstantAddress(getPointer(), ElemTy, getAlignment());
   }
 
   static bool isaImpl(Address addr) {
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 10d6bff25e6d..cda03d69522d 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -17,10 +17,8 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/StackSafetyAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeReader.h"
@@ -39,7 +37,6 @@
 #include "llvm/IRPrinter/IRPrintingPasses.h"
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Object/OffloadBinary.h"
 #include "llvm/Passes/PassBuilder.h"
@@ -52,15 +49,13 @@
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Transforms/Coroutines/CoroCleanup.h"
-#include "llvm/Transforms/Coroutines/CoroEarly.h"
-#include "llvm/Transforms/Coroutines/CoroElide.h"
-#include "llvm/Transforms/Coroutines/CoroSplit.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
"llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" @@ -79,18 +74,12 @@ #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/JumpThreading.h" -#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/NameAnonGlobals.h" -#include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> #include <optional> using namespace clang; @@ -123,6 +112,7 @@ class EmitAssemblyHelper { const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; Module *TheModule; + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS; Timer CodeGenerationTime; @@ -187,9 +177,10 @@ public: const HeaderSearchOptions &HeaderSearchOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, Module *M) + const LangOptions &LOpts, Module *M, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) : Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts), - TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), + TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), VFS(std::move(VFS)), CodeGenerationTime("codegen", "Code Generation Time"), TargetTriple(TheModule->getTargetTriple()) {} @@ -294,6 +285,10 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, TLII->addVectorizableFunctionsFromVecLib( TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple); break; + case CodeGenOptions::ArmPL: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL, + TargetTriple); + break; default: break; } @@ -377,8 +372,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.BinutilsVersion = llvm::TargetMachine::parseBinutilsVersion(CodeGenOpts.BinutilsVersion); Options.UseInitArray = CodeGenOpts.UseInitArray; - Options.LowerGlobalDtorsViaCxaAtExit = - CodeGenOpts.RegisterGlobalDtorsWithAtExit; Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; Options.CompressDebugSections = CodeGenOpts.getCompressDebugSections(); Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; @@ -434,20 +427,20 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, CodeGenOpts.UniqueBasicBlockSectionNames; Options.TLSSize = CodeGenOpts.TLSSize; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - Options.ExplicitEmulatedTLS = true; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.StackUsageOutput = CodeGenOpts.StackUsageOutput; Options.EmitAddrsig = CodeGenOpts.Addrsig; Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; - Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI; - Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; + Options.EnableAIXExtendedAltivecABI = LangOpts.EnableAIXExtendedAltivecABI; + Options.XRayFunctionIndex = CodeGenOpts.XRayFunctionIndex; Options.LoopAlignment = CodeGenOpts.LoopAlignment; Options.DebugStrictDwarf = CodeGenOpts.DebugStrictDwarf; Options.ObjectFilenameForDebug = 
   Options.Hotpatch = CodeGenOpts.HotPatch;
   Options.JMCInstrument = CodeGenOpts.JMCInstrument;
+  Options.XCOFFReadOnlyPointers = CodeGenOpts.XCOFFReadOnlyPointers;
 
   switch (CodeGenOpts.getSwiftAsyncFramePointer()) {
   case CodeGenOptions::SwiftAsyncFramePointerKind::Auto:
@@ -466,6 +459,8 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
   Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
   Options.MCOptions.EmitDwarfUnwind = CodeGenOpts.getEmitDwarfUnwind();
+  Options.MCOptions.EmitCompactUnwindNonCanonical =
+      CodeGenOpts.EmitCompactUnwindNonCanonical;
   Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
   Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
   Options.MCOptions.MCUseDwarfDirectory =
@@ -498,13 +493,14 @@ static std::optional<GCOVOptions>
 getGCOVOptions(const CodeGenOptions &CodeGenOpts,
                const LangOptions &LangOpts) {
-  if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes)
+  if (CodeGenOpts.CoverageNotesFile.empty() &&
+      CodeGenOpts.CoverageDataFile.empty())
     return std::nullopt;
   // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if
   // LLVM's -default-gcov-version flag is set to something invalid.
   GCOVOptions Options;
-  Options.EmitNotes = CodeGenOpts.EmitGcovNotes;
-  Options.EmitData = CodeGenOpts.EmitGcovArcs;
+  Options.EmitNotes = !CodeGenOpts.CoverageNotesFile.empty();
+  Options.EmitData = !CodeGenOpts.CoverageDataFile.empty();
   llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version));
   Options.NoRedZone = CodeGenOpts.DisableRedZone;
   Options.Filter = CodeGenOpts.ProfileFilterFiles;
@@ -640,7 +636,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
                         PassBuilder &PB) {
   // If the back-end supports KCFI operand bundle lowering, skip KCFIPass.
   if (TargetTriple.getArch() == llvm::Triple::x86_64 ||
-      TargetTriple.isAArch64(64))
+      TargetTriple.isAArch64(64) || TargetTriple.isRISCV())
     return;
 
   // Ensure we lower KCFI operand bundles with -O0.
@@ -675,7 +671,8 @@ static void addSanitizers(const Triple &TargetTriple,
 
   if (CodeGenOpts.hasSanitizeBinaryMetadata()) {
     MPM.addPass(SanitizerBinaryMetadataPass(
-        getSanitizerBinaryMetadataOptions(CodeGenOpts)));
+        getSanitizerBinaryMetadataOptions(CodeGenOpts),
+        CodeGenOpts.SanitizeMetadataIgnorelistFiles));
   }
 
   auto MSanPass = [&](SanitizerMask Mask, bool CompileKernel) {
@@ -767,33 +764,40 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
 
   if (CodeGenOpts.hasProfileIRInstr())
     // -fprofile-generate.
-    PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty()
-                            ? getDefaultProfileGenName()
-                            : CodeGenOpts.InstrProfileOutput,
-                        "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction,
-                        CodeGenOpts.DebugInfoForProfiling);
+    PGOOpt = PGOOptions(
+        CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName()
+                                               : CodeGenOpts.InstrProfileOutput,
+        "", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr,
+        PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling);
   else if (CodeGenOpts.hasProfileIRUse()) {
     // -fprofile-use.
     auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse
                                                     : PGOOptions::NoCSAction;
-    PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "",
-                        CodeGenOpts.ProfileRemappingFile, PGOOptions::IRUse,
-                        CSAction, CodeGenOpts.DebugInfoForProfiling);
+    PGOOpt = PGOOptions(
+        CodeGenOpts.ProfileInstrumentUsePath, "",
+        CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS,
+        PGOOptions::IRUse, CSAction, CodeGenOpts.DebugInfoForProfiling);
   } else if (!CodeGenOpts.SampleProfileFile.empty())
     // -fprofile-sample-use
     PGOOpt = PGOOptions(
         CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile,
-        PGOOptions::SampleUse, PGOOptions::NoCSAction,
-        CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling);
+        CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse,
+        PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
+        CodeGenOpts.PseudoProbeForProfiling);
+  else if (!CodeGenOpts.MemoryProfileUsePath.empty())
+    // -fmemory-profile-use (without any of the above options)
+    PGOOpt = PGOOptions("", "", "", CodeGenOpts.MemoryProfileUsePath, VFS,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction,
+                        CodeGenOpts.DebugInfoForProfiling);
   else if (CodeGenOpts.PseudoProbeForProfiling)
     // -fpseudo-probe-for-profiling
-    PGOOpt =
-        PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
-                   CodeGenOpts.DebugInfoForProfiling, true);
+    PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction,
+                        CodeGenOpts.DebugInfoForProfiling, true);
   else if (CodeGenOpts.DebugInfoForProfiling)
     // -fdebug-info-for-profiling
-    PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
-                        PGOOptions::NoCSAction, true);
+    PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction, true);
 
   // Check to see if we want to generate a CS profile.
   if (CodeGenOpts.hasProfileCSIRInstr()) {
@@ -810,12 +814,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
                                : CodeGenOpts.InstrProfileOutput;
       PGOOpt->CSAction = PGOOptions::CSIRInstr;
     } else
-      PGOOpt = PGOOptions("",
-                          CodeGenOpts.InstrProfileOutput.empty()
-                              ? getDefaultProfileGenName()
-                              : CodeGenOpts.InstrProfileOutput,
-                          "", PGOOptions::NoAction, PGOOptions::CSIRInstr,
-                          CodeGenOpts.DebugInfoForProfiling);
+      PGOOpt =
+          PGOOptions("",
+                     CodeGenOpts.InstrProfileOutput.empty()
+                         ? getDefaultProfileGenName()
+                         : CodeGenOpts.InstrProfileOutput,
+                     "", /*MemoryProfile=*/"", nullptr, PGOOptions::NoAction,
+                     PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling);
   }
   if (TM)
     TM->setPGOOption(PGOOpt);
@@ -831,6 +836,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // Only enable CGProfilePass when using integrated assembler, since
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
+  PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
@@ -845,15 +851,33 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   StandardInstrumentations SI(
       TheModule->getContext(),
       (CodeGenOpts.DebugPassManager || DebugPassStructure),
-      /*VerifyEach*/ false, PrintPassOpts);
-  SI.registerCallbacks(PIC, &FAM);
+      CodeGenOpts.VerifyEach, PrintPassOpts);
+  SI.registerCallbacks(PIC, &MAM);
   PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC);
 
-  if (CodeGenOpts.EnableAssignmentTracking) {
+  // Handle the assignment tracking feature options.
+  switch (CodeGenOpts.getAssignmentTrackingMode()) {
+  case CodeGenOptions::AssignmentTrackingOpts::Forced:
     PB.registerPipelineStartEPCallback(
         [&](ModulePassManager &MPM, OptimizationLevel Level) {
           MPM.addPass(AssignmentTrackingPass());
         });
+    break;
+  case CodeGenOptions::AssignmentTrackingOpts::Enabled:
+    // Disable assignment tracking in LTO builds for now as the performance
+    // cost is too high. Disable for LLDB tuning due to llvm.org/PR43126.
+    if (!CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.PrepareForLTO &&
+        CodeGenOpts.getDebuggerTuning() != llvm::DebuggerKind::LLDB) {
+      PB.registerPipelineStartEPCallback(
+          [&](ModulePassManager &MPM, OptimizationLevel Level) {
+            // Only use assignment tracking if optimisations are enabled.
+            if (Level != OptimizationLevel::O0)
+              MPM.addPass(AssignmentTrackingPass());
+          });
+    }
+    break;
+  case CodeGenOptions::AssignmentTrackingOpts::Disabled:
+    break;
   }
 
   // Enable verify-debuginfo-preserve-each for new PM.
@@ -866,7 +890,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
     if (!CodeGenOpts.DIBugsReportFilePath.empty())
       Debugify.setOrigDIVerifyBugsReportFilePath(
          CodeGenOpts.DIBugsReportFilePath);
-    Debugify.registerCallbacks(PIC);
+    Debugify.registerCallbacks(PIC, MAM);
   }
 
   // Attempt to load pass plugins and register their callbacks with PB.
   for (auto &PluginFN : CodeGenOpts.PassPlugins) {
@@ -982,20 +1006,28 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
           MPM.addPass(InstrProfiling(*Options, false));
         });
 
-    if (CodeGenOpts.OptimizationLevel == 0) {
-      MPM = PB.buildO0DefaultPipeline(Level, IsLTO || IsThinLTO);
-    } else if (IsThinLTO) {
+    // TODO: Consider passing the MemoryProfileOutput to the pass builder via
+    // the PGOOptions, and set this up there.
+    if (!CodeGenOpts.MemoryProfileOutput.empty()) {
+      PB.registerOptimizerLastEPCallback(
+          [](ModulePassManager &MPM, OptimizationLevel Level) {
+            MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+            MPM.addPass(ModuleMemProfilerPass());
+          });
+    }
+
+    bool IsThinOrUnifiedLTO = IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO);
+    if (CodeGenOpts.FatLTO) {
+      MPM = PB.buildFatLTODefaultPipeline(Level, IsThinOrUnifiedLTO,
+                                          IsThinOrUnifiedLTO ||
+                                              shouldEmitRegularLTOSummary());
+    } else if (IsThinOrUnifiedLTO) {
       MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level);
     } else if (IsLTO) {
      MPM = PB.buildLTOPreLinkDefaultPipeline(Level);
    } else {
      MPM = PB.buildPerModuleDefaultPipeline(Level);
    }
-
-    if (!CodeGenOpts.MemoryProfileOutput.empty()) {
-      MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
-      MPM.addPass(ModuleMemProfilerPass());
-    }
   }
 
   // Add a verifier pass if requested. We don't have to do this if the action
@@ -1015,8 +1047,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
       if (!ThinLinkOS)
         return;
     }
-    MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
-                                                         : nullptr));
+    if (CodeGenOpts.UnifiedLTO)
+      TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
+    MPM.addPass(ThinLTOBitcodeWriterPass(
&ThinLinkOS->os() : nullptr)); } else { MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, /*EmitLTOSummary=*/true)); @@ -1027,11 +1061,13 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // targets bool EmitLTOSummary = shouldEmitRegularLTOSummary(); if (EmitLTOSummary) { - if (!TheModule->getModuleFlag("ThinLTO")) + if (!TheModule->getModuleFlag("ThinLTO") && !CodeGenOpts.UnifiedLTO) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", uint32_t(1)); + if (CodeGenOpts.UnifiedLTO) + TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); } if (Action == Backend_EmitBC) MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, @@ -1041,6 +1077,21 @@ void EmitAssemblyHelper::RunOptimizationPipeline( EmitLTOSummary)); } } + if (CodeGenOpts.FatLTO) { + // Set module flags, like EnableSplitLTOUnit and UnifiedLTO, since FatLTO + // uses a different action than Backend_EmitBC or Backend_EmitLL. + bool IsThinOrUnifiedLTO = + CodeGenOpts.PrepareForThinLTO || + (CodeGenOpts.PrepareForLTO && CodeGenOpts.UnifiedLTO); + if (!TheModule->getModuleFlag("ThinLTO")) + TheModule->addModuleFlag(Module::Error, "ThinLTO", + uint32_t(IsThinOrUnifiedLTO)); + if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + uint32_t(CodeGenOpts.EnableSplitLTOUnit)); + if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO")) + TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); + } // Now that we have all of the passes ready, run them. { @@ -1177,6 +1228,7 @@ static void runThinLTOBackend( Conf.ProfileRemapping = std::move(ProfileRemapping); Conf.DebugPassManager = CGOpts.DebugPassManager; + Conf.VerifyEach = CGOpts.VerifyEach; Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; Conf.RemarksFilename = CGOpts.OptRecordFile; Conf.RemarksPasses = CGOpts.OptRecordPasses; @@ -1219,9 +1271,9 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, - StringRef TDesc, Module *M, - BackendAction Action, + const LangOptions &LOpts, StringRef TDesc, + Module *M, BackendAction Action, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, std::unique_ptr<raw_pwrite_stream> OS) { llvm::TimeTraceScope TimeScope("Backend"); @@ -1264,7 +1316,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, } } - EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M); + EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M, VFS); AsmHelper.EmitAssembly(Action, std::move(OS)); // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 8ef95bb80846..222b0a192c85 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -80,22 +80,23 @@ namespace { AtomicSizeInBits = C.toBits( C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1) .alignTo(lvalue.getAlignment())); - auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldPointer()); + llvm::Value *BitFieldPtr = lvalue.getBitFieldPointer(); auto OffsetInChars = (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) * lvalue.getAlignment(); - VoidPtrAddr = CGF.Builder.CreateConstGEP1_64( - CGF.Int8Ty, VoidPtrAddr, 
OffsetInChars.getQuantity()); - llvm::Type *IntTy = CGF.Builder.getIntNTy(AtomicSizeInBits); - auto Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - VoidPtrAddr, IntTy->getPointerTo(), "atomic_bitfield_base"); + llvm::Value *StoragePtr = CGF.Builder.CreateConstGEP1_64( + CGF.Int8Ty, BitFieldPtr, OffsetInChars.getQuantity()); + StoragePtr = CGF.Builder.CreateAddrSpaceCast( + StoragePtr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), + "atomic_bitfield_base"); BFI = OrigBFI; BFI.Offset = Offset; BFI.StorageSize = AtomicSizeInBits; BFI.StorageOffset += OffsetInChars; - LVal = LValue::MakeBitfield(Address(Addr, IntTy, lvalue.getAlignment()), - BFI, lvalue.getType(), lvalue.getBaseInfo(), - lvalue.getTBAAInfo()); + llvm::Type *StorageTy = CGF.Builder.getIntNTy(AtomicSizeInBits); + LVal = LValue::MakeBitfield( + Address(StoragePtr, StorageTy, lvalue.getAlignment()), BFI, + lvalue.getType(), lvalue.getBaseInfo(), lvalue.getTBAAInfo()); AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned); if (AtomicTy.isNull()) { llvm::APInt Size( @@ -161,7 +162,7 @@ namespace { } Address getAtomicAddressAsAtomicIntPointer() const { - return emitCastToAtomicIntPointer(getAtomicAddress()); + return castToAtomicIntPointer(getAtomicAddress()); } /// Is the atomic size larger than the underlying value type? @@ -183,7 +184,7 @@ namespace { /// Cast the given pointer to an integer pointer suitable for atomic /// operations if the source. - Address emitCastToAtomicIntPointer(Address Addr) const; + Address castToAtomicIntPointer(Address Addr) const; /// If Addr is compatible with the iN that will be used for an atomic /// operation, bitcast it. Otherwise, create a temporary that is suitable @@ -623,6 +624,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, : llvm::Instruction::Sub; [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub @@ -636,8 +638,11 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: - Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min - : llvm::AtomicRMWInst::UMin; + Op = E->getValueType()->isFloatingType() + ? llvm::AtomicRMWInst::FMin + : (E->getValueType()->isSignedIntegerType() + ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::UMin); break; case AtomicExpr::AO__atomic_max_fetch: @@ -647,8 +652,11 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: - Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max - : llvm::AtomicRMWInst::UMax; + Op = E->getValueType()->isFloatingType() + ? llvm::AtomicRMWInst::FMax + : (E->getValueType()->isSignedIntegerType() + ? 
llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::UMax); break; case AtomicExpr::AO__atomic_and_fetch: @@ -789,8 +797,7 @@ AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args, ValTy = CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false); llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), SizeInBits); - Address Ptr = Address(CGF.Builder.CreateBitCast(Val, ITy->getPointerTo()), - ITy, Align); + Address Ptr = Address(Val, ITy, Align); Val = CGF.EmitLoadOfScalar(Ptr, false, CGF.getContext().getPointerType(ValTy), Loc); @@ -798,8 +805,7 @@ AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args, Args.add(RValue::get(Val), ValTy); } else { // Non-optimized functions always take a reference. - Args.add(RValue::get(CGF.EmitCastToVoidPtr(Val)), - CGF.getContext().VoidPtrTy); + Args.add(RValue::get(Val), CGF.getContext().VoidPtrTy); } } @@ -897,6 +903,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_sub: if (MemTy->isPointerType()) { @@ -916,9 +923,19 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { } [[fallthrough]]; case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_min: ShouldCastToIntPtrTy = !MemTy->isFloatingType(); [[fallthrough]]; @@ -934,13 +951,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_nand: - case AtomicExpr::AO__c11_atomic_fetch_max: - case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_xor: - case AtomicExpr::AO__opencl_atomic_fetch_min: - case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_or: @@ -952,12 +965,6 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_nand_fetch: - case AtomicExpr::AO__atomic_max_fetch: - case AtomicExpr::AO__atomic_min_fetch: - case AtomicExpr::AO__atomic_fetch_max: - case AtomicExpr::AO__hip_atomic_fetch_max: - case AtomicExpr::AO__atomic_fetch_min: - case AtomicExpr::AO__hip_atomic_fetch_min: Val1 = EmitValToTemp(*this, E->getVal1()); break; } @@ -971,7 +978,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { AtomicInfo Atomics(*this, AtomicVal); if (ShouldCastToIntPtrTy) { - Ptr = Atomics.emitCastToAtomicIntPointer(Ptr); + Ptr = Atomics.castToAtomicIntPointer(Ptr); if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1); if (Val2.isValid()) @@ -979,13 +986,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) 
{ } if (Dest.isValid()) { if (ShouldCastToIntPtrTy) - Dest = Atomics.emitCastToAtomicIntPointer(Dest); + Dest = Atomics.castToAtomicIntPointer(Dest); } else if (E->isCmpXChg()) Dest = CreateMemTemp(RValTy, "cmpxchg.bool"); else if (!RValTy->isVoidType()) { Dest = Atomics.CreateTempAlloca(); if (ShouldCastToIntPtrTy) - Dest = Atomics.emitCastToAtomicIntPointer(Dest); + Dest = Atomics.castToAtomicIntPointer(Dest); } // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary . @@ -1013,6 +1020,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_sub: case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_min: @@ -1088,15 +1096,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { if (AS == LangAS::opencl_generic) return V; auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); - auto T = llvm::cast<llvm::PointerType>(V->getType()); - auto *DestType = llvm::PointerType::getWithSamePointeeType(T, DestAS); + auto *DestType = llvm::PointerType::get(getLLVMContext(), DestAS); return getTargetHooks().performAddrSpaceCast( *this, V, AS, LangAS::opencl_generic, DestType, false); }; - Args.add(RValue::get(CastToGenericAddrSpace( - EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())), + Args.add(RValue::get(CastToGenericAddrSpace(Ptr.getPointer(), + E->getPtr()->getType())), getContext().VoidPtrTy); std::string LibCallName; @@ -1129,10 +1136,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LibCallName = "__atomic_compare_exchange"; RetTy = getContext().BoolTy; HaveRetTy = true; - Args.add( - RValue::get(CastToGenericAddrSpace( - EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())), - getContext().VoidPtrTy); + Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(), + E->getVal1()->getType())), + getContext().VoidPtrTy); AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); Args.add(RValue::get(Order), getContext().IntTy); @@ -1218,6 +1224,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { [[fallthrough]]; case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: LibCallName = "__atomic_fetch_sub"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -1293,8 +1300,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { } else { // Value is returned through parameter before the order. 
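This is the unoptimized-libcall path; per the GCC Atomic/GCCMM library ABI cited a few lines up, the size-generic entry points return non-void results through a pointer parameter placed just before the memory order. For reference, the declarations as documented there:

#include <stddef.h>

extern "C" {
void __atomic_load(size_t size, void *mem, void *ret, int order);
void __atomic_store(size_t size, void *mem, void *val, int order);
void __atomic_exchange(size_t size, void *mem, void *val, void *ret,
                       int order);
bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
                               void *desired, int success_order,
                               int failure_order);
}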
RetTy = getContext().VoidTy; - Args.add(RValue::get(EmitCastToVoidPtr(Dest.getPointer())), - getContext().VoidPtrTy); + Args.add(RValue::get(Dest.getPointer()), getContext().VoidPtrTy); } } // order is always the last parameter @@ -1329,16 +1335,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch) ResVal = Builder.CreateNot(ResVal); - Builder.CreateStore( - ResVal, Builder.CreateElementBitCast(Dest, ResVal->getType())); + Builder.CreateStore(ResVal, Dest.withElementType(ResVal->getType())); } if (RValTy->isVoidType()) return RValue::get(nullptr); - return convertTempToRValue( - Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)), - RValTy, E->getExprLoc()); + return convertTempToRValue(Dest.withElementType(ConvertTypeForMem(RValTy)), + RValTy, E->getExprLoc()); } bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store || @@ -1389,9 +1393,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { if (RValTy->isVoidType()) return RValue::get(nullptr); - return convertTempToRValue( - Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)), - RValTy, E->getExprLoc()); + return convertTempToRValue(Dest.withElementType(ConvertTypeForMem(RValTy)), + RValTy, E->getExprLoc()); } // Long case, when Order isn't obviously constant. @@ -1461,15 +1464,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { return RValue::get(nullptr); assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits()); - return convertTempToRValue( - Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)), - RValTy, E->getExprLoc()); + return convertTempToRValue(Dest.withElementType(ConvertTypeForMem(RValTy)), + RValTy, E->getExprLoc()); } -Address AtomicInfo::emitCastToAtomicIntPointer(Address addr) const { +Address AtomicInfo::castToAtomicIntPointer(Address addr) const { llvm::IntegerType *ty = llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits); - return CGF.Builder.CreateElementBitCast(addr, ty); + return addr.withElementType(ty); } Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const { @@ -1482,7 +1484,7 @@ Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const { Addr = Tmp; } - return emitCastToAtomicIntPointer(Addr); + return castToAtomicIntPointer(Addr); } RValue AtomicInfo::convertAtomicTempToRValue(Address addr, @@ -1554,7 +1556,7 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, } // Slam the integer into the temporary. 
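Most of the mechanical churn in this file is CreateElementBitCast giving way to Address::withElementType: under opaque pointers there is no bitcast instruction to emit, so retyping an Address is pure bookkeeping. A simplified model of that accessor (a sketch, not the real class, which also carries the KnownNonNull bit threaded through the CGBuilder.h hunks below):

#include "clang/AST/CharUnits.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"

class AddressSketch {                 // hypothetical stand-in for Address
  llvm::Value *Pointer;
  llvm::Type *ElementType;            // a hint only; the pointer is opaque
  clang::CharUnits Alignment;
public:
  AddressSketch(llvm::Value *P, llvm::Type *T, clang::CharUnits A)
      : Pointer(P), ElementType(T), Alignment(A) {}
  AddressSketch withElementType(llvm::Type *T) const {
    return AddressSketch(Pointer, T, Alignment);  // no IR is emitted
  }
};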
- Address CastTemp = emitCastToAtomicIntPointer(Temp); + Address CastTemp = castToAtomicIntPointer(Temp); CGF.Builder.CreateStore(IntVal, CastTemp) ->setVolatile(TempIsVolatile); @@ -1566,10 +1568,8 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, // void __atomic_load(size_t size, void *mem, void *return, int order); CallArgList Args; Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType()); - Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicPointer())), - CGF.getContext().VoidPtrTy); - Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)), - CGF.getContext().VoidPtrTy); + Args.add(RValue::get(getAtomicPointer()), CGF.getContext().VoidPtrTy); + Args.add(RValue::get(AddForLoaded), CGF.getContext().VoidPtrTy); Args.add( RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))), CGF.getContext().IntTy); @@ -1732,7 +1732,7 @@ llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const { Address Addr = materializeRValue(RVal); // Cast the temporary to the atomic int type and pull a value out. - Addr = emitCastToAtomicIntPointer(Addr); + Addr = castToAtomicIntPointer(Addr); return CGF.Builder.CreateLoad(Addr); } @@ -1763,12 +1763,9 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, // void *desired, int success, int failure); CallArgList Args; Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType()); - Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicPointer())), - CGF.getContext().VoidPtrTy); - Args.add(RValue::get(CGF.EmitCastToVoidPtr(ExpectedAddr)), - CGF.getContext().VoidPtrTy); - Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)), - CGF.getContext().VoidPtrTy); + Args.add(RValue::get(getAtomicPointer()), CGF.getContext().VoidPtrTy); + Args.add(RValue::get(ExpectedAddr), CGF.getContext().VoidPtrTy); + Args.add(RValue::get(DesiredAddr), CGF.getContext().VoidPtrTy); Args.add(RValue::get( llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))), CGF.getContext().IntTy); @@ -1910,7 +1907,7 @@ void AtomicInfo::EmitAtomicUpdateOp( /*NumReservedValues=*/2); PHI->addIncoming(OldVal, CurBB); Address NewAtomicAddr = CreateTempAlloca(); - Address NewAtomicIntAddr = emitCastToAtomicIntPointer(NewAtomicAddr); + Address NewAtomicIntAddr = castToAtomicIntPointer(NewAtomicAddr); if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) || requiresMemSetZero(getAtomicAddress().getElementType())) { CGF.Builder.CreateStore(PHI, NewAtomicIntAddr); @@ -1992,7 +1989,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal, /*NumReservedValues=*/2); PHI->addIncoming(OldVal, CurBB); Address NewAtomicAddr = CreateTempAlloca(); - Address NewAtomicIntAddr = emitCastToAtomicIntPointer(NewAtomicAddr); + Address NewAtomicIntAddr = castToAtomicIntPointer(NewAtomicAddr); if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) || requiresMemSetZero(getAtomicAddress().getElementType())) { CGF.Builder.CreateStore(PHI, NewAtomicIntAddr); @@ -2071,10 +2068,8 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, CallArgList args; args.add(RValue::get(atomics.getAtomicSizeValue()), getContext().getSizeType()); - args.add(RValue::get(EmitCastToVoidPtr(atomics.getAtomicPointer())), - getContext().VoidPtrTy); - args.add(RValue::get(EmitCastToVoidPtr(srcAddr.getPointer())), - getContext().VoidPtrTy); + args.add(RValue::get(atomics.getAtomicPointer()), getContext().VoidPtrTy); + args.add(RValue::get(srcAddr.getPointer()), getContext().VoidPtrTy); args.add( 
RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))), getContext().IntTy); @@ -2086,8 +2081,7 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, llvm::Value *intValue = atomics.convertRValueToInt(rvalue); // Do the atomic store. - Address addr = - atomics.emitCastToAtomicIntPointer(atomics.getAtomicAddress()); + Address addr = atomics.castToAtomicIntPointer(atomics.getAtomicAddress()); intValue = Builder.CreateIntCast( intValue, addr.getElementType(), /*isSigned=*/false); llvm::StoreInst *store = Builder.CreateStore(intValue, addr); diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 6e4a0dbf2335..cfbe3272196e 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1259,9 +1259,8 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) { // to byref*. auto &byrefInfo = getBlockByrefInfo(variable); - addr = Address(Builder.CreateLoad(addr), Int8Ty, byrefInfo.ByrefAlignment); - - addr = Builder.CreateElementBitCast(addr, byrefInfo.Type, "byref.addr"); + addr = Address(Builder.CreateLoad(addr), byrefInfo.Type, + byrefInfo.ByrefAlignment); addr = emitBlockByrefAddress(addr, byrefInfo, /*follow*/ true, variable->getName()); @@ -1427,7 +1426,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, // directly as BlockPointer. BlockPointer = Builder.CreatePointerCast( arg, - BlockInfo->StructureType->getPointerTo( + llvm::PointerType::get( + getLLVMContext(), getContext().getLangOpts().OpenCL ? getContext().getTargetAddressSpace(LangAS::opencl_generic) : 0), @@ -1934,14 +1934,12 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { auto AL = ApplyDebugLocation::CreateArtificial(*this); Address src = GetAddrOfLocalVar(&SrcDecl); - src = Address(Builder.CreateLoad(src), Int8Ty, blockInfo.BlockAlign); - src = Builder.CreateElementBitCast(src, blockInfo.StructureType, - "block.source"); + src = Address(Builder.CreateLoad(src), blockInfo.StructureType, + blockInfo.BlockAlign); Address dst = GetAddrOfLocalVar(&DstDecl); - dst = Address(Builder.CreateLoad(dst), Int8Ty, blockInfo.BlockAlign); - dst = - Builder.CreateElementBitCast(dst, blockInfo.StructureType, "block.dest"); + dst = Address(Builder.CreateLoad(dst), blockInfo.StructureType, + blockInfo.BlockAlign); for (auto &capture : blockInfo.SortedCaptures) { if (capture.isConstantOrTrivial()) @@ -2124,8 +2122,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { auto AL = ApplyDebugLocation::CreateArtificial(*this); Address src = GetAddrOfLocalVar(&SrcDecl); - src = Address(Builder.CreateLoad(src), Int8Ty, blockInfo.BlockAlign); - src = Builder.CreateElementBitCast(src, blockInfo.StructureType, "block"); + src = Address(Builder.CreateLoad(src), blockInfo.StructureType, + blockInfo.BlockAlign); CodeGenFunction::RunCleanupsScope cleanups(*this); @@ -2162,9 +2160,9 @@ public: void emitCopy(CodeGenFunction &CGF, Address destField, Address srcField) override { - destField = CGF.Builder.CreateElementBitCast(destField, CGF.Int8Ty); + destField = destField.withElementType(CGF.Int8Ty); - srcField = CGF.Builder.CreateElementBitCast(srcField, CGF.Int8PtrTy); + srcField = srcField.withElementType(CGF.Int8PtrTy); llvm::Value *srcValue = CGF.Builder.CreateLoad(srcField); unsigned flags = (Flags | BLOCK_BYREF_CALLER).getBitMask(); @@ -2177,7 +2175,7 @@ public: } void emitDispose(CodeGenFunction &CGF, Address field) override { - field = CGF.Builder.CreateElementBitCast(field, 
CGF.Int8PtrTy); + field = field.withElementType(CGF.Int8PtrTy); llvm::Value *value = CGF.Builder.CreateLoad(field); CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER, false); @@ -2369,17 +2367,15 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, if (generator.needsCopy()) { // dst->x Address destField = CGF.GetAddrOfLocalVar(&Dst); - destField = Address(CGF.Builder.CreateLoad(destField), CGF.Int8Ty, + destField = Address(CGF.Builder.CreateLoad(destField), byrefInfo.Type, byrefInfo.ByrefAlignment); - destField = CGF.Builder.CreateElementBitCast(destField, byrefInfo.Type); destField = CGF.emitBlockByrefAddress(destField, byrefInfo, false, "dest-object"); // src->x Address srcField = CGF.GetAddrOfLocalVar(&Src); - srcField = Address(CGF.Builder.CreateLoad(srcField), CGF.Int8Ty, + srcField = Address(CGF.Builder.CreateLoad(srcField), byrefInfo.Type, byrefInfo.ByrefAlignment); - srcField = CGF.Builder.CreateElementBitCast(srcField, byrefInfo.Type); srcField = CGF.emitBlockByrefAddress(srcField, byrefInfo, false, "src-object"); @@ -2435,9 +2431,8 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, if (generator.needsDispose()) { Address addr = CGF.GetAddrOfLocalVar(&Src); - addr = Address(CGF.Builder.CreateLoad(addr), CGF.Int8Ty, + addr = Address(CGF.Builder.CreateLoad(addr), byrefInfo.Type, byrefInfo.ByrefAlignment); - addr = CGF.Builder.CreateElementBitCast(addr, byrefInfo.Type); addr = CGF.emitBlockByrefAddress(addr, byrefInfo, false, "object"); generator.emitDispose(CGF, addr); diff --git a/clang/lib/CodeGen/CGBlocks.h b/clang/lib/CodeGen/CGBlocks.h index e8857d98894f..4ef1ae9f3365 100644 --- a/clang/lib/CodeGen/CGBlocks.h +++ b/clang/lib/CodeGen/CGBlocks.h @@ -287,12 +287,6 @@ public: // This could be zero if no forced alignment is required. CharUnits BlockHeaderForcedGapSize; - /// The next block in the block-info chain. Invalid if this block - /// info is not part of the CGF's block-info chain, which is true - /// if it corresponds to a global block or a block whose expression - /// has been encountered. - CGBlockInfo *NextBlockInfo; - void buildCaptureMap() { for (auto &C : SortedCaptures) Captures[C.Cap->getVariable()] = &C; diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 2fcfea64ede6..68535920088c 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -89,8 +89,6 @@ public: llvm::LoadInst *CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name = "") { - assert(llvm::cast<llvm::PointerType>(Addr->getType()) - ->isOpaqueOrPointeeTypeMatches(Ty)); return CreateAlignedLoad(Ty, Addr, Align.getAsAlign(), Name); } @@ -120,15 +118,11 @@ public: /// Emit a load from an i1 flag variable. llvm::LoadInst *CreateFlagLoad(llvm::Value *Addr, const llvm::Twine &Name = "") { - assert(llvm::cast<llvm::PointerType>(Addr->getType()) - ->isOpaqueOrPointeeTypeMatches(getInt1Ty())); return CreateAlignedLoad(getInt1Ty(), Addr, CharUnits::One(), Name); } /// Emit a store to an i1 flag variable. 
llvm::StoreInst *CreateFlagStore(bool Value, llvm::Value *Addr) { - assert(llvm::cast<llvm::PointerType>(Addr->getType()) - ->isOpaqueOrPointeeTypeMatches(getInt1Ty())); return CreateAlignedStore(getInt1(Value), Addr, CharUnits::One()); } @@ -157,19 +151,8 @@ public: using CGBuilderBaseTy::CreateAddrSpaceCast; Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name = "") { - assert(cast<llvm::PointerType>(Ty)->isOpaqueOrPointeeTypeMatches( - Addr.getElementType()) && - "Should not change the element type"); - return Addr.withPointer(CreateAddrSpaceCast(Addr.getPointer(), Ty, Name)); - } - - /// Cast the element type of the given address to a different type, - /// preserving information like the alignment and address space. - Address CreateElementBitCast(Address Addr, llvm::Type *Ty, - const llvm::Twine &Name = "") { - auto *PtrTy = Ty->getPointerTo(Addr.getAddressSpace()); - return Address(CreateBitCast(Addr.getPointer(), PtrTy, Name), Ty, - Addr.getAlignment()); + return Addr.withPointer(CreateAddrSpaceCast(Addr.getPointer(), Ty, Name), + Addr.isKnownNonNull()); } using CGBuilderBaseTy::CreatePointerBitCastOrAddrSpaceCast; @@ -178,7 +161,7 @@ public: const llvm::Twine &Name = "") { llvm::Value *Ptr = CreatePointerBitCastOrAddrSpaceCast(Addr.getPointer(), Ty, Name); - return Address(Ptr, ElementTy, Addr.getAlignment()); + return Address(Ptr, ElementTy, Addr.getAlignment(), Addr.isKnownNonNull()); } /// Given @@ -199,7 +182,7 @@ public: return Address( CreateStructGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), ElTy->getElementType(Index), - Addr.getAlignment().alignmentAtOffset(Offset)); + Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull()); } /// Given @@ -221,7 +204,8 @@ public: CreateInBoundsGEP(Addr.getElementType(), Addr.getPointer(), {getSize(CharUnits::Zero()), getSize(Index)}, Name), ElTy->getElementType(), - Addr.getAlignment().alignmentAtOffset(Index * EltSize)); + Addr.getAlignment().alignmentAtOffset(Index * EltSize), + Addr.isKnownNonNull()); } /// Given @@ -237,8 +221,8 @@ public: return Address(CreateInBoundsGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), - ElTy, - Addr.getAlignment().alignmentAtOffset(Index * EltSize)); + ElTy, Addr.getAlignment().alignmentAtOffset(Index * EltSize), + Addr.isKnownNonNull()); } /// Given @@ -255,7 +239,8 @@ public: return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getElementType(), - Addr.getAlignment().alignmentAtOffset(Index * EltSize)); + Addr.getAlignment().alignmentAtOffset(Index * EltSize), + NotKnownNonNull); } /// Create GEP with single dynamic index. The address alignment is reduced @@ -270,7 +255,7 @@ public: return Address( CreateGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), Addr.getElementType(), - Addr.getAlignment().alignmentOfArrayElement(EltSize)); + Addr.getAlignment().alignmentOfArrayElement(EltSize), NotKnownNonNull); } /// Given a pointer to i8, adjust it by a given constant offset. 
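// A pattern worth noting across these Address helpers: inbounds GEPs at a
// constant offset forward Addr.isKnownNonNull() unchanged (an inbounds step
// within a non-null object stays non-null), while the non-inbounds
// CreateConstGEP/CreateConstByteGEP and the dynamically indexed forms
// conservatively rebuild the result with NotKnownNonNull.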
@@ -280,7 +265,8 @@ public: return Address(CreateInBoundsGEP(Addr.getElementType(), Addr.getPointer(), getSize(Offset), Name), Addr.getElementType(), - Addr.getAlignment().alignmentAtOffset(Offset)); + Addr.getAlignment().alignmentAtOffset(Offset), + Addr.isKnownNonNull()); } Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name = "") { @@ -288,7 +274,8 @@ public: return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), getSize(Offset), Name), Addr.getElementType(), - Addr.getAlignment().alignmentAtOffset(Offset)); + Addr.getAlignment().alignmentAtOffset(Offset), + NotKnownNonNull); } using CGBuilderBaseTy::CreateConstInBoundsGEP2_32; @@ -305,7 +292,8 @@ public: llvm_unreachable("offset of GEP with constants is always computable"); return Address(GEP, GEP->getResultElementType(), Addr.getAlignment().alignmentAtOffset( - CharUnits::fromQuantity(Offset.getSExtValue()))); + CharUnits::fromQuantity(Offset.getSExtValue())), + Addr.isKnownNonNull()); } using CGBuilderBaseTy::CreateMemCpy; @@ -369,7 +357,8 @@ public: using CGBuilderBaseTy::CreateLaunderInvariantGroup; Address CreateLaunderInvariantGroup(Address Addr) { - return Addr.withPointer(CreateLaunderInvariantGroup(Addr.getPointer())); + return Addr.withPointer(CreateLaunderInvariantGroup(Addr.getPointer()), + Addr.isKnownNonNull()); } }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f72e04a425d9..30f5f4e7061c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -28,8 +28,10 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" @@ -52,10 +54,10 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" -#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/X86TargetParser.h" +#include "llvm/TargetParser/AArch64TargetParser.h" +#include "llvm/TargetParser/X86TargetParser.h" #include <optional> #include <sstream> @@ -98,13 +100,29 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, // TODO: This list should be expanded or refactored after all GCC-compatible // std libcall builtins are implemented. 
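For orientation, what the expanded table below buys; a sketch assuming a target such as powerpc64le-linux-gnu where long double is IEEE binary128 and glibc exposes parallel __*ieee128 entry points:

void demo(long double ld) {
  // With IEEE-quad long double the ordinary symbol would read the wrong
  // format, so the builtin must resolve through the table, here to
  // "__printfieee128".
  __builtin_printf("%Lf\n", ld);
}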
- static SmallDenseMap<unsigned, StringRef, 8> F128Builtins{ + static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{ + {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"}, + {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"}, + {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"}, + {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"}, + {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"}, + {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"}, + {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"}, + {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"}, + {Builtin::BI__builtin_fprintf, "__fprintfieee128"}, {Builtin::BI__builtin_printf, "__printfieee128"}, + {Builtin::BI__builtin_snprintf, "__snprintfieee128"}, + {Builtin::BI__builtin_sprintf, "__sprintfieee128"}, + {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"}, + {Builtin::BI__builtin_vprintf, "__vprintfieee128"}, {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"}, {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"}, - {Builtin::BI__builtin_sprintf, "__sprintfieee128"}, - {Builtin::BI__builtin_snprintf, "__snprintfieee128"}, - {Builtin::BI__builtin_fprintf, "__fprintfieee128"}, + {Builtin::BI__builtin_fscanf, "__fscanfieee128"}, + {Builtin::BI__builtin_scanf, "__scanfieee128"}, + {Builtin::BI__builtin_sscanf, "__sscanfieee128"}, + {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"}, + {Builtin::BI__builtin_vscanf, "__vscanfieee128"}, + {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"}, {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"}, }; @@ -169,6 +187,21 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, return V; } +static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF, + const CallExpr *E) { + ASTContext &Ctx = CGF.getContext(); + Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0)); + unsigned Bytes = Ptr.getElementType()->isPointerTy() + ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity() + : Ptr.getElementType()->getScalarSizeInBits() / 8; + unsigned Align = Ptr.getAlignment().getQuantity(); + if (Align % Bytes != 0) { + DiagnosticsEngine &Diags = CGF.CGM.getDiags(); + Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned); + } + return Ptr.getPointer(); +} + /// Utility to insert an atomic instruction based on Intrinsic::ID /// and the expression node. 
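An example of what the new CheckAtomicAlignment helper (added above) diagnoses, using hypothetical user code: a __sync operand whose alignment is smaller than its access size.

struct __attribute__((packed)) Packed {
  char tag;
  int counter;               // only 1-byte aligned inside the packed struct
};

int bump(Packed *p) {
  // Alignment (1) is not a multiple of the access size (4), so this call now
  // gets the warn_sync_op_misaligned diagnostic; the operation itself is
  // still emitted.
  return __sync_fetch_and_add(&p->counter, 1);
}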
static Value *MakeBinaryAtomicValue( @@ -181,13 +214,14 @@ static Value *MakeBinaryAtomicValue( E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); - llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); llvm::IntegerType *IntType = llvm::IntegerType::get(CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); - llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); + llvm::Type *IntPtrType = + llvm::PointerType::get(CGF.getLLVMContext(), AddrSpace); llvm::Value *Args[2]; Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); @@ -243,19 +277,16 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); - llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); - unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); + llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); - llvm::IntegerType *IntType = - llvm::IntegerType::get(CGF.getLLVMContext(), - CGF.getContext().getTypeSize(T)); - llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); + llvm::IntegerType *IntType = llvm::IntegerType::get( + CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); llvm::Value *Args[2]; Args[1] = CGF.EmitScalarExpr(E->getArg(1)); llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); - Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); + Args[0] = DestPtr; llvm::Value *Result = CGF.Builder.CreateAtomicRMW( Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); @@ -285,15 +316,13 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); - llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); - unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); + llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); - llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); Value *Args[3]; - Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); + Args[0] = DestPtr; Args[1] = CGF.EmitScalarExpr(E->getArg(1)); llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); @@ -385,10 +414,8 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, // Convert to i128 pointers and values. 
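// Shape of the MSVC intrinsic being lowered here, per Microsoft's
// documentation:
//   unsigned char _InterlockedCompareExchange128(
//       __int64 volatile *Destination,
//       __int64 ExchangeHigh, __int64 ExchangeLow,
//       __int64 *ComparandResult);
// The builtin composes (hi, lo) into one i128, performs a 16-byte cmpxchg,
// and ComparandResult is always overwritten with the value read from
// Destination, whether or not the exchange succeeded.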
llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128); - llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); - Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy); - Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy), - Int128Ty, CGF.getContext().toCharUnitsFromBits(128)); + Address ComparandResult(ComparandPtr, Int128Ty, + CGF.getContext().toCharUnitsFromBits(128)); // (((i128)hi) << 64) | ((i128)lo) ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); @@ -451,7 +478,6 @@ static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); - Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize); Load->setVolatile(true); return Load; @@ -463,9 +489,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { Value *Value = CGF.EmitScalarExpr(E->getArg(1)); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = - llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8); - Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); llvm::StoreInst *Store = CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); Store->setVolatile(true); @@ -508,6 +531,25 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, } } +// Has second type mangled argument. +static Value *emitBinaryExpMaybeConstrainedFPBuiltin( + CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, + llvm::Intrinsic::ID ConstrainedIntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + if (CGF.Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); + Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, + {Src0->getType(), Src1->getType()}); + return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); + } + + Function *F = + CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()}); + return CGF.Builder.CreateCall(F, {Src0, Src1}); +} + // Emit an intrinsic that has 3 operands of the same type as its result. // Depending on mode, this may be a constrained floating-point intrinsic. static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, @@ -611,6 +653,24 @@ emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, } } +static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, + llvm::Intrinsic::ID IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType(); + llvm::Type *IntTy = CGF.ConvertType(IntPtrTy); + llvm::Function *F = + CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy}); + llvm::Value *Call = CGF.Builder.CreateCall(F, Src0); + + llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1); + LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy); + CGF.EmitStoreOfScalar(Exp, LV); + + return CGF.Builder.CreateExtractValue(Call, 0); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). 
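// IR shape produced by emitFrexpBuiltin above, sketched for double: the
// intrinsic yields mantissa and exponent together as one struct.
//   %r    = call { double, i32 } @llvm.frexp.f64.i32(double %x)
//   %exp  = extractvalue { double, i32 } %r, 1
//   store i32 %exp, ptr %exp_out
//   %mant = extractvalue { double, i32 } %r, 0   ; value returned to caller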
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -923,7 +983,7 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, // Build the constraints. FIXME: We should support immediates when possible. std::string Constraints = "={@ccc},r,r,~{cc},~{memory}"; - std::string MachineClobbers = CGF.getTarget().getClobbers(); + std::string_view MachineClobbers = CGF.getTarget().getClobbers(); if (!MachineClobbers.empty()) { Constraints += ','; Constraints += MachineClobbers; @@ -931,9 +991,9 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(E->getArg(1)->getType())); - llvm::Type *IntPtrType = IntType->getPointerTo(); + llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext()); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false); + llvm::FunctionType::get(CGF.Int8Ty, {PtrType, IntType}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); @@ -1066,15 +1126,14 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, AsmOS << "$0, ${1:y}"; std::string Constraints = "=r,*Z,~{memory}"; - std::string MachineClobbers = CGF.getTarget().getClobbers(); + std::string_view MachineClobbers = CGF.getTarget().getClobbers(); if (!MachineClobbers.empty()) { Constraints += ','; Constraints += MachineClobbers; } - llvm::Type *IntPtrType = RetType->getPointerTo(); - llvm::FunctionType *FTy = - llvm::FunctionType::get(RetType, {IntPtrType}, false); + llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext()); + llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); @@ -1709,7 +1768,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, && "Unsupported builtin check kind"); Value *ArgValue = EmitScalarExpr(E); - if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) + if (!SanOpts.has(SanitizerKind::Builtin)) return ArgValue; SanitizerScope SanScope(this); @@ -1818,8 +1877,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Address Arg = GetAddrOfLocalVar(Args[I]); Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); - Addr = - Builder.CreateElementBitCast(Addr, Arg.getElementType(), "argDataCast"); + Addr = Addr.withElementType(Arg.getElementType()); Builder.CreateStore(Builder.CreateLoad(Arg), Addr); Offset += Size; ++I; @@ -2182,6 +2240,17 @@ static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) { } } +static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, + Value *V) { + if (CGF.Builder.getIsFPConstrained() && + CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) { + if (Value *Result = + CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM)) + return Result; + } + return nullptr; +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -2444,6 +2513,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Intrinsic::round, Intrinsic::experimental_constrained_round)); + case Builtin::BIroundeven: + case Builtin::BIroundevenf: + case Builtin::BIroundevenl: + case Builtin::BI__builtin_roundeven: + case Builtin::BI__builtin_roundevenf: + 
case Builtin::BI__builtin_roundevenf16: + case Builtin::BI__builtin_roundevenl: + case Builtin::BI__builtin_roundevenf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + Intrinsic::roundeven, + Intrinsic::experimental_constrained_roundeven)); + case Builtin::BIsin: case Builtin::BIsinf: case Builtin::BIsinl: @@ -2463,11 +2544,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: - case Builtin::BI__builtin_sqrtf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::sqrt, - Intrinsic::experimental_constrained_sqrt)); - + case Builtin::BI__builtin_sqrtf128: { + llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt); + SetSqrtFPAccuracy(Call); + return RValue::get(Call); + } case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: @@ -2523,7 +2605,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::llrint, Intrinsic::experimental_constrained_llrint)); - + case Builtin::BI__builtin_ldexp: + case Builtin::BI__builtin_ldexpf: + case Builtin::BI__builtin_ldexpl: + case Builtin::BI__builtin_ldexpf16: + case Builtin::BI__builtin_ldexpf128: { + return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::ldexp, + Intrinsic::experimental_constrained_ldexp)); + } default: break; } @@ -2801,8 +2891,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_assume_aligned: { const Expr *Ptr = E->getArg(0); Value *PtrValue = EmitScalarExpr(Ptr); - if (PtrValue->getType() != VoidPtrTy) - PtrValue = EmitCastToVoidPtr(PtrValue); Value *OffsetValue = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; @@ -2827,6 +2915,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateCall(FnAssume, ArgValue); return RValue::get(nullptr); } + case Builtin::BI__builtin_assume_separate_storage: { + const Expr *Arg0 = E->getArg(0); + const Expr *Arg1 = E->getArg(1); + + Value *Value0 = EmitScalarExpr(Arg0); + Value *Value1 = EmitScalarExpr(Arg1); + + Value *Values[] = {Value0, Value1}; + OperandBundleDefT<Value *> OBD("separate_storage", Values); + Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD}); + return RValue::get(nullptr); + } case Builtin::BI__arithmetic_fence: { // Create the builtin call if FastMath is selected, and the target // supports the builtin, otherwise just return the argument. @@ -2981,6 +3081,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); if (Builder.getIsFPConstrained()) { + // FIXME: llvm.powi has 2 mangling types, + // llvm.experimental.constrained.powi has one. 
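// Context for the FIXME: llvm.powi is overloaded on both the FP type and the
// exponent type (e.g. @llvm.powi.f64.i32), while
// llvm.experimental.constrained.powi is overloaded on the FP type only, with
// the exponent fixed at i32. That is why the constrained branch below mangles
// on Src0's type alone while the unconstrained one passes both types.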
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi, Src0->getType()); @@ -2991,6 +3093,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, { Src0->getType(), Src1->getType() }); return RValue::get(Builder.CreateCall(F, { Src0, Src1 })); } + case Builtin::BI__builtin_frexp: + case Builtin::BI__builtin_frexpf: + case Builtin::BI__builtin_frexpl: + case Builtin::BI__builtin_frexpf128: + case Builtin::BI__builtin_frexpf16: + return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp)); case Builtin::BI__builtin_isgreater: case Builtin::BI__builtin_isgreaterequal: case Builtin::BI__builtin_isless: @@ -3027,37 +3135,69 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // ZExt bool to int type. return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); } + case Builtin::BI__builtin_isnan: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); - llvm::Type *Ty = V->getType(); - const llvm::fltSemantics &Semantics = Ty->getFltSemantics(); - if (!Builder.getIsFPConstrained() || - Builder.getDefaultConstrainedExcept() == fp::ebIgnore || - !Ty->isIEEE()) { - V = Builder.CreateFCmpUNO(V, V, "cmp"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); - } + if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) + return RValue::get(Result); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan), + ConvertType(E->getType()))); + } - if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM)) + case Builtin::BI__builtin_isinf: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) return RValue::get(Result); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf), + ConvertType(E->getType()))); + } - // NaN has all exp bits set and a non zero significand. 
Therefore: - // isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0) - unsigned bitsize = Ty->getScalarSizeInBits(); - llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize); - Value *IntV = Builder.CreateBitCast(V, IntTy); - APInt AndMask = APInt::getSignedMaxValue(bitsize); - Value *AbsV = - Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask)); - APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt(); - Value *Sub = - Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV); - // V = sign bit (Sub) <=> V = (Sub < 0) - V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1)); - if (bitsize > 32) - V = Builder.CreateTrunc(V, ConvertType(E->getType())); - return RValue::get(V); + case Builtin::BIfinite: + case Builtin::BI__finite: + case Builtin::BIfinitef: + case Builtin::BI__finitef: + case Builtin::BIfinitel: + case Builtin::BI__finitel: + case Builtin::BI__builtin_isfinite: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) + return RValue::get(Result); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite), + ConvertType(E->getType()))); + } + + case Builtin::BI__builtin_isnormal: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + return RValue::get( + Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal), + ConvertType(E->getType()))); + } + + case Builtin::BI__builtin_isfpclass: { + Expr::EvalResult Result; + if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext())) + break; + uint64_t Test = Result.Val.getInt().getLimitedValue(); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + Value *V = EmitScalarExpr(E->getArg(0)); + return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test), + ConvertType(E->getType()))); + } + + case Builtin::BI__builtin_nondeterministic_value: { + llvm::Type *Ty = ConvertType(E->getArg(0)->getType()); + + Value *Result = PoisonValue::get(Ty); + Result = Builder.CreateFreeze(Result); + + return RValue::get(Result); } case Builtin::BI__builtin_elementwise_abs: { @@ -3079,6 +3219,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_ceil: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + case Builtin::BI__builtin_elementwise_exp: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp")); + case Builtin::BI__builtin_elementwise_exp2: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2")); + case Builtin::BI__builtin_elementwise_log: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log")); + case Builtin::BI__builtin_elementwise_log2: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2")); + case Builtin::BI__builtin_elementwise_log10: + return RValue::get( + emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10")); + case Builtin::BI__builtin_elementwise_pow: { + return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow)); + } case Builtin::BI__builtin_elementwise_cos: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos")); @@ -3088,6 +3246,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitUnaryBuiltin(*this, E, 
llvm::Intrinsic::roundeven, "elt.roundeven")); + case Builtin::BI__builtin_elementwise_round: + return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round, + "elt.round")); + case Builtin::BI__builtin_elementwise_rint: + return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint, + "elt.rint")); + case Builtin::BI__builtin_elementwise_nearbyint: + return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint, + "elt.nearbyint")); case Builtin::BI__builtin_elementwise_sin: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin")); @@ -3097,9 +3264,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc")); case Builtin::BI__builtin_elementwise_canonicalize: return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.trunc")); + emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); case Builtin::BI__builtin_elementwise_copysign: return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign)); + case Builtin::BI__builtin_elementwise_fma: + return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -3247,52 +3416,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } - case Builtin::BIfinite: - case Builtin::BI__finite: - case Builtin::BIfinitef: - case Builtin::BI__finitef: - case Builtin::BIfinitel: - case Builtin::BI__finitel: - case Builtin::BI__builtin_isinf: - case Builtin::BI__builtin_isfinite: { - // isinf(x) --> fabs(x) == infinity - // isfinite(x) --> fabs(x) != infinity - // x != NaN via the ordered compare in either case. - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); - Value *V = EmitScalarExpr(E->getArg(0)); - llvm::Type *Ty = V->getType(); - if (!Builder.getIsFPConstrained() || - Builder.getDefaultConstrainedExcept() == fp::ebIgnore || - !Ty->isIEEE()) { - Value *Fabs = EmitFAbs(*this, V); - Constant *Infinity = ConstantFP::getInfinity(V->getType()); - CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) - ? CmpInst::FCMP_OEQ - : CmpInst::FCMP_ONE; - Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); - return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); - } - - if (Value *Result = getTargetHooks().testFPKind(V, BuiltinID, Builder, CGM)) - return RValue::get(Result); - - // Inf values have all exp bits set and a zero significand. Therefore: - // isinf(V) == ((V << 1) == ((exp mask) << 1)) - // isfinite(V) == ((V << 1) < ((exp mask) << 1)) using unsigned comparison - unsigned bitsize = Ty->getScalarSizeInBits(); - llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize); - Value *IntV = Builder.CreateBitCast(V, IntTy); - Value *Shl1 = Builder.CreateShl(IntV, 1); - const llvm::fltSemantics &Semantics = Ty->getFltSemantics(); - APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt(); - Value *ExpMaskShl1 = llvm::ConstantInt::get(IntTy, ExpMask.shl(1)); - if (BuiltinID == Builtin::BI__builtin_isinf) - V = Builder.CreateICmpEQ(Shl1, ExpMaskShl1); - else - V = Builder.CreateICmpULT(Shl1, ExpMaskShl1); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); - } - case Builtin::BI__builtin_isinf_sign: { // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); @@ -3312,26 +3435,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } - case Builtin::BI__builtin_isnormal: { - // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); - // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. - Value *V = EmitScalarExpr(E->getArg(0)); - Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); - - Value *Abs = EmitFAbs(*this, V); - Value *IsLessThanInf = - Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); - APFloat Smallest = APFloat::getSmallestNormalized( - getContext().getFloatTypeSemantics(E->getArg(0)->getType())); - Value *IsNormal = - Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), - "isnormal"); - V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); - V = Builder.CreateAnd(V, IsNormal, "and"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); - } - case Builtin::BI__builtin_flt_rounds: { Function *F = CGM.getIntrinsic(Intrinsic::get_rounding); @@ -3343,6 +3446,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_set_flt_rounds: { + Function *F = CGM.getIntrinsic(Intrinsic::set_rounding); + + Value *V = EmitScalarExpr(E->getArg(0)); + Builder.CreateCall(F, V); + return RValue::get(nullptr); + } + case Builtin::BI__builtin_fpclassify: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. @@ -3802,7 +3913,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Call LLVM's EH setjmp, which is lightweight. 
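// The common thread in the isnan/isinf/isfinite/isnormal hunks above: the
// open-coded integer bit tests are replaced by llvm.is.fpclass, which
// backends can lower directly and which never raises FP exceptions, even on
// signaling NaNs. For instance, isnan(double) now emits
//   %is = call i1 @llvm.is.fpclass.f64(double %x, i32 3)   ; fcNan == 0x3
// where the FPClassTest masks compose bitwise (fcNan = fcSNan | fcQNan,
// fcInf = fcNegInf | fcPosInf == 0x204, and so on).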
Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); - Buf = Builder.CreateElementBitCast(Buf, Int8Ty); return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } case Builtin::BI__builtin_longjmp: { @@ -3970,12 +4080,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__sync_lock_release_4: case Builtin::BI__sync_lock_release_8: case Builtin::BI__sync_lock_release_16: { - Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Ptr = CheckAtomicAlignment(*this, E); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - StoreSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::Type *ITy = + llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8); llvm::StoreInst *Store = Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, StoreSize); @@ -4030,8 +4139,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); Value *Ptr = EmitScalarExpr(E->getArg(0)); - unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); - Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); Value *NewVal = Builder.getInt8(1); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa<llvm::ConstantInt>(Order)) { @@ -4113,7 +4220,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); Address Ptr = EmitPointerWithAlignment(E->getArg(0)); - Ptr = Builder.CreateElementBitCast(Ptr, Int8Ty); + Ptr = Ptr.withElementType(Int8Ty); Value *NewVal = Builder.getInt8(0); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa<llvm::ConstantInt>(Order)) { @@ -4549,13 +4656,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_InterlockedCompareExchangePointer: case Builtin::BI_InterlockedCompareExchangePointer_nf: { llvm::Type *RTy; - llvm::IntegerType *IntType = - IntegerType::get(getLLVMContext(), - getContext().getTypeSize(E->getType())); - llvm::Type *IntPtrType = IntType->getPointerTo(); + llvm::IntegerType *IntType = IntegerType::get( + getLLVMContext(), getContext().getTypeSize(E->getType())); - llvm::Value *Destination = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); + llvm::Value *Destination = EmitScalarExpr(E->getArg(0)); llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); RTy = Exchange->getType(); @@ -4674,6 +4778,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BImove: case Builtin::BImove_if_noexcept: case Builtin::BIforward: + case Builtin::BIforward_like: case Builtin::BIas_const: return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__GetExceptionInfo: { @@ -4922,7 +5027,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); llvm::Value *Kernel = - Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); llvm::Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); @@ -4976,7 +5081,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto Info = 
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); llvm::Value *Kernel = - Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); llvm::Value *ElemPtr, *TmpSize, *TmpPtr; std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); @@ -5000,8 +5105,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Any calls now have event arguments passed. if (NumArgs >= 7) { - llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); - llvm::PointerType *EventPtrTy = EventTy->getPointerTo( + llvm::PointerType *PtrTy = llvm::PointerType::get( + CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *NumEvents = @@ -5013,33 +5118,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *EventWaitList = nullptr; if (E->getArg(4)->isNullPointerConstant( getContext(), Expr::NPC_ValueDependentIsNotNull)) { - EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy); + EventWaitList = llvm::ConstantPointerNull::get(PtrTy); } else { EventWaitList = E->getArg(4)->getType()->isArrayType() ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() : EmitScalarExpr(E->getArg(4)); // Convert to generic address space. - EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy); + EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy); } llvm::Value *EventRet = nullptr; if (E->getArg(5)->isNullPointerConstant( getContext(), Expr::NPC_ValueDependentIsNotNull)) { - EventRet = llvm::ConstantPointerNull::get(EventPtrTy); + EventRet = llvm::ConstantPointerNull::get(PtrTy); } else { EventRet = - Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy); + Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy); } auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = - Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); llvm::Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); std::vector<llvm::Type *> ArgTys = { - QueueTy, Int32Ty, RangeTy, Int32Ty, - EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; + QueueTy, Int32Ty, RangeTy, Int32Ty, + PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, EventWaitList, EventRet, @@ -5083,7 +5188,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); - Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Kernel = + Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( @@ -5097,7 +5203,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); - Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Kernel = + Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); Value *Arg = 
Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( @@ -5114,7 +5221,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); - Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Kernel = + Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); const char *Name = BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange @@ -5150,7 +5258,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN()) { - if (getLangOpts().OpenMPIsDevice) + if (getLangOpts().OpenMPIsTargetDevice) return EmitOpenMPDevicePrintfCallExpr(E); if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E); @@ -5354,8 +5462,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (PtrTy->getAddressSpace() != ArgValue->getType()->getPointerAddressSpace()) { ArgValue = Builder.CreateAddrSpaceCast( - ArgValue, - ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace())); + ArgValue, llvm::PointerType::get(getLLVMContext(), + PtrTy->getAddressSpace())); } } @@ -5385,7 +5493,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) { if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) { V = Builder.CreateAddrSpaceCast( - V, V->getType()->getPointerTo(PtrTy->getAddressSpace())); + V, llvm::PointerType::get(getLLVMContext(), + PtrTy->getAddressSpace())); } } @@ -6643,6 +6752,21 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, }, { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, }, { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, }, + // The mangling rules cause us to have one ID for each type for vldap1(q)_lane + // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an + // arbitrary one to be handled as the canonical variation.
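(For reference, a source-level sketch of the intrinsics these map entries canonicalize; this assumes an arm_neon.h built with FEAT_LRCPC3 support, e.g. -march=armv8.2-a+rcpc3:

    #include <arm_neon.h>
    uint64x2_t acquire_lane(const uint64_t *p, uint64x2_t v) {
      return vldap1q_lane_u64(p, v, 1);  // load-acquire into lane 1
    }
    void release_lane(uint64_t *p, uint64x2_t v) {
      vstl1q_lane_u64(p, v, 0);          // store-release from lane 0
    }

All type variants of each form funnel into the single _s64 codegen path chosen below.)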
+ { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 }, + { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 }, + { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 }, + { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, + { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, + { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, + { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 }, + { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 }, + { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 }, + { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, + { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, + { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, }; #undef NEONMAP0 @@ -6667,11 +6791,29 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { #undef SVEMAP1 #undef SVEMAP2 +#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + { \ + #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier \ + } + +#define SMEMAP2(NameBase, TypeModifier) \ + { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier } +static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { +#define GET_SME_LLVM_INTRINSIC_MAP +#include "clang/Basic/arm_sme_builtin_cg.inc" +#undef GET_SME_LLVM_INTRINSIC_MAP +}; + +#undef SMEMAP1 +#undef SMEMAP2 + static bool NEONSIMDIntrinsicsProvenSorted = false; static bool AArch64SIMDIntrinsicsProvenSorted = false; static bool AArch64SISDIntrinsicsProvenSorted = false; static bool AArch64SVEIntrinsicsProvenSorted = false; +static bool AArch64SMEIntrinsicsProvenSorted = false; static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, @@ -7121,7 +7263,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = PoisonValue::get(Ty); - PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); + PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); LoadInst *Ld = Builder.CreateLoad(PtrOp0); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Ops[0] = Builder.CreateInsertElement(V, Ld, CI); @@ -7768,6 +7910,17 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == clang::ARM::BI__builtin_arm_clz || + BuiltinID == clang::ARM::BI__builtin_arm_clz64) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); + Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); + if (BuiltinID == clang::ARM::BI__builtin_arm_clz64) + Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); + return Res; + } + + if (BuiltinID == clang::ARM::BI__builtin_arm_cls) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); @@ -7900,8 +8053,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::Type *RealResTy = ConvertType(Ty); llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); - llvm::Type *PtrTy = IntTy->getPointerTo(); - LoadAddr = 
Builder.CreateBitCast(LoadAddr, PtrTy); + llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext()); Function *F = CGM.getIntrinsic( BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex @@ -7934,7 +8086,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *Val = EmitScalarExpr(E->getArg(0)); Builder.CreateStore(Val, Tmp); - Address LdPtr = Builder.CreateElementBitCast(Tmp, STy); + Address LdPtr = Tmp.withElementType(STy); Val = Builder.CreateLoad(LdPtr); Value *Arg0 = Builder.CreateExtractValue(Val, 0); @@ -7949,9 +8101,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *StoreAddr = EmitScalarExpr(E->getArg(1)); QualType Ty = E->getArg(0)->getType(); - llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), - getContext().getTypeSize(Ty)); - StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); + llvm::Type *StoreTy = + llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); if (StoreVal->getType()->isPointerTy()) StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); @@ -8309,7 +8460,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, [[fallthrough]]; case NEON::BI__builtin_neon_vld1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); + PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); Value *Ld = Builder.CreateLoad(PtrOp0); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } @@ -8373,9 +8524,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - auto St = Builder.CreateStore( - Ops[1], Builder.CreateElementBitCast(PtrOp0, Ops[1]->getType())); - return St; + return Builder.CreateStore(Ops[1], + PtrOp0.withElementType(Ops[1]->getType())); } case NEON::BI__builtin_neon_vtbl1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), @@ -8820,6 +8970,8 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { return Builder.getInt32Ty(); case SVETypeFlags::EltTyInt64: return Builder.getInt64Ty(); + case SVETypeFlags::EltTyInt128: + return Builder.getInt128Ty(); case SVETypeFlags::EltTyFloat16: return Builder.getHalfTy(); @@ -8938,6 +9090,7 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, switch (VTy->getMinNumElements()) { default: llvm_unreachable("unsupported element count!"); + case 1: case 2: case 4: case 8: @@ -9223,13 +9376,9 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, Value *BasePtr = Ops[1]; // Implement the index operand if not omitted. - if (Ops.size() > 3) { - BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo()); + if (Ops.size() > 3) BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); - } - // Prefetch intriniscs always expect an i8* - BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty)); Value *PrfOp = Ops.back(); Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType()); @@ -9251,13 +9400,12 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); - Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); + Value *BasePtr = Ops[1]; // Does the load have an offset? 
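(Stepping back from the offset handling for a moment: the bitcast deletions in this and the neighboring hunks, and the CreateElementBitCast -> withElementType rewrites throughout the patch, are the opaque-pointer migration. Every pointer is now plain ptr in IR, so pointer-to-pointer casts fold away and the element type becomes bookkeeping on clang's Address:

    // old (typed pointers): emitted a real bitcast instruction
    //   Addr = Builder.CreateElementBitCast(Addr, Int8Ty);
    // new (opaque pointers): retags the Address, emits no IR
    Addr = Addr.withElementType(Int8Ty);
)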
if (Ops.size() > 2) BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); - BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); auto *Load = cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr})); @@ -9281,7 +9429,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); - Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); + Value *BasePtr = Ops[1]; // Does the store have an offset? if (Ops.size() == 4) @@ -9290,7 +9438,6 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, // Last value is always the data llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy); - BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); auto *Store = cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr})); @@ -9299,6 +9446,84 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + +Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + Ops[3] = EmitSVEPredicateCast( + Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); + + SmallVector<Value *> NewOps; + NewOps.push_back(Ops[3]); + + llvm::Value *BasePtr = Ops[4]; + + // If the intrinsic contains the vnum parameter, multiply it with the vector + // size in bytes. + if (Ops.size() == 6) { + Function *StreamingVectorLength = + CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *StreamingVectorLengthCall = + Builder.CreateCall(StreamingVectorLength); + llvm::Value *Mulvl = + Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl"); + // The type of the ptr parameter is void *, so use Int8Ty here. + BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl); + } + NewOps.push_back(BasePtr); + NewOps.push_back(Ops[0]); + NewOps.push_back(EmitTileslice(Ops[2], Ops[1])); + Function *F = CGM.getIntrinsic(IntID); + return Builder.CreateCall(F, NewOps); +} + +Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + auto *VecTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(IntID, VecTy); + if (TypeFlags.isReadZA()) { + Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); + Ops[3] = EmitTileslice(Ops[4], Ops[3]); + Ops.erase(&Ops[4]); + } else if (TypeFlags.isWriteZA()) { + Ops[1] = EmitTileslice(Ops[2], Ops[1]); + Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy); + Ops.erase(&Ops[3]); + } + return Builder.CreateCall(F, Ops); +} + +Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + // svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
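(A hedged usage sketch for the two zeroing forms, using the provisional arm_sme.h spellings; the 255 supplied below corresponds to all eight bits of the tile mask being set:

    svzero_za();           // no argument: CodeGen passes mask 255, zeroing all of ZA
    svzero_mask_za(0x0F);  // immediate mask: zero only the selected 64-bit tiles
)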
+ if (Ops.size() == 0) + Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255)); + Function *F = CGM.getIntrinsic(IntID, {}); + return Builder.CreateCall(F, Ops); +} + +Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()), + "mulvl"); + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL); + Ops[0] = EmitTileslice(Ops[1], Ops[0]); + Ops.erase(&Ops[1]); + Function *F = CGM.getIntrinsic(IntID, {}); + return Builder.CreateCall(F, Ops); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9475,9 +9700,14 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, if (TypeFlags.isReverseCompare()) std::swap(Ops[1], Ops[2]); - - if (TypeFlags.isReverseUSDOT()) + else if (TypeFlags.isReverseUSDOT()) std::swap(Ops[1], Ops[2]); + else if (TypeFlags.isReverseMergeAnyBinOp() && + TypeFlags.getMergeType() == SVETypeFlags::MergeAny) + std::swap(Ops[1], Ops[2]); + else if (TypeFlags.isReverseMergeAnyAccOp() && + TypeFlags.getMergeType() == SVETypeFlags::MergeAny) + std::swap(Ops[1], Ops[3]); // Predicated intrinsics with _z suffix need a select w/ zeroinitializer. if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) { @@ -9720,6 +9950,64 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return nullptr; } +Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + // Find out if any arguments are required to be integer constant expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + + llvm::Type *Ty = ConvertType(E->getType()); + llvm::SmallVector<Value *, 4> Ops; + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + if ((ICEArguments & (1 << i)) == 0) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + else { + // If this is required to be a constant, constant fold it so that we know + // that the generated intrinsic gets a ConstantInt. + std::optional<llvm::APSInt> Result = + E->getArg(i)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); + + // Immediates for SVE llvm intrinsics are always 32bit. We can safely + // truncate because the immediate has been range checked and no valid + // immediate requires more than a handful of bits. 
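(The vnum argument of the ZA spill/fill builtins handled in EmitSMELdrStr above is one such constant. At the source level, with the argument order that code expects, namely slice base, vnum, pointer, a call might look like the following; the exact ACLE spelling may differ:

    svldr_vnum_za(slice, 2, src);  // load ZA[slice + 2] from src + 2 * svcntsb()
    svstr_vnum_za(slice, 2, dst);  // store ZA[slice + 2] to dst + 2 * svcntsb()
)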
+ *Result = Result->extOrTrunc(32); + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); + } + } + + auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, + AArch64SMEIntrinsicsProvenSorted); + SVETypeFlags TypeFlags(Builtin->TypeModifier); + if (TypeFlags.isLoad() || TypeFlags.isStore()) + return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA()) + return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za || + BuiltinID == SME::BI__builtin_sme_svzero_za) + return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za || + BuiltinID == SME::BI__builtin_sme_svstr_vnum_za) + return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (Builtin->LLVMIntrinsic != 0) { + // Predicates must match the main datatype. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) + if (PredTy->getElementType()->isIntegerTy(1)) + Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); + + Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, + getSVEOverloadTypes(TypeFlags, Ty, Ops)); + Value *Call = Builder.CreateCall(F, Ops); + return Call; + } + + /// Should not happen + return nullptr; +} + Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -9727,6 +10015,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, BuiltinID <= clang::AArch64::LastSVEBuiltin) return EmitAArch64SVEBuiltinExpr(BuiltinID, E); + if (BuiltinID >= clang::AArch64::FirstSMEBuiltin && + BuiltinID <= clang::AArch64::LastSMEBuiltin) + return EmitAArch64SMEBuiltinExpr(BuiltinID, E); + unsigned HintID = static_cast<unsigned>(-1); switch (BuiltinID) { default: break; @@ -9775,6 +10067,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == clang::AArch64::BI__builtin_arm_clz || + BuiltinID == clang::AArch64::BI__builtin_arm_clz64) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); + Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); + if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64) + Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); + return Res; + } + if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, @@ -9929,8 +10231,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *RealResTy = ConvertType(Ty); llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); - llvm::Type *PtrTy = IntTy->getPointerTo(); - LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); + llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext()); Function *F = CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex @@ -9962,7 +10263,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Address Tmp = CreateMemTemp(E->getArg(0)->getType()); EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); - Tmp = Builder.CreateElementBitCast(Tmp, STy); + Tmp = Tmp.withElementType(STy); llvm::Value *Val = Builder.CreateLoad(Tmp); Value *Arg0 = 
Builder.CreateExtractValue(Val, 0); @@ -9978,9 +10279,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *StoreAddr = EmitScalarExpr(E->getArg(1)); QualType Ty = E->getArg(0)->getType(); - llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), - getContext().getTypeSize(Ty)); - StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); + llvm::Type *StoreTy = + llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); if (StoreVal->getType()->isPointerTy()) StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); @@ -10358,6 +10658,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst1q_v: case NEON::BI__builtin_neon_vst1_lane_v: case NEON::BI__builtin_neon_vst1q_lane_v: + case NEON::BI__builtin_neon_vldap1_lane_s64: + case NEON::BI__builtin_neon_vldap1q_lane_s64: + case NEON::BI__builtin_neon_vstl1_lane_s64: + case NEON::BI__builtin_neon_vstl1q_lane_s64: // Get the alignment for the argument in addition to the value; // we'll use it later. PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); @@ -10880,14 +11184,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); case NEON::BI__builtin_neon_vfmsh_f16: { - // FIXME: This should be an fneg instruction: - Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); - Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); + Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh"); // NEON intrinsic puts accumulator first, unlike the LLVM fma. return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, - {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); + {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vaddd_s64: case NEON::BI__builtin_neon_vaddd_u64: @@ -11958,6 +12260,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, PtrOp0.getAlignment()); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); } + case NEON::BI__builtin_neon_vldap1_lane_s64: + case NEON::BI__builtin_neon_vldap1q_lane_s64: { + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ty = llvm::PointerType::getUnqual(VTy->getElementType()); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + llvm::LoadInst *LI = Builder.CreateAlignedLoad( + VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); + LI->setAtomic(llvm::AtomicOrdering::Acquire); + Ops[0] = LI; + return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane"); + } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = PoisonValue::get(Ty); @@ -11976,6 +12289,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty), PtrOp0.getAlignment()); + case NEON::BI__builtin_neon_vstl1_lane_s64: + case NEON::BI__builtin_neon_vstl1q_lane_s64: { + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); + Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); + llvm::StoreInst *SI = Builder.CreateAlignedStore( + Ops[1], Builder.CreateBitCast(Ops[0], Ty), PtrOp0.getAlignment()); + SI->setAtomic(llvm::AtomicOrdering::Release); + return SI; + } case NEON::BI__builtin_neon_vld2_v: case 
NEON::BI__builtin_neon_vld2q_v: { llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); @@ -14312,7 +14635,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Unaligned nontemporal store of the scalar value. StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); - SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); SI->setAlignment(llvm::Align(1)); return SI; } @@ -15750,9 +16073,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // If the user wants the entire vector, just load the entire vector. if (NumBytes == 16) { - Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo()); Value *LD = - Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1))); + Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1))); if (!IsLE) return LD; @@ -15805,7 +16127,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Storing the whole vector, simply store it on BE and reverse bytes and // store on LE. if (Width == 16) { - Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo()); Value *StVec = Op2; if (IsLE) { SmallVector<int, 16> RevMask; @@ -15814,7 +16135,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); } return Builder.CreateStore( - StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1))); + StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1))); } auto *ConvTy = Int64Ty; unsigned NumElts = 0; @@ -15842,14 +16163,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); Value *Ptr = Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); - Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo()); Value *Elt = Builder.CreateExtractElement(Vec, EltNo); if (IsLE && Width > 1) { Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy); Elt = Builder.CreateCall(F, Elt); } return Builder.CreateStore( - Elt, Address(PtrBC, ConvTy, CharUnits::fromQuantity(1))); + Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1))); }; unsigned Stored = 0; unsigned RemainingBytes = NumBytes; @@ -16469,7 +16789,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // use custom code generation to expand a builtin call with a pointer to a // load (if the corresponding instruction accumulates its result) followed by // the call to the intrinsic and a store of the result. 
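(The hunk below threads a fifth, target-feature argument through CUSTOM_BUILTIN. A representative entry under the new shape, illustrative rather than verbatim from BuiltinsPPC.def:

    // CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature)
    CUSTOM_BUILTIN(mma_xvf32ger, mma_xvf32ger, "vW512*VV", false, "mma")
)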
-#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate) \ +#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \ case PPC::BI__builtin_##Name: #include "clang/Basic/BuiltinsPPC.def" { @@ -16497,7 +16817,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Vec = Builder.CreateLoad(Addr); Value *Call = Builder.CreateCall(F, {Vec}); llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16); - Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo()); + Value *Ptr = Ops[0]; for (unsigned i=0; i<NumVecs; i++) { Value *Vec = Builder.CreateExtractValue(Call, i); llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i); @@ -16519,7 +16839,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } bool Accumulate; switch (BuiltinID) { - #define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \ + #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ case PPC::BI__builtin_##Name: \ ID = Intrinsic::ppc_##Intr; \ Accumulate = Acc; \ @@ -16790,11 +17110,8 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { } auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); - auto *DstTy = - CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace()); - auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy); auto *LD = CGF.Builder.CreateLoad( - Address(Cast, CGF.Int16Ty, CharUnits::fromQuantity(2))); + Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2))); llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); @@ -16813,11 +17130,8 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { // Indexing the HSA kernel_dispatch_packet struct. auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4); auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); - auto *DstTy = - CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace()); - auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy); auto *LD = CGF.Builder.CreateLoad( - Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4))); + Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4))); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; @@ -16950,12 +17264,21 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); + case AMDGPU::BI__builtin_amdgcn_logf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log); + case AMDGPU::BI__builtin_amdgcn_exp2f: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2); case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: case AMDGPU::BI__builtin_amdgcn_ldexpf: - case AMDGPU::BI__builtin_amdgcn_ldexph: - return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); + case AMDGPU::BI__builtin_amdgcn_ldexph: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()}); + return Builder.CreateCall(F, {Src0, Src1}); + } case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: case AMDGPU::BI__builtin_amdgcn_frexp_manth: @@ -17128,7 +17451,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return 
Builder.CreateCall(F, {Addr, Val}); } case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: { + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: { Intrinsic::ID IID; llvm::Type *ArgTy; switch (BuiltinID) { @@ -17140,6 +17464,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); IID = Intrinsic::amdgcn_ds_fadd; break; + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: + ArgTy = llvm::FixedVectorType::get( + llvm::Type::getHalfTy(getLLVMContext()), 2); + IID = Intrinsic::amdgcn_ds_fadd; + break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -17319,40 +17648,33 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_atomic_inc64: case AMDGPU::BI__builtin_amdgcn_atomic_dec32: case AMDGPU::BI__builtin_amdgcn_atomic_dec64: { - unsigned BuiltinAtomicOp; - llvm::Type *ResultType = ConvertType(E->getType()); - + llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: - BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc; + BinOp = llvm::AtomicRMWInst::UIncWrap; break; case AMDGPU::BI__builtin_amdgcn_atomic_dec32: case AMDGPU::BI__builtin_amdgcn_atomic_dec64: - BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec; + BinOp = llvm::AtomicRMWInst::UDecWrap; break; } Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - llvm::Function *F = - CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()}); - ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); - // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expects ordering and - // scope as unsigned values - Value *MemOrder = Builder.getInt32(static_cast<int>(AO)); - Value *MemScope = Builder.getInt32(static_cast<int>(SSID)); - QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); bool Volatile = - PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); - Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile)); + PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); - return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile}); + llvm::AtomicRMWInst *RMW = + Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); + if (Volatile) + RMW->setVolatile(true); + return RMW; } case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { @@ -18071,27 +18393,76 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { #undef MMA_VARIANTS_B1_XOR } +static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF, + const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + QualType ArgType = E->getArg(0)->getType(); + clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType); + llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType()); + return CGF.Builder.CreateCall( + CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), + {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())}); +} + +static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF, + const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Type *ElemTy = + CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); + return CGF.Builder.CreateCall( + 
CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), + {Ptr, CGF.EmitScalarExpr(E->getArg(1))}); +} + +static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS, + CodeGenFunction &CGF, const CallExpr *E, + int SrcSize) { + return E->getNumArgs() == 3 + ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS), + {CGF.EmitScalarExpr(E->getArg(0)), + CGF.EmitScalarExpr(E->getArg(1)), + CGF.EmitScalarExpr(E->getArg(2))}) + : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID), + {CGF.EmitScalarExpr(E->getArg(0)), + CGF.EmitScalarExpr(E->getArg(1))}); +} + +static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID, + const CallExpr *E, CodeGenFunction &CGF) { + auto &C = CGF.CGM.getContext(); + if (!(C.getLangOpts().NativeHalfType || + !C.getTargetInfo().useFP16ConversionIntrinsics())) { + CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() + + " requires native half type support."); + return nullptr; + } + + if (IntrinsicID == Intrinsic::nvvm_ldg_global_f || + IntrinsicID == Intrinsic::nvvm_ldu_global_f) + return MakeLdgLdu(IntrinsicID, CGF, E); + + SmallVector<Value *, 16> Args; + auto *F = CGF.CGM.getIntrinsic(IntrinsicID); + auto *FTy = F->getFunctionType(); + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + C.GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { + assert((ICEArguments & (1 << i)) == 0); + auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i)); + auto *PTy = FTy->getParamType(i); + if (PTy != ArgValue->getType()) + ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy); + Args.push_back(ArgValue); + } + + return CGF.Builder.CreateCall(F, Args); +} } // namespace -Value * -CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - auto MakeLdg = [&](unsigned IntrinsicID) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - QualType ArgType = E->getArg(0)->getType(); - clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(ArgType); - llvm::Type *ElemTy = ConvertTypeForMem(ArgType->getPointeeType()); - return Builder.CreateCall( - CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), - {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); - }; - auto MakeScopedAtomic = [&](unsigned IntrinsicID) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - llvm::Type *ElemTy = - ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); - return Builder.CreateCall( - CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), - {Ptr, EmitScalarExpr(E->getArg(1))}); - }; +Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { switch (BuiltinID) { case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: @@ -18175,8 +18546,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { } case NVPTX::BI__nvvm_ldg_c: + case NVPTX::BI__nvvm_ldg_sc: case NVPTX::BI__nvvm_ldg_c2: + case NVPTX::BI__nvvm_ldg_sc2: case NVPTX::BI__nvvm_ldg_c4: + case NVPTX::BI__nvvm_ldg_sc4: case NVPTX::BI__nvvm_ldg_s: case NVPTX::BI__nvvm_ldg_s2: case NVPTX::BI__nvvm_ldg_s4: @@ -18184,6 +18558,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { case NVPTX::BI__nvvm_ldg_i2: case NVPTX::BI__nvvm_ldg_i4: case NVPTX::BI__nvvm_ldg_l: + case NVPTX::BI__nvvm_ldg_l2: case NVPTX::BI__nvvm_ldg_ll: case NVPTX::BI__nvvm_ldg_ll2: case NVPTX::BI__nvvm_ldg_uc: @@ -18196,101 +18571,139 @@ 
CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { case NVPTX::BI__nvvm_ldg_ui2: case NVPTX::BI__nvvm_ldg_ui4: case NVPTX::BI__nvvm_ldg_ul: + case NVPTX::BI__nvvm_ldg_ul2: case NVPTX::BI__nvvm_ldg_ull: case NVPTX::BI__nvvm_ldg_ull2: // PTX Interoperability section 2.2: "For a vector with an even number of // elements, its alignment is set to number of elements times the alignment // of its member: n*alignof(t)." - return MakeLdg(Intrinsic::nvvm_ldg_global_i); + return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E); case NVPTX::BI__nvvm_ldg_f: case NVPTX::BI__nvvm_ldg_f2: case NVPTX::BI__nvvm_ldg_f4: case NVPTX::BI__nvvm_ldg_d: case NVPTX::BI__nvvm_ldg_d2: - return MakeLdg(Intrinsic::nvvm_ldg_global_f); + return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E); + + case NVPTX::BI__nvvm_ldu_c: + case NVPTX::BI__nvvm_ldu_sc: + case NVPTX::BI__nvvm_ldu_c2: + case NVPTX::BI__nvvm_ldu_sc2: + case NVPTX::BI__nvvm_ldu_c4: + case NVPTX::BI__nvvm_ldu_sc4: + case NVPTX::BI__nvvm_ldu_s: + case NVPTX::BI__nvvm_ldu_s2: + case NVPTX::BI__nvvm_ldu_s4: + case NVPTX::BI__nvvm_ldu_i: + case NVPTX::BI__nvvm_ldu_i2: + case NVPTX::BI__nvvm_ldu_i4: + case NVPTX::BI__nvvm_ldu_l: + case NVPTX::BI__nvvm_ldu_l2: + case NVPTX::BI__nvvm_ldu_ll: + case NVPTX::BI__nvvm_ldu_ll2: + case NVPTX::BI__nvvm_ldu_uc: + case NVPTX::BI__nvvm_ldu_uc2: + case NVPTX::BI__nvvm_ldu_uc4: + case NVPTX::BI__nvvm_ldu_us: + case NVPTX::BI__nvvm_ldu_us2: + case NVPTX::BI__nvvm_ldu_us4: + case NVPTX::BI__nvvm_ldu_ui: + case NVPTX::BI__nvvm_ldu_ui2: + case NVPTX::BI__nvvm_ldu_ui4: + case NVPTX::BI__nvvm_ldu_ul: + case NVPTX::BI__nvvm_ldu_ul2: + case NVPTX::BI__nvvm_ldu_ull: + case NVPTX::BI__nvvm_ldu_ull2: + return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E); + case NVPTX::BI__nvvm_ldu_f: + case NVPTX::BI__nvvm_ldu_f2: + case NVPTX::BI__nvvm_ldu_f4: + case NVPTX::BI__nvvm_ldu_d: + case NVPTX::BI__nvvm_ldu_d2: + return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E); case NVPTX::BI__nvvm_atom_cta_add_gen_i: case NVPTX::BI__nvvm_atom_cta_add_gen_l: case NVPTX::BI__nvvm_atom_cta_add_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_add_gen_i: case NVPTX::BI__nvvm_atom_sys_add_gen_l: case NVPTX::BI__nvvm_atom_sys_add_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_add_gen_f: case NVPTX::BI__nvvm_atom_cta_add_gen_d: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_add_gen_f: case NVPTX::BI__nvvm_atom_sys_add_gen_d: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_max_gen_i: case 
NVPTX::BI__nvvm_atom_cta_max_gen_ui: case NVPTX::BI__nvvm_atom_cta_max_gen_l: case NVPTX::BI__nvvm_atom_cta_max_gen_ul: case NVPTX::BI__nvvm_atom_cta_max_gen_ll: case NVPTX::BI__nvvm_atom_cta_max_gen_ull: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_max_gen_i: case NVPTX::BI__nvvm_atom_sys_max_gen_ui: case NVPTX::BI__nvvm_atom_sys_max_gen_l: case NVPTX::BI__nvvm_atom_sys_max_gen_ul: case NVPTX::BI__nvvm_atom_sys_max_gen_ll: case NVPTX::BI__nvvm_atom_sys_max_gen_ull: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_min_gen_i: case NVPTX::BI__nvvm_atom_cta_min_gen_ui: case NVPTX::BI__nvvm_atom_cta_min_gen_l: case NVPTX::BI__nvvm_atom_cta_min_gen_ul: case NVPTX::BI__nvvm_atom_cta_min_gen_ll: case NVPTX::BI__nvvm_atom_cta_min_gen_ull: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_min_gen_i: case NVPTX::BI__nvvm_atom_sys_min_gen_ui: case NVPTX::BI__nvvm_atom_sys_min_gen_l: case NVPTX::BI__nvvm_atom_sys_min_gen_ul: case NVPTX::BI__nvvm_atom_sys_min_gen_ll: case NVPTX::BI__nvvm_atom_sys_min_gen_ull: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_and_gen_i: case NVPTX::BI__nvvm_atom_cta_and_gen_l: case NVPTX::BI__nvvm_atom_cta_and_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_and_gen_i: case NVPTX::BI__nvvm_atom_sys_and_gen_l: case NVPTX::BI__nvvm_atom_sys_and_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_or_gen_i: case NVPTX::BI__nvvm_atom_cta_or_gen_l: case NVPTX::BI__nvvm_atom_cta_or_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_or_gen_i: case NVPTX::BI__nvvm_atom_sys_or_gen_l: case NVPTX::BI__nvvm_atom_sys_or_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_xor_gen_i: case NVPTX::BI__nvvm_atom_cta_xor_gen_l: case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, 
E); case NVPTX::BI__nvvm_atom_sys_xor_gen_i: case NVPTX::BI__nvvm_atom_sys_xor_gen_l: case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: - return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); + return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_cas_gen_i: case NVPTX::BI__nvvm_atom_cta_cas_gen_l: case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { @@ -18555,6 +18968,243 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { CharUnits::fromQuantity(4)); return Result; } + // The following builtins require half type support + case NVPTX::BI__nvvm_ex2_approx_f16: + return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ex2_approx_f16x2: + return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ff2f16x2_rn: + return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ff2f16x2_rn_relu: + return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ff2f16x2_rz: + return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ff2f16x2_rz_relu: + return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_f16: + return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_f16x2: + return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_ftz_f16: + return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_ftz_f16x2: + return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16: + return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2: + return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16: + return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2: + return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fma_rn_relu_f16: + return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_relu_f16x2: + return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_sat_f16: + return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fma_rn_sat_f16x2: + return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_ftz_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_ftz_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_ftz_nan_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID, + E, *this); + 
case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2, + BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID, + E, *this); + case NVPTX::BI__nvvm_fmax_nan_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_nan_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID, + E, *this); + case NVPTX::BI__nvvm_fmax_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmin_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_ftz_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_ftz_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_ftz_nan_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID, + E, *this); + case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2, + BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID, + E, *this); + case NVPTX::BI__nvvm_fmin_nan_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_nan_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID, + E, *this); + case NVPTX::BI__nvvm_fmin_xorsign_abs_f16: + return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2: + return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, + *this); + case NVPTX::BI__nvvm_ldg_h: + return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ldg_h2: + return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ldu_h: + return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); + case NVPTX::BI__nvvm_ldu_h2: { 
+ return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); + } + case NVPTX::BI__nvvm_cp_async_ca_shared_global_4: + return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4, + Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E, + 4); + case NVPTX::BI__nvvm_cp_async_ca_shared_global_8: + return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8, + Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E, + 8); + case NVPTX::BI__nvvm_cp_async_ca_shared_global_16: + return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16, + Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E, + 16); + case NVPTX::BI__nvvm_cp_async_cg_shared_global_16: + return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16, + Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E, + 16); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank)); + case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank)); + case NVPTX::BI__nvvm_is_explicit_cluster: + return 
Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster)); + case NVPTX::BI__nvvm_isspacep_shared_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster), + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_mapa: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_mapa), + {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + case NVPTX::BI__nvvm_mapa_shared_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster), + {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); + case NVPTX::BI__nvvm_getctarank: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_getctarank), + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_getctarank_shared_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster), + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_barrier_cluster_arrive: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive)); + case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed)); + case NVPTX::BI__nvvm_barrier_cluster_wait: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait)); + case NVPTX::BI__nvvm_fence_sc_cluster: + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster)); default: return nullptr; } @@ -18633,15 +19283,14 @@ RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff"); // The result must point to the same underlying allocation. This means we // can use an inbounds GEP to enable better optimization. - Value *Base = EmitCastToVoidPtr(Args.Src); if (getLangOpts().isSignedOverflowDefined()) - Result = Builder.CreateGEP(Int8Ty, Base, Difference, "aligned_result"); + Result = + Builder.CreateGEP(Int8Ty, Args.Src, Difference, "aligned_result"); else - Result = EmitCheckedInBoundsGEP(Int8Ty, Base, Difference, + Result = EmitCheckedInBoundsGEP(Int8Ty, Args.Src, Difference, /*SignedIndices=*/true, /*isSubtraction=*/!AlignUp, E->getExprLoc(), "aligned_result"); - Result = Builder.CreatePointerCast(Result, Args.SrcType); // Emit an alignment assumption to ensure that the new alignment is // propagated to loads/stores, etc. 
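// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] The EmitBuiltinAlignTo
// hunk above changes only the pointer plumbing: with opaque pointers the i8
// GEP applies to Args.Src directly, so the cast to void* going in and the
// pointer cast coming out both disappear. The arithmetic itself is
// unchanged; modeled in plain C++:
#include <cassert>
#include <cstdint>

void *alignUp(void *P, std::uintptr_t Align) {
  assert(Align && !(Align & (Align - 1)) && "alignment must be a power of 2");
  std::uintptr_t Src = reinterpret_cast<std::uintptr_t>(P);
  std::uintptr_t Res = (Src + Align - 1) & ~(Align - 1);
  // Re-derive the result from the original pointer by its byte difference,
  // mirroring the inbounds GEP above: both pointers provably address the
  // same allocation, which is what makes the GEP legal and analyzable.
  return static_cast<char *>(P) + (Res - Src);
}
// ---------------------------------------------------------------------------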
emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); @@ -18823,6 +19472,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, Value); } + case WebAssembly::BI__builtin_wasm_ref_null_extern: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern); + return Builder.CreateCall(Callee); + } + case WebAssembly::BI__builtin_wasm_ref_null_func: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_swizzle_i8x16: { Value *Src = EmitScalarExpr(E->getArg(0)); Value *Indices = EmitScalarExpr(E->getArg(1)); @@ -19188,6 +19845,88 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32); return Builder.CreateCall(Callee, {LHS, RHS, Acc}); } + case WebAssembly::BI__builtin_wasm_table_get: { + assert(E->getArg(0)->getType()->isArrayType()); + Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + Value *Index = EmitScalarExpr(E->getArg(1)); + Function *Callee; + if (E->getType().isWebAssemblyExternrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref); + else if (E->getType().isWebAssemblyFuncrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref); + else + llvm_unreachable( + "Unexpected reference type for __builtin_wasm_table_get"); + return Builder.CreateCall(Callee, {Table, Index}); + } + case WebAssembly::BI__builtin_wasm_table_set: { + assert(E->getArg(0)->getType()->isArrayType()); + Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + Value *Index = EmitScalarExpr(E->getArg(1)); + Value *Val = EmitScalarExpr(E->getArg(2)); + Function *Callee; + if (E->getArg(2)->getType().isWebAssemblyExternrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref); + else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref); + else + llvm_unreachable( + "Unexpected reference type for __builtin_wasm_table_set"); + return Builder.CreateCall(Callee, {Table, Index, Val}); + } + case WebAssembly::BI__builtin_wasm_table_size: { + assert(E->getArg(0)->getType()->isArrayType()); + Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size); + return Builder.CreateCall(Callee, Value); + } + case WebAssembly::BI__builtin_wasm_table_grow: { + assert(E->getArg(0)->getType()->isArrayType()); + Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *NElems = EmitScalarExpr(E->getArg(2)); + + Function *Callee; + if (E->getArg(1)->getType().isWebAssemblyExternrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref); + else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref); + else + llvm_unreachable( + "Unexpected reference type for __builtin_wasm_table_grow"); + + return Builder.CreateCall(Callee, {Table, Val, NElems}); + } + case WebAssembly::BI__builtin_wasm_table_fill: { + assert(E->getArg(0)->getType()->isArrayType()); + Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + Value *Index = EmitScalarExpr(E->getArg(1)); + Value *Val = EmitScalarExpr(E->getArg(2)); + Value *NElems = EmitScalarExpr(E->getArg(3)); + 
+ Function *Callee; + if (E->getArg(2)->getType().isWebAssemblyExternrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref); + else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) + Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref); + else + llvm_unreachable( + "Unexpected reference type for __builtin_wasm_table_fill"); + + return Builder.CreateCall(Callee, {Table, Index, Val, NElems}); + } + case WebAssembly::BI__builtin_wasm_table_copy: { + assert(E->getArg(0)->getType()->isArrayType()); + Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer(); + Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer(); + Value *DstIdx = EmitScalarExpr(E->getArg(2)); + Value *SrcIdx = EmitScalarExpr(E->getArg(3)); + Value *NElems = EmitScalarExpr(E->getArg(4)); + + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy); + + return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems}); + } default: return nullptr; } @@ -19278,8 +20017,7 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, // generate one (NewBase). The new base address needs to be stored. llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1) : Result; - llvm::Value *LV = Builder.CreateBitCast( - EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo()); + llvm::Value *LV = EmitScalarExpr(E->getArg(0)); Address Dest = EmitPointerWithAlignment(E->getArg(0)); llvm::Value *RetVal = Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); @@ -19320,9 +20058,7 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, // to be handled with stores of respective destination type. DestVal = Builder.CreateTrunc(DestVal, DestTy); - llvm::Value *DestForStore = - Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo()); - Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment()); + Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment()); // The updated value of the base pointer is returned. return Builder.CreateExtractValue(Result, 1); }; @@ -19350,8 +20086,8 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { // Get the type from the 0-th argument. llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); - Address PredAddr = Builder.CreateElementBitCast( - EmitPointerWithAlignment(E->getArg(2)), VecType); + Address PredAddr = + EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType); llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr)); llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn}); @@ -19370,8 +20106,8 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: { // Get the type from the 0-th argument. 
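// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] Roughly how the wasm
// table builtins above are exercised from source (hedged: this mirrors the
// clang tests of this era and needs a wasm target with reference types
// enabled; the "table" is a zero-sized static array of a reference type,
// not linear memory):
//
//   static __externref_t table[0];
//
//   __externref_t get(int i) { return __builtin_wasm_table_get(table, i); }
//   void set(int i, __externref_t v) {
//     __builtin_wasm_table_set(table, i, v);
//   }
//
// One caveat for readers of the table_grow case above: its funcref branch
// tests E->getArg(2) and selects Intrinsic::wasm_table_fill_funcref, which
// reads like a copy/paste slip from table_fill (the externref branch uses
// arg 1 and a _grow_ intrinsic). Later upstream revisions use
// wasm_table_grow_funcref there, so don't treat that branch as a lowering
// reference.
// ---------------------------------------------------------------------------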
llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); - Address PredAddr = Builder.CreateElementBitCast( - EmitPointerWithAlignment(E->getArg(2)), VecType); + Address PredAddr = + EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType); llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); @@ -19465,7 +20201,20 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, assert(Error == ASTContext::GE_None && "Unexpected error"); } + if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load) + ICEArguments |= (1 << 1); + if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store) + ICEArguments |= (1 << 2); + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + // Handle aggregate argument, namely RVV tuple types in segment load/store + if (hasAggregateEvaluationKind(E->getArg(i)->getType())) { + LValue L = EmitAggExprToLValue(E->getArg(i)); + llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this)); + Ops.push_back(AggValue); + continue; + } + // If this is a normal argument, just emit it as a scalar. if ((ICEArguments & (1 << i)) == 0) { Ops.push_back(EmitScalarExpr(E->getArg(i))); @@ -19497,12 +20246,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_clz_64: case RISCV::BI__builtin_riscv_ctz_32: case RISCV::BI__builtin_riscv_ctz_64: - case RISCV::BI__builtin_riscv_clmul: - case RISCV::BI__builtin_riscv_clmulh: - case RISCV::BI__builtin_riscv_clmulr: - case RISCV::BI__builtin_riscv_xperm4: - case RISCV::BI__builtin_riscv_xperm8: - case RISCV::BI__builtin_riscv_brev8: + case RISCV::BI__builtin_riscv_clmul_32: + case RISCV::BI__builtin_riscv_clmul_64: + case RISCV::BI__builtin_riscv_clmulh_32: + case RISCV::BI__builtin_riscv_clmulh_64: + case RISCV::BI__builtin_riscv_clmulr_32: + case RISCV::BI__builtin_riscv_clmulr_64: + case RISCV::BI__builtin_riscv_xperm4_32: + case RISCV::BI__builtin_riscv_xperm4_64: + case RISCV::BI__builtin_riscv_xperm8_32: + case RISCV::BI__builtin_riscv_xperm8_64: + case RISCV::BI__builtin_riscv_brev8_32: + case RISCV::BI__builtin_riscv_brev8_64: case RISCV::BI__builtin_riscv_zip_32: case RISCV::BI__builtin_riscv_unzip_32: { switch (BuiltinID) { @@ -19515,35 +20270,49 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_clz_32: case RISCV::BI__builtin_riscv_clz_64: { Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return Result; } case RISCV::BI__builtin_riscv_ctz_32: case RISCV::BI__builtin_riscv_ctz_64: { Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return Result; } // Zbc - case RISCV::BI__builtin_riscv_clmul: + case RISCV::BI__builtin_riscv_clmul_32: + case RISCV::BI__builtin_riscv_clmul_64: ID = Intrinsic::riscv_clmul; break; - case RISCV::BI__builtin_riscv_clmulh: + case RISCV::BI__builtin_riscv_clmulh_32: + case RISCV::BI__builtin_riscv_clmulh_64: ID = Intrinsic::riscv_clmulh; break; - case 
RISCV::BI__builtin_riscv_clmulr: + case RISCV::BI__builtin_riscv_clmulr_32: + case RISCV::BI__builtin_riscv_clmulr_64: ID = Intrinsic::riscv_clmulr; break; // Zbkx - case RISCV::BI__builtin_riscv_xperm8: + case RISCV::BI__builtin_riscv_xperm8_32: + case RISCV::BI__builtin_riscv_xperm8_64: ID = Intrinsic::riscv_xperm8; break; - case RISCV::BI__builtin_riscv_xperm4: + case RISCV::BI__builtin_riscv_xperm4_32: + case RISCV::BI__builtin_riscv_xperm4_64: ID = Intrinsic::riscv_xperm4; break; // Zbkb - case RISCV::BI__builtin_riscv_brev8: + case RISCV::BI__builtin_riscv_brev8_32: + case RISCV::BI__builtin_riscv_brev8_64: ID = Intrinsic::riscv_brev8; break; case RISCV::BI__builtin_riscv_zip_32: @@ -19560,115 +20329,88 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, // Zk builtins - // Zknd - case RISCV::BI__builtin_riscv_aes32dsi_32: - ID = Intrinsic::riscv_aes32dsi; - break; - case RISCV::BI__builtin_riscv_aes32dsmi_32: - ID = Intrinsic::riscv_aes32dsmi; - break; - case RISCV::BI__builtin_riscv_aes64ds_64: - ID = Intrinsic::riscv_aes64ds; - break; - case RISCV::BI__builtin_riscv_aes64dsm_64: - ID = Intrinsic::riscv_aes64dsm; - break; - case RISCV::BI__builtin_riscv_aes64im_64: - ID = Intrinsic::riscv_aes64im; - break; - - // Zkne - case RISCV::BI__builtin_riscv_aes32esi_32: - ID = Intrinsic::riscv_aes32esi; - break; - case RISCV::BI__builtin_riscv_aes32esmi_32: - ID = Intrinsic::riscv_aes32esmi; - break; - case RISCV::BI__builtin_riscv_aes64es_64: - ID = Intrinsic::riscv_aes64es; - break; - case RISCV::BI__builtin_riscv_aes64esm_64: - ID = Intrinsic::riscv_aes64esm; - break; - - // Zknd & Zkne - case RISCV::BI__builtin_riscv_aes64ks1i_64: - ID = Intrinsic::riscv_aes64ks1i; - break; - case RISCV::BI__builtin_riscv_aes64ks2_64: - ID = Intrinsic::riscv_aes64ks2; - break; - // Zknh case RISCV::BI__builtin_riscv_sha256sig0: ID = Intrinsic::riscv_sha256sig0; - IntrinsicTypes = {ResultType}; break; case RISCV::BI__builtin_riscv_sha256sig1: ID = Intrinsic::riscv_sha256sig1; - IntrinsicTypes = {ResultType}; break; case RISCV::BI__builtin_riscv_sha256sum0: ID = Intrinsic::riscv_sha256sum0; - IntrinsicTypes = {ResultType}; break; case RISCV::BI__builtin_riscv_sha256sum1: ID = Intrinsic::riscv_sha256sum1; - IntrinsicTypes = {ResultType}; - break; - case RISCV::BI__builtin_riscv_sha512sig0_64: - ID = Intrinsic::riscv_sha512sig0; - break; - case RISCV::BI__builtin_riscv_sha512sig0h_32: - ID = Intrinsic::riscv_sha512sig0h; - break; - case RISCV::BI__builtin_riscv_sha512sig0l_32: - ID = Intrinsic::riscv_sha512sig0l; - break; - case RISCV::BI__builtin_riscv_sha512sig1_64: - ID = Intrinsic::riscv_sha512sig1; - break; - case RISCV::BI__builtin_riscv_sha512sig1h_32: - ID = Intrinsic::riscv_sha512sig1h; - break; - case RISCV::BI__builtin_riscv_sha512sig1l_32: - ID = Intrinsic::riscv_sha512sig1l; - break; - case RISCV::BI__builtin_riscv_sha512sum0_64: - ID = Intrinsic::riscv_sha512sum0; - break; - case RISCV::BI__builtin_riscv_sha512sum0r_32: - ID = Intrinsic::riscv_sha512sum0r; - break; - case RISCV::BI__builtin_riscv_sha512sum1_64: - ID = Intrinsic::riscv_sha512sum1; - break; - case RISCV::BI__builtin_riscv_sha512sum1r_32: - ID = Intrinsic::riscv_sha512sum1r; break; // Zksed case RISCV::BI__builtin_riscv_sm4ks: ID = Intrinsic::riscv_sm4ks; - IntrinsicTypes = {ResultType}; break; case RISCV::BI__builtin_riscv_sm4ed: ID = Intrinsic::riscv_sm4ed; - IntrinsicTypes = {ResultType}; break; // Zksh case RISCV::BI__builtin_riscv_sm3p0: ID = Intrinsic::riscv_sm3p0; - IntrinsicTypes = {ResultType}; break; 
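// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] The _32/_64 suffix split
// above gives each Zbc/Zbkx/Zbkb builtin one fixed prototype instead of a
// type that floats with XLEN. For readers unfamiliar with clmul, this is its
// reference semantics (carry-less multiply: partial products are combined
// with XOR instead of ADD, so no carries propagate between bit positions):
#include <cstdint>

std::uint64_t clmulReference(std::uint64_t A, std::uint64_t B) {
  std::uint64_t R = 0;
  for (int I = 0; I < 64; ++I)
    if ((B >> I) & 1)
      R ^= A << I; // accumulate the shifted partial product with XOR
  return R;       // low 64 bits of the carry-less product
}
// ---------------------------------------------------------------------------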
case RISCV::BI__builtin_riscv_sm3p1: ID = Intrinsic::riscv_sm3p1; - IntrinsicTypes = {ResultType}; break; + // Zihintntl + case RISCV::BI__builtin_riscv_ntl_load: { + llvm::Type *ResTy = ConvertType(E->getType()); + ConstantInt *Mode = cast<ConstantInt>(Ops[1]); + + llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( + getLLVMContext(), + llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue()))); + llvm::MDNode *NontemporalNode = llvm::MDNode::get( + getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + + int Width; + if(ResTy->isScalableTy()) { + const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy); + llvm::Type *ScalarTy = ResTy->getScalarType(); + Width = ScalarTy->getPrimitiveSizeInBits() * + SVTy->getElementCount().getKnownMinValue(); + } else + Width = ResTy->getPrimitiveSizeInBits(); + LoadInst *Load = Builder.CreateLoad( + Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8))); + + Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); + Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), + RISCVDomainNode); + + return Load; + } + case RISCV::BI__builtin_riscv_ntl_store: { + ConstantInt *Mode = cast<ConstantInt>(Ops[2]); + + llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( + getLLVMContext(), + llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue()))); + llvm::MDNode *NontemporalNode = llvm::MDNode::get( + getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + + Value *BC = Builder.CreateBitCast( + Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()), "cast"); + + StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], BC); + Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); + Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), + RISCVDomainNode); + + return Store; + } + // Vector builtins are handled from here. #include "clang/Basic/riscv_vector_builtin_cg.inc" + // SiFive Vector builtins are handled from here. 
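// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] The two Zihintntl cases
// above lower to an ordinary load/store plus two pieces of instruction
// metadata: the generic !nontemporal tag and a RISC-V specific
// !riscv-nontemporal-domain tag carrying the constant locality mode (the
// builtin's trailing argument), which the backend turns into ntl.* hint
// instructions. Modeled with metadata as a plain tag map:
#include <map>
#include <string>

struct InstTags {
  std::map<std::string, int> Meta;
};

InstTags tagNontemporalLoad(int DomainMode) {
  InstTags Load;                                      // the plain load itself
  Load.Meta["nontemporal"] = 1;                       // generic LLVM hint
  Load.Meta["riscv-nontemporal-domain"] = DomainMode; // constant mode operand
  return Load;
}
// ---------------------------------------------------------------------------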
+#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc" } assert(ID != Intrinsic::not_intrinsic); diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index bb887df3e4e0..08769c98dc29 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/ReplaceConstant.h" #include "llvm/Support/Format.h" +#include "llvm/Support/VirtualFileSystem.h" using namespace clang; using namespace CodeGen; @@ -236,7 +237,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); - VoidPtrPtrTy = VoidPtrTy->getPointerTo(); + VoidPtrPtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); } llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { @@ -267,10 +268,8 @@ llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const { } llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { - auto *CallbackFnTy = getCallbackFnTy(); - auto *RegisterGlobalsFnTy = getRegisterGlobalsFnTy(); - llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy, - VoidPtrTy, CallbackFnTy->getPointerTo()}; + llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), VoidPtrTy, + VoidPtrTy, llvm::PointerType::getUnqual(Context)}; return llvm::FunctionType::get(VoidTy, Params, false); } @@ -359,9 +358,13 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); std::string KernelLaunchAPI = "LaunchKernel"; - if (CGF.getLangOpts().HIP && CGF.getLangOpts().GPUDefaultStream == - LangOptions::GPUDefaultStreamKind::PerThread) - KernelLaunchAPI = KernelLaunchAPI + "_spt"; + if (CGF.getLangOpts().GPUDefaultStream == + LangOptions::GPUDefaultStreamKind::PerThread) { + if (CGF.getLangOpts().HIP) + KernelLaunchAPI = KernelLaunchAPI + "_spt"; + else if (CGF.getLangOpts().CUDA) + KernelLaunchAPI = KernelLaunchAPI + "_ptsz"; + } auto LaunchKernelName = addPrefixToName(KernelLaunchAPI); IdentifierInfo &cudaLaunchKernelII = CGM.getContext().Idents.get(LaunchKernelName); @@ -536,8 +539,11 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { // void __cudaRegisterFunction(void **, const char *, char *, const char *, // int, uint3*, uint3*, dim3*, dim3*, int*) llvm::Type *RegisterFuncParams[] = { - VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, - VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; + VoidPtrPtrTy, CharPtrTy, + CharPtrTy, CharPtrTy, + IntTy, VoidPtrTy, + VoidPtrTy, VoidPtrTy, + VoidPtrTy, llvm::PointerType::getUnqual(Context)}; llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterFuncParams, false), addUnderscoredPrefixToName("RegisterFunction")); @@ -560,7 +566,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { NullPtr, NullPtr, NullPtr, - llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; + llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Context))}; Builder.CreateCall(RegisterFunc, Args); } @@ -721,8 +727,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // handle so CUDA runtime can figure out what to call on the GPU side. 
std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr; if (!CudaGpuBinaryFileName.empty()) { - llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr = - llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName); + auto VFS = CGM.getFileSystem(); + auto CudaGpuBinaryOrErr = + VFS->getBufferForFile(CudaGpuBinaryFileName, -1, false); if (std::error_code EC = CudaGpuBinaryOrErr.getError()) { CGM.getDiags().Report(diag::err_cannot_open_file) << CudaGpuBinaryFileName << EC.message(); @@ -1195,8 +1202,23 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, GlobalDecl GD) { auto Loc = KernelHandles.find(F->getName()); - if (Loc != KernelHandles.end()) - return Loc->second; + if (Loc != KernelHandles.end()) { + auto OldHandle = Loc->second; + if (KernelStubs[OldHandle] == F) + return OldHandle; + + // We've found the function name, but F itself has changed, so we need to + // update the references. + if (CGM.getLangOpts().HIP) { + // For HIP compilation the handle itself does not change, so we only need + // to update the Stub value. + KernelStubs[OldHandle] = F; + return OldHandle; + } + // For non-HIP compilation, erase the old Stub and fall-through to creating + // new entries. + KernelStubs.erase(OldHandle); + } if (!CGM.getLangOpts().HIP) { KernelHandles[F->getName()] = F; diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index 86f548191d65..110e21f7cb6d 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -131,17 +131,10 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { if (Replacements.count(MangledName)) return false; - // Derive the type for the alias. llvm::Type *AliasValueType = getTypes().GetFunctionType(AliasDecl); - llvm::PointerType *AliasType = AliasValueType->getPointerTo(); - // Find the referent. Some aliases might require a bitcast, in - // which case the caller is responsible for ensuring the soundness - // of these semantics. - auto *Ref = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl)); - llvm::Constant *Aliasee = Ref; - if (Ref->getType() != AliasType) - Aliasee = llvm::ConstantExpr::getBitCast(Ref, AliasType); + // Find the referent. + auto *Aliasee = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl)); // Instead of creating an alias to a linkonce_odr, replace all of the uses // of the aliasee. @@ -170,7 +163,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // If we don't have a definition for the destructor yet or the definition is // available_externally, don't emit an alias. We can't emit aliases to // declarations; that's just not how aliases work. - if (Ref->isDeclarationForLinker()) + if (Aliasee->isDeclarationForLinker()) return true; // Don't create an alias to a linker weak symbol. This avoids producing @@ -189,7 +182,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // Switch any previous uses to the alias.
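// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] The getKernelHandle
// change above, reduced to a standalone model: on a redefinition of the same
// kernel name, HIP keeps the stable handle and merely repoints its stub,
// while CUDA evicts the stale entry and rebuilds it (the non-HIP rebuild
// path here is simplified to "the stub is its own handle"):
#include <map>
#include <string>

struct Fn; // stand-in for llvm::Function

struct KernelRegistry {
  std::map<std::string, Fn *> Handles; // kernel name -> handle
  std::map<Fn *, Fn *> Stubs;          // handle -> current stub

  Fn *getHandle(const std::string &Name, Fn *F, bool IsHIP) {
    auto It = Handles.find(Name);
    if (It != Handles.end()) {
      Fn *Old = It->second;
      if (Stubs[Old] == F)
        return Old;     // nothing changed
      if (IsHIP) {
        Stubs[Old] = F; // stable handle, fresh stub
        return Old;
      }
      Stubs.erase(Old); // CUDA: drop the stale entry, re-create below
    }
    Handles[Name] = F;
    Stubs[F] = F;
    return F;
  }
};
// ---------------------------------------------------------------------------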
if (Entry) { - assert(Entry->getType() == AliasType && + assert(Entry->getValueType() == AliasValueType && + Entry->getAddressSpace() == Alias->getAddressSpace() && "declaration exists with different type"); Alias->takeName(Entry); Entry->replaceAllUsesWith(Alias); @@ -252,8 +246,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, "No kext in Microsoft ABI"); CodeGenModule &CGM = CGF.CGM; llvm::Value *VTable = CGM.getCXXABI().getAddrOfVTable(RD, CharUnits()); - Ty = Ty->getPointerTo(); - VTable = CGF.Builder.CreateBitCast(VTable, Ty->getPointerTo()); + Ty = llvm::PointerType::getUnqual(CGM.getLLVMContext()); assert(VTable && "BuildVirtualCall = kext vtbl pointer is null"); uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); const VTableLayout &VTLayout = CGM.getItaniumVTableContext().getVTableLayout(RD); diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index 42e6c916bed0..7b77dd7875bc 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -46,11 +46,8 @@ CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer( ThisPtrForCall = This.getPointer(); const auto *FPT = MPT->getPointeeType()->castAs<FunctionProtoType>(); - const auto *RD = - cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( - CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); - llvm::Constant *FnPtr = llvm::Constant::getNullValue(FTy->getPointerTo()); + llvm::Constant *FnPtr = llvm::Constant::getNullValue( + llvm::PointerType::getUnqual(CGM.getLLVMContext())); return CGCallee::forDirect(FnPtr, FPT); } @@ -59,8 +56,8 @@ CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr, const MemberPointerType *MPT) { ErrorUnsupportedABI(CGF, "loads of member pointers"); - llvm::Type *Ty = CGF.ConvertType(MPT->getPointeeType()) - ->getPointerTo(Base.getAddressSpace()); + llvm::Type *Ty = + llvm::PointerType::get(CGF.getLLVMContext(), Base.getAddressSpace()); return llvm::Constant::getNullValue(Ty); } @@ -250,7 +247,7 @@ void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr, llvm::Value *&numElements, llvm::Value *&allocPtr, CharUnits &cookieSize) { // Derive a char* in the same address space as the pointer. - ptr = CGF.Builder.CreateElementBitCast(ptr, CGF.Int8Ty); + ptr = ptr.withElementType(CGF.Int8Ty); // If we don't need an array cookie, bail out early. 
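// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] For readers of
// ReadArrayCookie above: an array cookie is the element count the Itanium
// C++ ABI stores in front of a new[]'d array so that delete[] can run the
// right number of destructors. In the common layout the count sits
// immediately before the first element:
//
//   | cookie (count) | elt 0 | elt 1 | ... |
//   ^ allocPtr        ^ pointer the program sees
//
#include <cstddef>

std::size_t readArrayCookie(void *UserPtr) {
  // Assumes the common Itanium layout sketched above; extra padding for
  // over-aligned element types is ignored in this model.
  return reinterpret_cast<std::size_t *>(UserPtr)[-1];
}
// ---------------------------------------------------------------------------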
if (!requiresArrayCookie(expr, eltTy)) { diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index a600768b2074..ad1ad08d0856 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -287,16 +287,26 @@ public: virtual bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, QualType SrcRecordTy) = 0; + virtual bool shouldEmitExactDynamicCast(QualType DestRecordTy) = 0; - virtual llvm::Value * - EmitDynamicCastCall(CodeGenFunction &CGF, Address Value, - QualType SrcRecordTy, QualType DestTy, - QualType DestRecordTy, llvm::BasicBlock *CastEnd) = 0; + virtual llvm::Value *emitDynamicCastCall(CodeGenFunction &CGF, Address Value, + QualType SrcRecordTy, + QualType DestTy, + QualType DestRecordTy, + llvm::BasicBlock *CastEnd) = 0; - virtual llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, + virtual llvm::Value *emitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, - QualType SrcRecordTy, - QualType DestTy) = 0; + QualType SrcRecordTy) = 0; + + /// Emit a dynamic_cast from SrcRecordTy to DestRecordTy. The cast fails if + /// the dynamic type of Value is not exactly DestRecordTy. + virtual llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address Value, + QualType SrcRecordTy, + QualType DestTy, + QualType DestRecordTy, + llvm::BasicBlock *CastSuccess, + llvm::BasicBlock *CastFail) = 0; virtual bool EmitBadCastCall(CodeGenFunction &CGF) = 0; @@ -379,9 +389,8 @@ public: /// zero if no specific type is applicable, e.g. if the ABI expects the "this" /// parameter to point to some artificial offset in a complete object due to /// vbases being reordered. - virtual const CXXRecordDecl * - getThisArgumentTypeForMethod(const CXXMethodDecl *MD) { - return MD->getParent(); + virtual const CXXRecordDecl *getThisArgumentTypeForMethod(GlobalDecl GD) { + return cast<CXXMethodDecl>(GD.getDecl())->getParent(); } /// Perform ABI-specific "this" argument adjustment required prior to diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index dfa552161d7c..bd272e016e92 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -25,13 +25,13 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/Basic/CodeGenOptions.h" -#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/AttributeMask.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -322,7 +322,9 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { SmallVector<CanQualType, 16> argTypes; SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - argTypes.push_back(DeriveThisType(MD->getParent(), MD)); + + const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(GD); + argTypes.push_back(DeriveThisType(ThisType, MD)); bool PassParams = true; @@ -1284,7 +1286,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // // FIXME: Assert that we aren't truncating non-padding bits when have access // to that information. 
- Src = CGF.Builder.CreateElementBitCast(Src, Ty); + Src = Src.withElementType(Ty); return CGF.Builder.CreateLoad(Src); } @@ -1309,7 +1311,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, auto *UndefVec = llvm::UndefValue::get(ScalableDst); auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); llvm::Value *Result = CGF.Builder.CreateInsertVector( - ScalableDst, UndefVec, Load, Zero, "castScalableSve"); + ScalableDst, UndefVec, Load, Zero, "cast.scalable"); if (NeedsBitcast) Result = CGF.Builder.CreateBitCast(Result, OrigType); return Result; @@ -1394,7 +1396,7 @@ static void CreateCoercedStore(llvm::Value *Src, if (isa<llvm::ScalableVectorType>(SrcTy) || isa<llvm::ScalableVectorType>(DstTy) || SrcSize.getFixedValue() <= DstSize.getFixedValue()) { - Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); + Dst = Dst.withElementType(SrcTy); CGF.EmitAggregateStore(Src, Dst, DstIsVolatile); } else { // Otherwise do coercion through memory. This is stupid, but @@ -1418,10 +1420,10 @@ static void CreateCoercedStore(llvm::Value *Src, static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr, const ABIArgInfo &info) { if (unsigned offset = info.getDirectOffset()) { - addr = CGF.Builder.CreateElementBitCast(addr, CGF.Int8Ty); + addr = addr.withElementType(CGF.Int8Ty); addr = CGF.Builder.CreateConstInBoundsByteGEP(addr, CharUnits::fromQuantity(offset)); - addr = CGF.Builder.CreateElementBitCast(addr, info.getCoerceToType()); + addr = addr.withElementType(info.getCoerceToType()); } return addr; } @@ -1636,9 +1638,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { if (retAI.getInAllocaSRet()) { // sret things on win32 aren't void, they return the sret pointer. QualType ret = FI.getReturnType(); - llvm::Type *ty = ConvertType(ret); unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret); - resultType = llvm::PointerType::get(ty, addressSpace); + resultType = llvm::PointerType::get(getLLVMContext(), addressSpace); } else { resultType = llvm::Type::getVoidTy(getLLVMContext()); } @@ -1660,18 +1661,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // Add type for sret argument. if (IRFunctionArgs.hasSRetArg()) { QualType Ret = FI.getReturnType(); - llvm::Type *Ty = ConvertType(Ret); unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret); ArgTypes[IRFunctionArgs.getSRetArgNo()] = - llvm::PointerType::get(Ty, AddressSpace); + llvm::PointerType::get(getLLVMContext(), AddressSpace); } // Add type for inalloca argument. - if (IRFunctionArgs.hasInallocaArg()) { - auto ArgStruct = FI.getArgStruct(); - assert(ArgStruct); - ArgTypes[IRFunctionArgs.getInallocaArgNo()] = ArgStruct->getPointerTo(); - } + if (IRFunctionArgs.hasInallocaArg()) + ArgTypes[IRFunctionArgs.getInallocaArgNo()] = + llvm::PointerType::getUnqual(getLLVMContext()); // Add in all of the required arguments. unsigned ArgNo = 0; @@ -1694,20 +1692,17 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { assert(NumIRArgs == 0); break; - case ABIArgInfo::Indirect: { + case ABIArgInfo::Indirect: assert(NumIRArgs == 1); // indirect arguments are always on the stack, which is alloca addr space. 
- llvm::Type *LTy = ConvertTypeForMem(it->type); - ArgTypes[FirstIRArg] = LTy->getPointerTo( - CGM.getDataLayout().getAllocaAddrSpace()); + ArgTypes[FirstIRArg] = llvm::PointerType::get( + getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace()); break; - } - case ABIArgInfo::IndirectAliased: { + case ABIArgInfo::IndirectAliased: assert(NumIRArgs == 1); - llvm::Type *LTy = ConvertTypeForMem(it->type); - ArgTypes[FirstIRArg] = LTy->getPointerTo(ArgInfo.getIndirectAddrSpace()); + ArgTypes[FirstIRArg] = llvm::PointerType::get( + getLLVMContext(), ArgInfo.getIndirectAddrSpace()); break; - } case ABIArgInfo::Extend: case ABIArgInfo::Direct: { // Fast-isel and the optimizer generally like scalar values better than @@ -1750,7 +1745,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); + const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); if (!isFuncTypeConvertible(FPT)) return llvm::StructType::get(getLLVMContext()); @@ -1828,10 +1823,33 @@ static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy, Module.getLangOpts().Sanitize.has(SanitizerKind::Return); } -void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, - bool HasOptnone, - bool AttrOnCallSite, - llvm::AttrBuilder &FuncAttrs) { +/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the +/// requested denormal behavior, accounting for the overriding behavior of the +/// -f32 case. +static void addDenormalModeAttrs(llvm::DenormalMode FPDenormalMode, + llvm::DenormalMode FP32DenormalMode, + llvm::AttrBuilder &FuncAttrs) { + if (FPDenormalMode != llvm::DenormalMode::getDefault()) + FuncAttrs.addAttribute("denormal-fp-math", FPDenormalMode.str()); + + if (FP32DenormalMode != FPDenormalMode && FP32DenormalMode.isValid()) + FuncAttrs.addAttribute("denormal-fp-math-f32", FP32DenormalMode.str()); +} + +/// Add default attributes to a function, which have merge semantics under +/// -mlink-builtin-bitcode and should not simply overwrite any existing +/// attributes in the linked library. +static void +addMergableDefaultFunctionAttributes(const CodeGenOptions &CodeGenOpts, + llvm::AttrBuilder &FuncAttrs) { + addDenormalModeAttrs(CodeGenOpts.FPDenormalMode, CodeGenOpts.FP32DenormalMode, + FuncAttrs); +} + +static void getTrivialDefaultFunctionAttributes( + StringRef Name, bool HasOptnone, const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts, bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. 
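// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] addDenormalModeAttrs and
// addMergableDefaultFunctionAttributes above encode a small rule: emit
// "denormal-fp-math" only when it differs from the IEEE default, and
// "denormal-fp-math-f32" only when the f32 mode is valid and overrides the
// general one. Standalone model over attribute strings (assuming the default
// mode renders as "ieee,ieee" and an empty string models an invalid mode):
#include <string>
#include <utility>
#include <vector>

using FnAttr = std::pair<std::string, std::string>;

std::vector<FnAttr> denormalAttrs(const std::string &FPMode,
                                  const std::string &FP32Mode) {
  const std::string Default = "ieee,ieee";
  std::vector<FnAttr> Out;
  if (FPMode != Default)
    Out.push_back({"denormal-fp-math", FPMode});
  if (!FP32Mode.empty() && FP32Mode != FPMode) // valid and overriding
    Out.push_back({"denormal-fp-math-f32", FP32Mode});
  return Out;
}
// ---------------------------------------------------------------------------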
if (!HasOptnone) { if (CodeGenOpts.OptimizeSize) @@ -1873,15 +1891,6 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, if (CodeGenOpts.NullPointerIsValid) FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid); - if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE()) - FuncAttrs.addAttribute("denormal-fp-math", - CodeGenOpts.FPDenormalMode.str()); - if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) { - FuncAttrs.addAttribute( - "denormal-fp-math-f32", - CodeGenOpts.FP32DenormalMode.str()); - } - if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore) FuncAttrs.addAttribute("no-trapping-math", "true"); @@ -1960,7 +1969,7 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, } } - if (getLangOpts().assumeFunctionsAreConvergent()) { + if (LangOpts.assumeFunctionsAreConvergent()) { // Conservatively, mark all functions and calls in CUDA and OpenCL as // convergent (meaning, they may call an intrinsically convergent op, such // as __syncthreads() / barrier(), and so can't have certain optimizations @@ -1970,10 +1979,9 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, } // TODO: NoUnwind attribute should be added for other GPU modes HIP, - // SYCL, OpenMP offload. AFAIK, none of them support exceptions in device - // code. - if ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice) || - getLangOpts().OpenCL) { + // OpenMP offload. AFAIK, neither of them support exceptions in device code. + if ((LangOpts.CUDA && LangOpts.CUDAIsDevice) || LangOpts.OpenCL || + LangOpts.SYCLIsDevice) { FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } @@ -1984,6 +1992,98 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, } } +/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as +/// though we had emitted it ourselves. We remove any attributes on F that +/// conflict with the attributes we add here. +static void mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, const CodeGenOptions CodeGenOpts, + const LangOptions &LangOpts, const TargetOptions &TargetOpts, + bool WillInternalize) { + + llvm::AttrBuilder FuncAttrs(F.getContext()); + // Here we only extract the options that are relevant compared to the version + // from GetCPUAndFeaturesAttributes. + if (!TargetOpts.CPU.empty()) + FuncAttrs.addAttribute("target-cpu", TargetOpts.CPU); + if (!TargetOpts.TuneCPU.empty()) + FuncAttrs.addAttribute("tune-cpu", TargetOpts.TuneCPU); + + ::getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(), + CodeGenOpts, LangOpts, + /*AttrOnCallSite=*/false, FuncAttrs); + + if (!WillInternalize && F.isInterposable()) { + // Do not promote "dynamic" denormal-fp-math to this translation unit's + // setting for weak functions that won't be internalized. The user has no + // real control for how builtin bitcode is linked, so we shouldn't assume + // later copies will use a consistent mode. 
+ F.addFnAttrs(FuncAttrs); + return; + } + + llvm::AttributeMask AttrsToRemove; + + llvm::DenormalMode DenormModeToMerge = F.getDenormalModeRaw(); + llvm::DenormalMode DenormModeToMergeF32 = F.getDenormalModeF32Raw(); + llvm::DenormalMode Merged = + CodeGenOpts.FPDenormalMode.mergeCalleeMode(DenormModeToMerge); + llvm::DenormalMode MergedF32 = CodeGenOpts.FP32DenormalMode; + + if (DenormModeToMergeF32.isValid()) { + MergedF32 = + CodeGenOpts.FP32DenormalMode.mergeCalleeMode(DenormModeToMergeF32); + } + + if (Merged == llvm::DenormalMode::getDefault()) { + AttrsToRemove.addAttribute("denormal-fp-math"); + } else if (Merged != DenormModeToMerge) { + // Overwrite existing attribute + FuncAttrs.addAttribute("denormal-fp-math", + CodeGenOpts.FPDenormalMode.str()); + } + + if (MergedF32 == llvm::DenormalMode::getDefault()) { + AttrsToRemove.addAttribute("denormal-fp-math-f32"); + } else if (MergedF32 != DenormModeToMergeF32) { + // Overwrite existing attribute + FuncAttrs.addAttribute("denormal-fp-math-f32", + CodeGenOpts.FP32DenormalMode.str()); + } + + F.removeFnAttrs(AttrsToRemove); + addDenormalModeAttrs(Merged, MergedF32, FuncAttrs); + F.addFnAttrs(FuncAttrs); +} + +void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, const CodeGenOptions CodeGenOpts, + const LangOptions &LangOpts, const TargetOptions &TargetOpts, + bool WillInternalize) { + + ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts, + TargetOpts, WillInternalize); +} + +void CodeGenModule::getTrivialDefaultFunctionAttributes( + StringRef Name, bool HasOptnone, bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + ::getTrivialDefaultFunctionAttributes(Name, HasOptnone, getCodeGenOpts(), + getLangOpts(), AttrOnCallSite, + FuncAttrs); +} + +void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, + bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, + FuncAttrs); + // If we're just getting the default, get the default values for mergeable + // attributes. + if (!AttrOnCallSite) + addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs); +} + void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { llvm::AttrBuilder FuncAttrs(F.getContext()); getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), @@ -1992,8 +2092,17 @@ void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { F.addFnAttrs(FuncAttrs); } +/// Apply default attributes to \p F, accounting for merge semantics of
+/// attributes that should not overwrite existing attributes. +void CodeGenModule::mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, bool WillInternalize) { + ::mergeDefaultFunctionDefinitionAttributes(F, getCodeGenOpts(), getLangOpts(), + getTarget().getTargetOpts(), + WillInternalize); +} + void CodeGenModule::addDefaultFunctionDefinitionAttributes( - llvm::AttrBuilder &attrs) { + llvm::AttrBuilder &attrs) { getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false, /*for call*/ false, attrs); GetCPUAndFeaturesAttributes(GlobalDecl(), attrs); @@ -2105,6 +2214,39 @@ static bool IsArgumentMaybeUndef(const Decl *TargetDecl, return false; } +/// Test if it's legal to apply nofpclass for the given parameter type and its +/// lowered IR type. +static bool canApplyNoFPClass(const ABIArgInfo &AI, QualType ParamType, + bool IsReturn) { + // Should only apply to FP types in the source, not ABI promoted.
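// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] getNoFPClassTestMask
// below maps fast-math assumptions onto a nofpclass(...) parameter/return
// attribute: -fno-honor-infinities contributes fcInf and -fno-honor-nans
// contributes fcNan. Standalone model (bit values mirror llvm::FPClassTest):
#include <cstdint>

enum FPClass : std::uint32_t {
  fcNone = 0,
  fcNan = 0x3,  // signaling | quiet NaN
  fcInf = 0x204 // negative | positive infinity
};

std::uint32_t noFPClassMask(bool NoHonorInfs, bool NoHonorNaNs) {
  std::uint32_t Mask = fcNone;
  if (NoHonorInfs)
    Mask |= fcInf;
  if (NoHonorNaNs)
    Mask |= fcNan;
  return Mask;
}
// ---------------------------------------------------------------------------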
+ if (!ParamType->hasFloatingRepresentation()) + return false; + + // The promoted-to IR type also needs to support nofpclass. + llvm::Type *IRTy = AI.getCoerceToType(); + if (llvm::AttributeFuncs::isNoFPClassCompatibleType(IRTy)) + return true; + + if (llvm::StructType *ST = dyn_cast<llvm::StructType>(IRTy)) { + return !IsReturn && AI.getCanBeFlattened() && + llvm::all_of(ST->elements(), [](llvm::Type *Ty) { + return llvm::AttributeFuncs::isNoFPClassCompatibleType(Ty); + }); + } + + return false; +} + +/// Return the nofpclass mask that can be applied to floating-point parameters. +static llvm::FPClassTest getNoFPClassTestMask(const LangOptions &LangOpts) { + llvm::FPClassTest Mask = llvm::fcNone; + if (LangOpts.NoHonorInfs) + Mask |= llvm::fcInf; + if (LangOpts.NoHonorNaNs) + Mask |= llvm::fcNan; + return Mask; +} + /// Construct the IR attribute list of a function or call. /// /// When adding an attribute, please consider where it should be handled: @@ -2200,6 +2342,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, FuncAttrs.addAttribute(llvm::Attribute::NoReturn); NBA = Fn->getAttr<NoBuiltinAttr>(); } + } + + if (isa<FunctionDecl>(TargetDecl) || isa<VarDecl>(TargetDecl)) { // Only place nomerge attribute on call sites, never functions. This // allows it to work on indirect virtual function calls. if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>()) @@ -2372,6 +2517,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::Direct: if (RetAI.getInReg()) RetAttrs.addAttribute(llvm::Attribute::InReg); + + if (canApplyNoFPClass(RetAI, RetTy, true)) + RetAttrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts())); + break; case ABIArgInfo::Ignore: break; @@ -2510,8 +2659,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, else if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign())); - break; + if (canApplyNoFPClass(AI, ParamType, false)) + Attrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts())); + break; case ABIArgInfo::Indirect: { if (AI.getInReg()) Attrs.addAttribute(llvm::Attribute::InReg); @@ -2743,13 +2894,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // If we're using inalloca, all the memory arguments are GEPs off of the last // parameter, which is a pointer to the complete memory area. Address ArgStruct = Address::invalid(); - if (IRFunctionArgs.hasInallocaArg()) { + if (IRFunctionArgs.hasInallocaArg()) ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()), FI.getArgStruct(), FI.getArgStructAlignment()); - assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo()); - } - // Name the struct return parameter. if (IRFunctionArgs.hasSRetArg()) { auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo()); @@ -2805,7 +2953,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::IndirectAliased: { assert(NumIRArgs == 1); Address ParamAddr = Address(Fn->getArg(FirstIRArg), ConvertTypeForMem(Ty), - ArgI.getIndirectAlign()); + ArgI.getIndirectAlign(), KnownNonNull); if (!hasScalarEvaluationKind(Ty)) { // Aggregates and complex variables are accessed by reference. 
All we @@ -2998,7 +3146,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(NumIRArgs == 1); Coerced->setName(Arg->getName() + ".coerce"); ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector( - VecTyTo, Coerced, Zero, "castFixedSve"))); + VecTyTo, Coerced, Zero, "cast.fixed"))); break; } } @@ -3015,30 +3163,51 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType()); if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && STy->getNumElements() > 1) { - uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); - llvm::Type *DstTy = Ptr.getElementType(); - uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy); + llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy); + llvm::TypeSize PtrElementSize = + CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType()); + if (StructSize.isScalable()) { + assert(STy->containsHomogeneousScalableVectorTypes() && + "ABI only supports structure with homogeneous scalable vector " + "type"); + assert(StructSize == PtrElementSize && + "Only allow non-fractional movement of structure with" + "homogeneous scalable vector type"); + assert(STy->getNumElements() == NumIRArgs); - Address AddrToStoreInto = Address::invalid(); - if (SrcSize <= DstSize) { - AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); + llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto *AI = Fn->getArg(FirstIRArg + i); + AI->setName(Arg->getName() + ".coerce" + Twine(i)); + LoadedStructValue = + Builder.CreateInsertValue(LoadedStructValue, AI, i); + } + + Builder.CreateStore(LoadedStructValue, Ptr); } else { - AddrToStoreInto = - CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); - } + uint64_t SrcSize = StructSize.getFixedValue(); + uint64_t DstSize = PtrElementSize.getFixedValue(); - assert(STy->getNumElements() == NumIRArgs); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto AI = Fn->getArg(FirstIRArg + i); - AI->setName(Arg->getName() + ".coerce" + Twine(i)); - Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); - Builder.CreateStore(AI, EltPtr); - } + Address AddrToStoreInto = Address::invalid(); + if (SrcSize <= DstSize) { + AddrToStoreInto = Ptr.withElementType(STy); + } else { + AddrToStoreInto = + CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); + } - if (SrcSize > DstSize) { - Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize); - } + assert(STy->getNumElements() == NumIRArgs); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto AI = Fn->getArg(FirstIRArg + i); + AI->setName(Arg->getName() + ".coerce" + Twine(i)); + Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); + Builder.CreateStore(AI, EltPtr); + } + if (SrcSize > DstSize) { + Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize); + } + } } else { // Simple case, just do a coerced store of the argument into the alloca. 
assert(NumIRArgs == 1); @@ -3066,7 +3235,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArgVals.push_back(ParamValue::forIndirect(alloca)); auto coercionType = ArgI.getCoerceAndExpandType(); - alloca = Builder.CreateElementBitCast(alloca, coercionType); + alloca = alloca.withElementType(coercionType); unsigned argIndex = FirstIRArg; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { @@ -3323,8 +3492,9 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { // single-predecessors chain from the current insertion point. llvm::BasicBlock *StoreBB = store->getParent(); llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock(); + llvm::SmallPtrSet<llvm::BasicBlock *, 4> SeenBBs; while (IP != StoreBB) { - if (!(IP = IP->getSinglePredecessor())) + if (!SeenBBs.insert(IP).second || !(IP = IP->getSinglePredecessor())) return nullptr; } @@ -3667,7 +3837,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, // Load all of the coerced elements out into results. llvm::SmallVector<llvm::Value*, 4> results; - Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType); + Address addr = ReturnValue.withElementType(coercionType); for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { auto coercedEltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) @@ -3793,8 +3963,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, // FIXME: Generate IR in one pass, rather than going back and fixing up these // placeholders. llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty); - llvm::Type *IRPtrTy = IRTy->getPointerTo(); - llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy->getPointerTo()); + llvm::Type *IRPtrTy = llvm::PointerType::getUnqual(CGF.getLLVMContext()); + llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy); // FIXME: When we generate this IR in one pass, we shouldn't need // this win32-specific alignment hack. @@ -4762,7 +4932,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // the proper cpu features (and it won't cause code generation issues due to // function based code generation). if (TargetDecl->hasAttr<AlwaysInlineAttr>() && - TargetDecl->hasAttr<TargetAttr>()) + (TargetDecl->hasAttr<TargetAttr>() || + (CurFuncDecl && CurFuncDecl->hasAttr<TargetAttr>()))) checkTargetFeatures(Loc, FD); // Some architectures (such as x86-64) have the ABI changed based on @@ -4771,25 +4942,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), FD, CallArgs); } -#ifndef NDEBUG - if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) { - // For an inalloca varargs function, we don't expect CallInfo to match the - // function pointer's type, because the inalloca struct a will have extra - // fields in it for the varargs parameters. Code later in this function - // bitcasts the function pointer to the type derived from CallInfo. - // - // In other cases, we assert that the types match up (until pointers stop - // having pointee types). - if (Callee.isVirtual()) - assert(IRFuncTy == Callee.getVirtualFunctionType()); - else { - llvm::PointerType *PtrTy = - llvm::cast<llvm::PointerType>(Callee.getFunctionPointer()->getType()); - assert(PtrTy->isOpaqueOrPointeeTypeMatches(IRFuncTy)); - } - } -#endif - // 1. Set up the arguments. // If we're using inalloca, insert the allocation after the stack save. 
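// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the upstream diff.] The SeenBBs guard added
// to findDominatingStoreToReturnValue above exists because a chain of
// "single predecessor" links can form a cycle in unreachable code, turning
// the old walk into an infinite loop. The fixed walk, as a standalone model:
#include <set>

struct Block {
  Block *SinglePred; // null when the block has zero or multiple predecessors
};

bool reachesBySinglePredChain(Block *IP, Block *StoreBB) {
  std::set<Block *> Seen;
  while (IP != StoreBB) {
    // Bail out on a revisited block (cycle) or at a merge/entry point.
    if (!Seen.insert(IP).second || !(IP = IP->SinglePred))
      return false;
  }
  return true;
}
// ---------------------------------------------------------------------------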
@@ -4911,10 +5063,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Store the RValue into the argument struct. Address Addr = Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); - // There are some cases where a trivial bitcast is not avoidable. The - // definition of a type later in a translation unit may change it's type - // from {}* to (%struct.foo*)*. - Addr = Builder.CreateElementBitCast(Addr, ConvertTypeForMem(I->Ty)); + Addr = Addr.withElementType(ConvertTypeForMem(I->Ty)); I->copyInto(*this, Addr); } break; @@ -5008,9 +5157,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, I->copyInto(*this, AI); } else { // Skip the extra memcpy call. - auto *T = llvm::PointerType::getWithSamePointeeType( - cast<llvm::PointerType>(V->getType()), - CGM.getDataLayout().getAllocaAddrSpace()); + auto *T = llvm::PointerType::get( + CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace()); llvm::Value *Val = getTargetHooks().performAddrSpaceCast( *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, @@ -5110,7 +5258,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateMemCpy(TempAlloca, Src, SrcSize); Src = TempAlloca; } else { - Src = Builder.CreateElementBitCast(Src, STy); + Src = Src.withElementType(STy); } assert(NumIRArgs == STy->getNumElements()); @@ -5174,7 +5322,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateStore(RV.getScalarVal(), addr); } - addr = Builder.CreateElementBitCast(addr, coercionType); + addr = addr.withElementType(coercionType); unsigned IRArgPos = FirstIRArg; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { @@ -5210,35 +5358,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If we're using inalloca, set up that argument. if (ArgMemory.isValid()) { llvm::Value *Arg = ArgMemory.getPointer(); - if (CallInfo.isVariadic()) { - // When passing non-POD arguments by value to variadic functions, we will - // end up with a variadic prototype and an inalloca call site. In such - // cases, we can't do any parameter mismatch checks. Give up and bitcast - // the callee. - unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace(); - CalleePtr = - Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS)); - } else { - llvm::Type *LastParamTy = - IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1); - if (Arg->getType() != LastParamTy) { -#ifndef NDEBUG - // Assert that these structs have equivalent element types. 
- llvm::StructType *FullTy = CallInfo.getArgStruct(); - if (!LastParamTy->isOpaquePointerTy()) { - llvm::StructType *DeclaredTy = cast<llvm::StructType>( - LastParamTy->getNonOpaquePointerElementType()); - assert(DeclaredTy->getNumElements() == FullTy->getNumElements()); - for (auto DI = DeclaredTy->element_begin(), - DE = DeclaredTy->element_end(), - FI = FullTy->element_begin(); - DI != DE; ++DI, ++FI) - assert(*DI == *FI); - } -#endif - Arg = Builder.CreateBitCast(Arg, LastParamTy); - } - } assert(IRFunctionArgs.hasInallocaArg()); IRCallArgs[IRFunctionArgs.getInallocaArgNo()] = Arg; } @@ -5558,8 +5677,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - Address addr = SRetPtr; - addr = Builder.CreateElementBitCast(addr, coercionType); + Address addr = SRetPtr.withElementType(coercionType); assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType()); bool requiresExtract = isa<llvm::StructType>(CI->getType()); @@ -5576,7 +5694,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(unpaddedIndex == 0); Builder.CreateStore(elt, eltAddr); } - // FALLTHROUGH [[fallthrough]]; } @@ -5626,6 +5743,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm_unreachable("bad evaluation kind"); } + // If coercing a fixed vector from a scalable vector for ABI + // compatibility, and the types match, use the llvm.vector.extract + // intrinsic to perform the conversion. + if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) { + llvm::Value *V = CI; + if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) { + if (FixedDst->getElementType() == ScalableSrc->getElementType()) { + llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty); + V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed"); + return RValue::get(V); + } + } + } + Address DestPtr = ReturnValue.getValue(); bool DestIsVolatile = ReturnValue.isVolatile(); diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index 59c3f304f59b..eaaf10c4eec6 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -30,6 +30,7 @@ class Value; namespace clang { class Decl; class FunctionDecl; +class TargetOptions; class VarDecl; namespace CodeGen { @@ -108,9 +109,6 @@ public: AbstractInfo = abstractInfo; assert(functionPtr && "configuring callee without function pointer"); assert(functionPtr->getType()->isPointerTy()); - assert(functionPtr->getType()->isOpaquePointerTy() || - functionPtr->getType()->getNonOpaquePointerElementType() - ->isFunctionTy()); } static CGCallee forBuiltin(unsigned builtinID, @@ -377,6 +375,14 @@ public: bool isExternallyDestructed() const { return IsExternallyDestructed; } }; +/// Helper to add attributes to \p F according to the CodeGenOptions and +/// LangOptions without requiring a CodeGenModule to be constructed. 
+void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, + const CodeGenOptions CodeGenOpts, + const LangOptions &LangOpts, + const TargetOptions &TargetOpts, + bool WillInternalize); + } // end namespace CodeGen } // end namespace clang diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 0795ea598411..93e7b54fca04 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -139,7 +139,7 @@ Address CodeGenFunction::LoadCXXThisAddress() { } llvm::Type *Ty = ConvertType(MD->getThisType()->getPointeeType()); - return Address(LoadCXXThis(), Ty, CXXThisAlignment); + return Address(LoadCXXThis(), Ty, CXXThisAlignment, KnownNonNull); } /// Emit the address of a field using a member data pointer. @@ -236,12 +236,10 @@ CodeGenFunction::GetAddressOfDirectBaseInCompleteClass(Address This, // TODO: for complete types, this should be possible with a GEP. Address V = This; if (!Offset.isZero()) { - V = Builder.CreateElementBitCast(V, Int8Ty); + V = V.withElementType(Int8Ty); V = Builder.CreateConstInBoundsByteGEP(V, Offset); } - V = Builder.CreateElementBitCast(V, ConvertType(Base)); - - return V; + return V.withElementType(ConvertType(Base)); } static Address @@ -272,8 +270,6 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, Address addr, // Apply the base offset. llvm::Value *ptr = addr.getPointer(); - unsigned AddrSpace = ptr->getType()->getPointerAddressSpace(); - ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8Ty->getPointerTo(AddrSpace)); ptr = CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, ptr, baseOffset, "add.ptr"); // If we have a virtual component, the alignment of the result will @@ -329,8 +325,8 @@ Address CodeGenFunction::GetAddressOfBaseClass( // Get the base pointer type. llvm::Type *BaseValueTy = ConvertType((PathEnd[-1])->getType()); - llvm::Type *BasePtrTy = - BaseValueTy->getPointerTo(Value.getType()->getPointerAddressSpace()); + llvm::Type *PtrTy = llvm::PointerType::get( + CGM.getLLVMContext(), Value.getType()->getPointerAddressSpace()); QualType DerivedTy = getContext().getRecordType(Derived); CharUnits DerivedAlign = CGM.getClassPointerAlignment(Derived); @@ -344,7 +340,7 @@ Address CodeGenFunction::GetAddressOfBaseClass( EmitTypeCheck(TCK_Upcast, Loc, Value.getPointer(), DerivedTy, DerivedAlign, SkippedChecks); } - return Builder.CreateElementBitCast(Value, BaseValueTy); + return Value.withElementType(BaseValueTy); } llvm::BasicBlock *origBB = nullptr; @@ -381,7 +377,7 @@ Address CodeGenFunction::GetAddressOfBaseClass( VirtualOffset, Derived, VBase); // Cast to the destination type. - Value = Builder.CreateElementBitCast(Value, BaseValueTy); + Value = Value.withElementType(BaseValueTy); // Build a phi if we needed a null check. 
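The CGClass.cpp hunks here, like many below, replace Builder.CreateElementBitCast(Addr, Ty) with Addr.withElementType(Ty). With opaque pointers, an Address is just a pointer value plus a recorded element type and alignment, so retyping is pure bookkeeping and emits no IR. A simplified sketch of the idea follows; this is an illustration, not clang's actual Address class:

#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"

// Hedged illustration: under opaque pointers there is nothing to bitcast,
// so "changing the element type" only swaps the recorded llvm::Type*.
struct SimpleAddress {
  llvm::Value *Pointer;  // an opaque `ptr` value
  llvm::Type *ElementTy; // the type loads and stores through it will use
  llvm::Align Alignment;

  SimpleAddress withElementType(llvm::Type *NewTy) const {
    return {Pointer, NewTy, Alignment}; // no instruction emitted
  }
};

Under typed pointers the same retyping had to emit a bitcast instruction, which is why the old helper lived on the IR builder.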
if (NullCheckValue) { @@ -389,10 +385,10 @@ Address CodeGenFunction::GetAddressOfBaseClass( Builder.CreateBr(endBB); EmitBlock(endBB); - llvm::PHINode *PHI = Builder.CreatePHI(BasePtrTy, 2, "cast.result"); + llvm::PHINode *PHI = Builder.CreatePHI(PtrTy, 2, "cast.result"); PHI->addIncoming(Value.getPointer(), notNullBB); - PHI->addIncoming(llvm::Constant::getNullValue(BasePtrTy), origBB); - Value = Value.withPointer(PHI); + PHI->addIncoming(llvm::Constant::getNullValue(PtrTy), origBB); + Value = Value.withPointer(PHI, NotKnownNonNull); } return Value; @@ -410,14 +406,15 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr, getContext().getCanonicalType(getContext().getTagDeclType(Derived)); unsigned AddrSpace = BaseAddr.getAddressSpace(); llvm::Type *DerivedValueTy = ConvertType(DerivedTy); - llvm::Type *DerivedPtrTy = DerivedValueTy->getPointerTo(AddrSpace); + llvm::Type *DerivedPtrTy = + llvm::PointerType::get(getLLVMContext(), AddrSpace); llvm::Value *NonVirtualOffset = CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd); if (!NonVirtualOffset) { // No offset, we can just cast back. - return Builder.CreateElementBitCast(BaseAddr, DerivedValueTy); + return BaseAddr.withElementType(DerivedValueTy); } llvm::BasicBlock *CastNull = nullptr; @@ -998,8 +995,8 @@ namespace { private: void emitMemcpyIR(Address DestPtr, Address SrcPtr, CharUnits Size) { - DestPtr = CGF.Builder.CreateElementBitCast(DestPtr, CGF.Int8Ty); - SrcPtr = CGF.Builder.CreateElementBitCast(SrcPtr, CGF.Int8Ty); + DestPtr = DestPtr.withElementType(CGF.Int8Ty); + SrcPtr = SrcPtr.withElementType(CGF.Int8Ty); CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity()); } @@ -2132,8 +2129,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, if (SlotAS != ThisAS) { unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS); - llvm::Type *NewType = llvm::PointerType::getWithSamePointeeType( - This.getType(), TargetThisAS); + llvm::Type *NewType = + llvm::PointerType::get(getLLVMContext(), TargetThisAS); ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(), ThisAS, SlotAS, NewType); } @@ -2579,18 +2576,13 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { // Finally, store the address point. Use the same LLVM types as the field to // support optimization. unsigned GlobalsAS = CGM.getDataLayout().getDefaultGlobalsAddressSpace(); - unsigned ProgAS = CGM.getDataLayout().getProgramAddressSpace(); - llvm::Type *VTablePtrTy = - llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true) - ->getPointerTo(ProgAS) - ->getPointerTo(GlobalsAS); + llvm::Type *PtrTy = llvm::PointerType::get(CGM.getLLVMContext(), GlobalsAS); // vtable field is derived from `this` pointer, therefore they should be in // the same addr space. Note that this might not be LLVM address space 0. 
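The InitializeVTablePointer hunk just below, like several others in this commit, collapses a getPointerTo() chain into llvm::PointerType::get(Ctx, AS): with opaque pointers a pointer type is identified solely by its address space. A small before/after sketch, hedged and simplified from the surrounding hunks:

#include "llvm/IR/DerivedTypes.h"

// Typed pointers spelled out the pointee, e.g. the old vtable slot type:
//   FunctionType::get(Int32Ty, /*isVarArg=*/true)
//       ->getPointerTo(ProgAS)->getPointerTo(GlobalsAS)
// With opaque pointers, only the address space is part of the type.
static llvm::PointerType *vtableSlotType(llvm::LLVMContext &Ctx,
                                         unsigned GlobalsAS) {
  return llvm::PointerType::get(Ctx, GlobalsAS); // ptr addrspace(GlobalsAS)
}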
- VTableField = Builder.CreateElementBitCast(VTableField, VTablePtrTy); - VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); + VTableField = VTableField.withElementType(PtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); - TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTablePtrTy); + TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(PtrTy); CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); if (CGM.getCodeGenOpts().OptimizationLevel > 0 && CGM.getCodeGenOpts().StrictVTablePointers) @@ -2683,7 +2675,7 @@ void CodeGenFunction::InitializeVTablePointers(const CXXRecordDecl *RD) { llvm::Value *CodeGenFunction::GetVTablePtr(Address This, llvm::Type *VTableTy, const CXXRecordDecl *RD) { - Address VTablePtrSrc = Builder.CreateElementBitCast(This, VTableTy); + Address VTablePtrSrc = This.withElementType(VTableTy); llvm::Instruction *VTable = Builder.CreateLoad(VTablePtrSrc, "vtable"); TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTableTy); CGM.DecorateInstructionWithTBAA(VTable, TBAAInfo); diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 43758ac27e43..0bbab283603d 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -782,7 +782,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { if (!RequiresNormalCleanup) { // Mark CPP scope end for passed-by-value Arg temp // per Windows ABI which is "normally" Cleanup in callee - if (IsEHa && getInvokeDest()) { + if (IsEHa && getInvokeDest() && Builder.GetInsertBlock()) { if (Personality.isMSVCXXPersonality()) EmitSehCppScopeEnd(); } @@ -836,7 +836,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { EmitBlock(NormalEntry); // intercept normal cleanup to mark SEH scope end - if (IsEHa) { + if (IsEHa && getInvokeDest()) { if (Personality.isMSVCXXPersonality()) EmitSehCppScopeEnd(); else @@ -1031,6 +1031,8 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { if (!Personality.isMSVCPersonality()) { EHStack.pushTerminate(); PushedTerminate = true; + } else if (IsEHa && getInvokeDest()) { + EmitSehCppScopeEnd(); } // We only actually emit the cleanup code if the cleanup is either diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 775a4341558a..8437cda79beb 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -198,7 +198,9 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); + CGF.CurCoro.InSuspendBlock = true; auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); + CGF.CurCoro.InSuspendBlock = false; if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. BasicBlock *RealSuspendBlock = @@ -465,6 +467,123 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup { }; } +namespace { +struct GetReturnObjectManager { + CodeGenFunction &CGF; + CGBuilderTy &Builder; + const CoroutineBodyStmt &S; + // When true, performs RVO for the return object. 
+  bool DirectEmit = false;
+
+  Address GroActiveFlag;
+  CodeGenFunction::AutoVarEmission GroEmission;
+
+  GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S)
+      : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()),
+        GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {
+    // The call to get_return_object is sequenced before the call to
+    // initial_suspend and is invoked at most once, but there are caveats
+    // regarding whether the prvalue result object may be initialized
+    // directly/eagerly or delayed, depending on the types involved.
+    //
+    // More info at https://github.com/cplusplus/papers/issues/1414
+    //
+    // The general cases:
+    // 1. Same type of get_return_object and coroutine return type (direct
+    //    emission):
+    //    - Constructed in the return slot.
+    // 2. Different types (delayed emission):
+    //    - Constructed temporary object prior to initial suspend, initialized
+    //      with a call to get_return_object()
+    //    - When the coroutine needs to return to the caller and needs to
+    //      construct the return value for the coroutine, it is initialized
+    //      with the expiring value of the temporary obtained above.
+    //
+    // Direct emission for void-returning coroutines or GROs.
+    DirectEmit = [&]() {
+      auto *RVI = S.getReturnValueInit();
+      assert(RVI && "expected RVI");
+      auto GroType = RVI->getType();
+      return CGF.getContext().hasSameType(GroType, CGF.FnRetTy);
+    }();
+  }
+
+  // The GRO variable has to outlive the coroutine frame and the coroutine
+  // promise, but it can only be initialized after the coroutine promise has
+  // been created; thus, we split its emission into two parts. EmitGroAlloca
+  // emits an alloca and sets up the cleanups. Later, once the coroutine
+  // promise is available, we initialize the GRO and set the flag marking the
+  // cleanup as active.
+  void EmitGroAlloca() {
+    if (DirectEmit)
+      return;
+
+    auto *GroDeclStmt = dyn_cast_or_null<DeclStmt>(S.getResultDecl());
+    if (!GroDeclStmt) {
+      // If get_return_object returns void, no need to do an alloca.
+      return;
+    }
+
+    auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl());
+
+    // Set the GRO flag to indicate that it is not initialized yet.
+    GroActiveFlag = CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(),
+                                         "gro.active");
+    Builder.CreateStore(Builder.getFalse(), GroActiveFlag);
+
+    GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl);
+
+    // Remember the top of EHStack before emitting the cleanup.
+    auto old_top = CGF.EHStack.stable_begin();
+    CGF.EmitAutoVarCleanups(GroEmission);
+    auto top = CGF.EHStack.stable_begin();
+
+    // Make the cleanup conditional on gro.active.
+    for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top); b != e;
+         b++) {
+      if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) {
+        assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?");
+        Cleanup->setActiveFlag(GroActiveFlag);
+        Cleanup->setTestFlagInEHCleanup();
+        Cleanup->setTestFlagInNormalCleanup();
+      }
+    }
+  }
+
+  void EmitGroInit() {
+    if (DirectEmit) {
+      // ReturnValue should be valid as long as the coroutine's return type
+      // is not void. The assertion could help us to reduce the check later.
+      assert(CGF.ReturnValue.isValid() == (bool)S.getReturnStmt());
+      // Now we have the promise, initialize the GRO.
+      // We need to emit `get_return_object` first. According to
+      // [dcl.fct.def.coroutine]p7:
+      //   The call to get_return_object is sequenced before the call to
+      //   initial_suspend and is invoked at most once.
+      //
+      // So we cannot emit the return value when we emit the return statement;
+      // otherwise the call to get_return_object would not come before
+      // initial_suspend.
+      if (CGF.ReturnValue.isValid()) {
+        CGF.EmitAnyExprToMem(S.getReturnValue(), CGF.ReturnValue,
+                             S.getReturnValue()->getType().getQualifiers(),
+                             /*IsInit*/ true);
+      }
+      return;
+    }
+
+    if (!GroActiveFlag.isValid()) {
+      // No Gro variable was allocated. Simply emit the call to
+      // get_return_object.
+      CGF.EmitStmt(S.getResultDecl());
+      return;
+    }
+
+    CGF.EmitAutoVarInit(GroEmission);
+    Builder.CreateStore(Builder.getTrue(), GroActiveFlag);
+  }
+};
+} // namespace
+
 static void emitBodyAndFallthrough(CodeGenFunction &CGF,
                                    const CoroutineBodyStmt &S, Stmt *Body) {
   CGF.EmitStmt(Body);
@@ -511,6 +630,8 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
   // See if allocation was successful.
   auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy);
   auto *Cond = Builder.CreateICmpNE(AllocateCall, NullPtr);
+  // Expect the allocation to be successful.
+  emitCondLikelihoodViaExpectIntrinsic(Cond, Stmt::LH_Likely);
   Builder.CreateCondBr(Cond, InitBB, RetOnFailureBB);

   // If not, return OnAllocFailure object.
@@ -531,6 +652,9 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
       CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
   CurCoro.Data->CoroBegin = CoroBegin;

+  GetReturnObjectManager GroManager(*this, S);
+  GroManager.EmitGroAlloca();
+
   CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB);
   {
     CGDebugInfo *DI = getDebugInfo();
@@ -568,23 +692,8 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
     // promise local variable was not emitted yet.
     CoroId->setArgOperand(1, PromiseAddrVoidPtr);

-    // ReturnValue should be valid as long as the coroutine's return type
-    // is not void. The assertion could help us to reduce the check later.
-    assert(ReturnValue.isValid() == (bool)S.getReturnStmt());
-    // Now we have the promise, initialize the GRO.
-    // We need to emit `get_return_object` first. According to:
-    // [dcl.fct.def.coroutine]p7
-    // The call to get_return_object is sequenced before the call to
-    // initial_suspend and is invoked at most once.
-    //
-    // So we couldn't emit return value when we emit return statment,
-    // otherwise the call to get_return_object wouldn't be in front
-    // of initial_suspend.
-    if (ReturnValue.isValid()) {
-      EmitAnyExprToMem(S.getReturnValue(), ReturnValue,
-                       S.getReturnValue()->getType().getQualifiers(),
-                       /*IsInit*/ true);
-    }
+    // Now we have the promise; initialize the GRO.
+    GroManager.EmitGroInit();

     EHStack.pushCleanup<CallCoroEnd>(EHCleanup);

@@ -650,7 +759,8 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
   if (Stmt *Ret = S.getReturnStmt()) {
     // We already emitted the return value above, so we shouldn't
     // emit it again here.
- cast<ReturnStmt>(Ret)->setRetValue(nullptr); + if (GroManager.DirectEmit) + cast<ReturnStmt>(Ret)->setRetValue(nullptr); EmitStmt(Ret); } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 3bde43cc1db3..f049a682cfed 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -18,6 +18,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclFriend.h" @@ -72,8 +73,6 @@ CGDebugInfo::CGDebugInfo(CodeGenModule &CGM) : CGM(CGM), DebugKind(CGM.getCodeGenOpts().getDebugInfo()), DebugTypeExtRefs(CGM.getCodeGenOpts().DebugTypeExtRefs), DBuilder(CGM.getModule()) { - for (const auto &KV : CGM.getCodeGenOpts().DebugPrefixMap) - DebugPrefixMap[KV.first] = KV.second; CreateCompileUnit(); } @@ -469,12 +468,9 @@ llvm::DIFile *CGDebugInfo::createFile( } std::string CGDebugInfo::remapDIPath(StringRef Path) const { - if (DebugPrefixMap.empty()) - return Path.str(); - SmallString<256> P = Path; - for (const auto &Entry : DebugPrefixMap) - if (llvm::sys::path::replace_path_prefix(P, Entry.first, Entry.second)) + for (auto &[From, To] : llvm::reverse(CGM.getCodeGenOpts().DebugPrefixMap)) + if (llvm::sys::path::replace_path_prefix(P, From, To)) break; return P.str().str(); } @@ -527,6 +523,7 @@ void CGDebugInfo::CreateCompileUnit() { // Get absolute path name. SourceManager &SM = CGM.getContext().getSourceManager(); auto &CGO = CGM.getCodeGenOpts(); + const LangOptions &LO = CGM.getLangOpts(); std::string MainFileName = CGO.MainFileName; if (MainFileName.empty()) MainFileName = "<stdin>"; @@ -541,9 +538,15 @@ void CGDebugInfo::CreateCompileUnit() { MainFileDir = std::string(MainFile->getDir().getName()); if (!llvm::sys::path::is_absolute(MainFileName)) { llvm::SmallString<1024> MainFileDirSS(MainFileDir); - llvm::sys::path::append(MainFileDirSS, MainFileName); - MainFileName = - std::string(llvm::sys::path::remove_leading_dotslash(MainFileDirSS)); + llvm::sys::path::Style Style = + LO.UseTargetPathSeparator + ? (CGM.getTarget().getTriple().isOSWindows() + ? 
llvm::sys::path::Style::windows_backslash + : llvm::sys::path::Style::posix) + : llvm::sys::path::Style::native; + llvm::sys::path::append(MainFileDirSS, Style, MainFileName); + MainFileName = std::string( + llvm::sys::path::remove_leading_dotslash(MainFileDirSS, Style)); } // If the main file name provided is identical to the input file name, and // if the input file is a preprocessed source, use the module name for @@ -559,7 +562,6 @@ void CGDebugInfo::CreateCompileUnit() { } llvm::dwarf::SourceLanguage LangTag; - const LangOptions &LO = CGM.getLangOpts(); if (LO.CPlusPlus) { if (LO.ObjC) LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus; @@ -595,20 +597,20 @@ void CGDebugInfo::CreateCompileUnit() { llvm::DICompileUnit::DebugEmissionKind EmissionKind; switch (DebugKind) { - case codegenoptions::NoDebugInfo: - case codegenoptions::LocTrackingOnly: + case llvm::codegenoptions::NoDebugInfo: + case llvm::codegenoptions::LocTrackingOnly: EmissionKind = llvm::DICompileUnit::NoDebug; break; - case codegenoptions::DebugLineTablesOnly: + case llvm::codegenoptions::DebugLineTablesOnly: EmissionKind = llvm::DICompileUnit::LineTablesOnly; break; - case codegenoptions::DebugDirectivesOnly: + case llvm::codegenoptions::DebugDirectivesOnly: EmissionKind = llvm::DICompileUnit::DebugDirectivesOnly; break; - case codegenoptions::DebugInfoConstructor: - case codegenoptions::LimitedDebugInfo: - case codegenoptions::FullDebugInfo: - case codegenoptions::UnusedTypeInfo: + case llvm::codegenoptions::DebugInfoConstructor: + case llvm::codegenoptions::LimitedDebugInfo: + case llvm::codegenoptions::FullDebugInfo: + case llvm::codegenoptions::UnusedTypeInfo: EmissionKind = llvm::DICompileUnit::FullDebug; break; } @@ -635,17 +637,21 @@ void CGDebugInfo::CreateCompileUnit() { SDK = *It; } + llvm::DICompileUnit::DebugNameTableKind NameTableKind = + static_cast<llvm::DICompileUnit::DebugNameTableKind>( + CGOpts.DebugNameTable); + if (CGM.getTarget().getTriple().isNVPTX()) + NameTableKind = llvm::DICompileUnit::DebugNameTableKind::None; + else if (CGM.getTarget().getTriple().getVendor() == llvm::Triple::Apple) + NameTableKind = llvm::DICompileUnit::DebugNameTableKind::Apple; + // Create new compile unit. TheCU = DBuilder.createCompileUnit( LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "", LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO, CGOpts.DwarfDebugFlags, RuntimeVers, CGOpts.SplitDwarfFile, EmissionKind, DwoId, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, - CGM.getTarget().getTriple().isNVPTX() - ? llvm::DICompileUnit::DebugNameTableKind::None - : static_cast<llvm::DICompileUnit::DebugNameTableKind>( - CGOpts.DebugNameTable), - CGOpts.DebugRangesBaseAddress, remapDIPath(Sysroot), SDK); + NameTableKind, CGOpts.DebugRangesBaseAddress, remapDIPath(Sysroot), SDK); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -727,24 +733,41 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { #include "clang/Basic/AArch64SVEACLETypes.def" { ASTContext::BuiltinVectorTypeInfo Info = - CGM.getContext().getBuiltinVectorTypeInfo(BT); - unsigned NumElemsPerVG = (Info.EC.getKnownMinValue() * Info.NumVectors) / 2; + // For svcount_t, only the lower 2 bytes are relevant. + BT->getKind() == BuiltinType::SveCount + ? 
ASTContext::BuiltinVectorTypeInfo( + CGM.getContext().BoolTy, llvm::ElementCount::getFixed(16), + 1) + : CGM.getContext().getBuiltinVectorTypeInfo(BT); + + // A single vector of bytes may not suffice as the representation of + // svcount_t tuples because of the gap between the active 16bits of + // successive tuple members. Currently no such tuples are defined for + // svcount_t, so assert that NumVectors is 1. + assert((BT->getKind() != BuiltinType::SveCount || Info.NumVectors == 1) && + "Unsupported number of vectors for svcount_t"); // Debuggers can't extract 1bit from a vector, so will display a - // bitpattern for svbool_t instead. + // bitpattern for predicates instead. + unsigned NumElems = Info.EC.getKnownMinValue() * Info.NumVectors; if (Info.ElementType == CGM.getContext().BoolTy) { - NumElemsPerVG /= 8; + NumElems /= 8; Info.ElementType = CGM.getContext().UnsignedCharTy; } - auto *LowerBound = - llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( - llvm::Type::getInt64Ty(CGM.getLLVMContext()), 0)); - SmallVector<uint64_t, 9> Expr( - {llvm::dwarf::DW_OP_constu, NumElemsPerVG, llvm::dwarf::DW_OP_bregx, - /* AArch64::VG */ 46, 0, llvm::dwarf::DW_OP_mul, - llvm::dwarf::DW_OP_constu, 1, llvm::dwarf::DW_OP_minus}); - auto *UpperBound = DBuilder.createExpression(Expr); + llvm::Metadata *LowerBound, *UpperBound; + LowerBound = llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), 0)); + if (Info.EC.isScalable()) { + unsigned NumElemsPerVG = NumElems / 2; + SmallVector<uint64_t, 9> Expr( + {llvm::dwarf::DW_OP_constu, NumElemsPerVG, llvm::dwarf::DW_OP_bregx, + /* AArch64::VG */ 46, 0, llvm::dwarf::DW_OP_mul, + llvm::dwarf::DW_OP_constu, 1, llvm::dwarf::DW_OP_minus}); + UpperBound = DBuilder.createExpression(Expr); + } else + UpperBound = llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), NumElems - 1)); llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange( /*count*/ nullptr, LowerBound, UpperBound, /*stride*/ nullptr); @@ -817,6 +840,17 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return DBuilder.createVectorType(/*Size=*/0, Align, ElemTy, SubscriptArray); } + +#define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS) \ + case BuiltinType::Id: { \ + if (!SingletonId) \ + SingletonId = \ + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, \ + MangledName, TheCU, TheCU->getFile(), 0); \ + return SingletonId; \ + } +#include "clang/Basic/WebAssemblyReferenceTypes.def" + case BuiltinType::UChar: case BuiltinType::Char_U: Encoding = llvm::dwarf::DW_ATE_unsigned_char; @@ -1455,9 +1489,9 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, return F; } -llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, - llvm::DIScope *RecordTy, - const RecordDecl *RD) { +llvm::DIDerivedType * +CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, const RecordDecl *RD) { StringRef Name = BitFieldDecl->getName(); QualType Ty = BitFieldDecl->getType(); SourceLocation Loc = BitFieldDecl->getLocation(); @@ -1488,6 +1522,78 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, Flags, DebugType, Annotations); } +llvm::DIDerivedType *CGDebugInfo::createBitFieldSeparatorIfNeeded( + const FieldDecl *BitFieldDecl, const llvm::DIDerivedType *BitFieldDI, + llvm::ArrayRef<llvm::Metadata *> PreviousFieldsDI, const RecordDecl *RD) { + + if 
(!CGM.getTargetCodeGenInfo().shouldEmitDWARFBitFieldSeparators())
+    return nullptr;
+
+  /*
+  Add a *single* zero-bitfield separator between two non-zero bitfields
+  separated by one or more zero-bitfields. This is used to distinguish between
+  structures such as the ones below, where the memory layout is the same, but
+  how the ABI assigns fields to registers differs.
+
+  struct foo {
+    int space[4];
+    char a : 8; // on amdgpu, passed on v4
+    char b : 8;
+    char x : 8;
+    char y : 8;
+  };
+  struct bar {
+    int space[4];
+    char a : 8; // on amdgpu, passed on v4
+    char b : 8;
+    char : 0;
+    char x : 8; // passed on v5
+    char y : 8;
+  };
+  */
+  if (PreviousFieldsDI.empty())
+    return nullptr;
+
+  // If we already emitted metadata for a 0-length bitfield, nothing to do here.
+  auto *PreviousMDEntry =
+      PreviousFieldsDI.empty() ? nullptr : PreviousFieldsDI.back();
+  auto *PreviousMDField =
+      dyn_cast_or_null<llvm::DIDerivedType>(PreviousMDEntry);
+  if (!PreviousMDField || !PreviousMDField->isBitField() ||
+      PreviousMDField->getSizeInBits() == 0)
+    return nullptr;
+
+  auto PreviousBitfield = RD->field_begin();
+  std::advance(PreviousBitfield, BitFieldDecl->getFieldIndex() - 1);
+
+  assert(PreviousBitfield->isBitField());
+
+  ASTContext &Context = CGM.getContext();
+  if (!PreviousBitfield->isZeroLengthBitField(Context))
+    return nullptr;
+
+  QualType Ty = PreviousBitfield->getType();
+  SourceLocation Loc = PreviousBitfield->getLocation();
+  llvm::DIFile *VUnit = getOrCreateFile(Loc);
+  llvm::DIType *DebugType = getOrCreateType(Ty, VUnit);
+  llvm::DIScope *RecordTy = BitFieldDI->getScope();
+
+  llvm::DIFile *File = getOrCreateFile(Loc);
+  unsigned Line = getLineNumber(Loc);
+
+  uint64_t StorageOffsetInBits =
+      cast<llvm::ConstantInt>(BitFieldDI->getStorageOffsetInBits())
+          ->getZExtValue();
+
+  llvm::DINode::DIFlags Flags =
+      getAccessFlag(PreviousBitfield->getAccess(), RD);
+  llvm::DINodeArray Annotations =
+      CollectBTFDeclTagAnnotations(*PreviousBitfield);
+  return DBuilder.createBitFieldMemberType(
+      RecordTy, "", File, Line, 0, StorageOffsetInBits, StorageOffsetInBits,
+      Flags, DebugType, Annotations);
+}
+
 llvm::DIType *CGDebugInfo::createFieldType(
     StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS,
     uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit,
@@ -1596,7 +1702,11 @@ void CGDebugInfo::CollectRecordNormalField(
   llvm::DIType *FieldType;
   if (field->isBitField()) {
-    FieldType = createBitFieldType(field, RecordTy, RD);
+    llvm::DIDerivedType *BitFieldType;
+    FieldType = BitFieldType = createBitFieldType(field, RecordTy, RD);
+    if (llvm::DIType *Separator =
+            createBitFieldSeparatorIfNeeded(field, BitFieldType, elements, RD))
+      elements.push_back(Separator);
   } else {
     auto Align = getDeclAlignIfRequired(field, CGM.getContext());
     llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(field);
@@ -1835,27 +1945,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
     ContainingType = RecordTy;
   }

-  // We're checking for deleted C++ special member functions
-  // [Ctors,Dtors, Copy/Move]
-  auto checkAttrDeleted = [&](const auto *Method) {
-    if (Method->getCanonicalDecl()->isDeleted())
-      SPFlags |= llvm::DISubprogram::SPFlagDeleted;
-  };
-
-  switch (Method->getKind()) {
-
-  case Decl::CXXConstructor:
-  case Decl::CXXDestructor:
-    checkAttrDeleted(Method);
-    break;
-  case Decl::CXXMethod:
-    if (Method->isCopyAssignmentOperator() ||
-        Method->isMoveAssignmentOperator())
-      checkAttrDeleted(Method);
-    break;
-  default:
-    break;
-  }
+  if (Method->getCanonicalDecl()->isDeleted())
+    SPFlags |= llvm::DISubprogram::SPFlagDeleted;

   if (Method->isNoReturn())
     Flags |= llvm::DINode::FlagNoReturn;

@@ -1885,7 +1976,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
   // In this debug mode, emit type info for a class when its constructor type
   // info is emitted.
-  if (DebugKind == codegenoptions::DebugInfoConstructor)
+  if (DebugKind == llvm::codegenoptions::DebugInfoConstructor)
     if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(Method))
       completeUnusedClass(*CD->getParent());

@@ -2010,15 +2101,10 @@ CGDebugInfo::CollectTemplateParams(std::optional<TemplateArgs> OArgs,
   for (unsigned i = 0, e = Args.Args.size(); i != e; ++i) {
     const TemplateArgument &TA = Args.Args[i];
     StringRef Name;
-    bool defaultParameter = false;
-    if (Args.TList) {
+    const bool defaultParameter = TA.getIsDefaulted();
+    if (Args.TList)
       Name = Args.TList->getParam(i)->getName();
-      NamedDecl const *ND = Args.TList->getParam(i);
-      defaultParameter = clang::isSubstitutedDefaultArgument(
-          CGM.getContext(), TA, ND, Args.Args, Args.TList->getDepth());
-    }
-
     switch (TA.getKind()) {
     case TemplateArgument::Type: {
       llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit);
@@ -2362,7 +2448,7 @@ void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI,
                                            QualType AllocatedTy,
                                            SourceLocation Loc) {
   if (CGM.getCodeGenOpts().getDebugInfo() <=
-      codegenoptions::DebugLineTablesOnly)
+      llvm::codegenoptions::DebugLineTablesOnly)
     return;
   llvm::MDNode *node;
   if (AllocatedTy->isVoidType())
@@ -2374,7 +2460,7 @@ void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI,
 }

 void CGDebugInfo::completeType(const EnumDecl *ED) {
-  if (DebugKind <= codegenoptions::DebugLineTablesOnly)
+  if (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly)
     return;
   QualType Ty = CGM.getContext().getEnumType(ED);
   void *TyPtr = Ty.getAsOpaquePtr();
@@ -2387,7 +2473,7 @@ void CGDebugInfo::completeType(const EnumDecl *ED) {
 }

 void CGDebugInfo::completeType(const RecordDecl *RD) {
-  if (DebugKind > codegenoptions::LimitedDebugInfo ||
+  if (DebugKind > llvm::codegenoptions::LimitedDebugInfo ||
       !CGM.getLangOpts().CPlusPlus)
     completeRequiredType(RD);
 }
@@ -2449,14 +2535,18 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) {
 }

 void CGDebugInfo::completeClass(const RecordDecl *RD) {
-  if (DebugKind <= codegenoptions::DebugLineTablesOnly)
+  if (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly)
     return;
   QualType Ty = CGM.getContext().getRecordType(RD);
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
   if (I != TypeCache.end() && !cast<llvm::DIType>(I->second)->isForwardDecl())
     return;
+
+  // We want the canonical definition of the structure not to be the typedef,
+  // since that would lead to circular typedef metadata.
+ auto [Res, PrefRes] = CreateTypeDefinition(Ty->castAs<RecordType>()); assert(!Res->isForwardDecl()); TypeCache[TyPtr].reset(Res); } @@ -2483,12 +2573,21 @@ static bool canUseCtorHoming(const CXXRecordDecl *RD) { if (isClassOrMethodDLLImport(RD)) return false; - return !RD->isLambda() && !RD->isAggregate() && - !RD->hasTrivialDefaultConstructor() && - !RD->hasConstexprNonCopyMoveConstructor(); + if (RD->isLambda() || RD->isAggregate() || + RD->hasTrivialDefaultConstructor() || + RD->hasConstexprNonCopyMoveConstructor()) + return false; + + for (const CXXConstructorDecl *Ctor : RD->ctors()) { + if (Ctor->isCopyOrMoveConstructor()) + continue; + if (!Ctor->isDeleted()) + return true; + } + return false; } -static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, +static bool shouldOmitDefinition(llvm::codegenoptions::DebugInfoKind DebugKind, bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) @@ -2501,10 +2600,10 @@ static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, // Only emit forward declarations in line tables only to keep debug info size // small. This only applies to CodeView, since we don't emit types in DWARF // line tables only. - if (DebugKind == codegenoptions::DebugLineTablesOnly) + if (DebugKind == llvm::codegenoptions::DebugLineTablesOnly) return true; - if (DebugKind > codegenoptions::LimitedDebugInfo || + if (DebugKind > llvm::codegenoptions::LimitedDebugInfo || RD->hasAttr<StandaloneDebugAttr>()) return false; @@ -2540,7 +2639,7 @@ static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, // In constructor homing mode, only emit complete debug info for a class // when its constructor is emitted. - if ((DebugKind == codegenoptions::DebugInfoConstructor) && + if ((DebugKind == llvm::codegenoptions::DebugInfoConstructor) && canUseCtorHoming(CXXDecl)) return true; @@ -2567,10 +2666,25 @@ llvm::DIType *CGDebugInfo::CreateType(const RecordType *Ty) { return T; } - return CreateTypeDefinition(Ty); + auto [Def, Pref] = CreateTypeDefinition(Ty); + + return Pref ? Pref : Def; +} + +llvm::DIType *CGDebugInfo::GetPreferredNameType(const CXXRecordDecl *RD, + llvm::DIFile *Unit) { + if (!RD) + return nullptr; + + auto const *PNA = RD->getAttr<PreferredNameAttr>(); + if (!PNA) + return nullptr; + + return getOrCreateType(PNA->getTypedefType(), Unit); } -llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { +std::pair<llvm::DIType *, llvm::DIType *> +CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { RecordDecl *RD = Ty->getDecl(); // Get overall information about the record type for the debug info. 
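CreateTypeDefinition now also returns a DIType for the class's preferred_name typedef when tuning debug info for LLDB (see the hunk below). A hypothetical source-level example of the attribute this keys off; the names are illustrative, though libc++ uses the same pattern for std::string:

// The attribute argument must be a typedef of a particular specialization,
// declared before the attribute is applied; a debugger can then display
// `String` instead of the full template-id `BasicString<char>`.
template <class CharT> struct BasicString;
using String = BasicString<char>;

template <class CharT>
struct [[clang::preferred_name(String)]] BasicString {
  // ...
};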
@@ -2586,7 +2700,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { const RecordDecl *D = RD->getDefinition(); if (!D || !D->isCompleteDefinition()) - return FwdDecl; + return {FwdDecl, nullptr}; if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) CollectContainingType(CXXDecl, FwdDecl); @@ -2625,7 +2739,12 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { llvm::MDNode::replaceWithPermanent(llvm::TempDICompositeType(FwdDecl)); RegionMap[Ty->getDecl()].reset(FwdDecl); - return FwdDecl; + + if (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB) + if (auto *PrefDI = GetPreferredNameType(CXXDecl, DefUnit)) + return {FwdDecl, PrefDI}; + + return {FwdDecl, nullptr}; } llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectType *Ty, @@ -3173,7 +3292,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, Flags); const FunctionProtoType *FPT = - Ty->getPointeeType()->getAs<FunctionProtoType>(); + Ty->getPointeeType()->castAs<FunctionProtoType>(); return DBuilder.createMemberPointerType( getOrCreateInstanceMethodType( CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()), @@ -3368,7 +3487,8 @@ void CGDebugInfo::completeTemplateDefinition( } void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { - if (DebugKind <= codegenoptions::DebugLineTablesOnly || D.isDynamicClass()) + if (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly || + D.isDynamicClass()) return; completeClassData(&D); @@ -3653,7 +3773,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { void CGDebugInfo::CollectContainingType(const CXXRecordDecl *RD, llvm::DICompositeType *RealDecl) { // A class's primary base or the class itself contains the vtable. - llvm::DICompositeType *ContainingType = nullptr; + llvm::DIType *ContainingType = nullptr; const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD); if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) { // Seek non-virtual primary base root. @@ -3665,9 +3785,8 @@ void CGDebugInfo::CollectContainingType(const CXXRecordDecl *RD, else break; } - ContainingType = cast<llvm::DICompositeType>( - getOrCreateType(QualType(PBase->getTypeForDecl(), 0), - getOrCreateFile(RD->getLocation()))); + ContainingType = getOrCreateType(QualType(PBase->getTypeForDecl(), 0), + getOrCreateFile(RD->getLocation())); } else if (RD->isDynamicClass()) ContainingType = RealDecl; @@ -3702,17 +3821,18 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or // debug is set to more than just line tables or extra debug info is needed. - if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && - !CGM.getCodeGenOpts().EmitGcovNotes && - !CGM.getCodeGenOpts().DebugInfoForProfiling && - !CGM.getCodeGenOpts().PseudoProbeForProfiling && - DebugKind <= codegenoptions::DebugLineTablesOnly)) + if (LinkageName == Name || + (CGM.getCodeGenOpts().CoverageNotesFile.empty() && + CGM.getCodeGenOpts().CoverageDataFile.empty() && + !CGM.getCodeGenOpts().DebugInfoForProfiling && + !CGM.getCodeGenOpts().PseudoProbeForProfiling && + DebugKind <= llvm::codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); // Emit the function scope in line tables only mode (if CodeView) to // differentiate between function names. 
if (CGM.getCodeGenOpts().hasReducedDebugInfo() || - (DebugKind == codegenoptions::DebugLineTablesOnly && + (DebugKind == llvm::codegenoptions::DebugLineTablesOnly && CGM.getCodeGenOpts().EmitCodeView)) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) @@ -3904,7 +4024,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { } llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { - if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) + if (!D || DebugKind <= llvm::codegenoptions::DebugLineTablesOnly) return nullptr; const auto *FD = dyn_cast<FunctionDecl>(D); @@ -3941,7 +4061,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DISubprogram *CGDebugInfo::getObjCMethodDeclaration( const Decl *D, llvm::DISubroutineType *FnType, unsigned LineNo, llvm::DINode::DIFlags Flags, llvm::DISubprogram::DISPFlags SPFlags) { - if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) + if (!D || DebugKind <= llvm::codegenoptions::DebugLineTablesOnly) return nullptr; const auto *OMD = dyn_cast<ObjCMethodDecl>(D); @@ -3981,7 +4101,7 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, llvm::DIFile *F) { // In CodeView, we emit the function types in line tables only because the // only way to distinguish between functions is by display name and type. - if (!D || (DebugKind <= codegenoptions::DebugLineTablesOnly && + if (!D || (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly && !CGM.getCodeGenOpts().EmitCodeView)) // Create fake but valid subroutine type. Otherwise -verify would fail, and // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields. @@ -4219,10 +4339,9 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D); llvm::DISubroutineType *STy = getOrCreateFunctionType(D, FnType, Unit); - llvm::DISubprogram *SP = - DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, STy, - ScopeLine, Flags, SPFlags, TParamsArray.get(), - getFunctionDeclaration(D), nullptr, Annotations); + llvm::DISubprogram *SP = DBuilder.createFunction( + FDContext, Name, LinkageName, Unit, LineNo, STy, ScopeLine, Flags, + SPFlags, TParamsArray.get(), nullptr, nullptr, Annotations); // Preserve btf_decl_tag attributes for parameters of extern functions // for BPF target. The parameters created in this loop are attached as @@ -4337,7 +4456,7 @@ void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, CGM.getLLVMContext(), getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back(), CurInlinedAt)); - if (DebugKind <= codegenoptions::DebugLineTablesOnly) + if (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly) return; // Create a new lexical block and push it on the stack. @@ -4351,7 +4470,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder, // Provide an entry in the line table for the end of the block. 
EmitLocation(Builder, Loc); - if (DebugKind <= codegenoptions::DebugLineTablesOnly) + if (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly) return; LexicalBlockStack.pop_back(); @@ -4834,9 +4953,10 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( llvm::DILocalVariable * CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, - unsigned ArgNo, CGBuilderTy &Builder) { + unsigned ArgNo, CGBuilderTy &Builder, + bool UsePointerValue) { assert(CGM.getCodeGenOpts().hasReducedDebugInfo()); - return EmitDeclare(VD, AI, ArgNo, Builder); + return EmitDeclare(VD, AI, ArgNo, Builder, UsePointerValue); } namespace { @@ -5167,11 +5287,11 @@ std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const { const NamedDecl *ND = dyn_cast<NamedDecl>(D); if (!ND) return Name; - codegenoptions::DebugTemplateNamesKind TemplateNamesKind = + llvm::codegenoptions::DebugTemplateNamesKind TemplateNamesKind = CGM.getCodeGenOpts().getDebugSimpleTemplateNames(); if (!CGM.getCodeGenOpts().hasReducedDebugInfo()) - TemplateNamesKind = codegenoptions::DebugTemplateNamesKind::Full; + TemplateNamesKind = llvm::codegenoptions::DebugTemplateNamesKind::Full; std::optional<TemplateArgs> Args; @@ -5254,12 +5374,12 @@ std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const { PrintingPolicy PP = getPrintingPolicy(); - if (TemplateNamesKind == codegenoptions::DebugTemplateNamesKind::Full || + if (TemplateNamesKind == llvm::codegenoptions::DebugTemplateNamesKind::Full || !Reconstitutable) { ND->getNameForDiagnostic(OS, PP, Qualified); } else { - bool Mangled = - TemplateNamesKind == codegenoptions::DebugTemplateNamesKind::Mangled; + bool Mangled = TemplateNamesKind == + llvm::codegenoptions::DebugTemplateNamesKind::Mangled; // check if it's a template if (Mangled) OS << "_STN|"; @@ -5747,8 +5867,9 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) { llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { // Call site-related attributes are only useful in optimized programs, and // when there's a possibility of debugging backtraces. - if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo || - DebugKind == codegenoptions::LocTrackingOnly) + if (!CGM.getLangOpts().Optimize || + DebugKind == llvm::codegenoptions::NoDebugInfo || + DebugKind == llvm::codegenoptions::LocTrackingOnly) return llvm::DINode::FlagZero; // Call site-related attributes are available in DWARF v5. Some debuggers, diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 95484a060cd8..1fd08626358b 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -56,7 +56,7 @@ class CGDebugInfo { friend class ApplyDebugLocation; friend class SaveAndRestoreLocation; CodeGenModule &CGM; - const codegenoptions::DebugInfoKind DebugKind; + const llvm::codegenoptions::DebugInfoKind DebugKind; bool DebugTypeExtRefs; llvm::DIBuilder DBuilder; llvm::DICompileUnit *TheCU = nullptr; @@ -80,13 +80,12 @@ class CGDebugInfo { #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ llvm::DIType *Id##Ty = nullptr; #include "clang/Basic/OpenCLExtensionTypes.def" +#define WASM_TYPE(Name, Id, SingletonId) llvm::DIType *SingletonId = nullptr; +#include "clang/Basic/WebAssemblyReferenceTypes.def" /// Cache of previously constructed Types. 
  llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache;

-  std::map<llvm::StringRef, llvm::StringRef, std::greater<llvm::StringRef>>
-      DebugPrefixMap;
-
   /// Cache that maps VLA types to size expressions for that type,
   /// represented by instantiated Metadata nodes.
   llvm::SmallDenseMap<QualType, llvm::Metadata *> SizeExprCache;
@@ -149,7 +148,7 @@ class CGDebugInfo {
   llvm::BumpPtrAllocator DebugInfoNames;
   StringRef CWDName;

-  llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache;
+  llvm::StringMap<llvm::TrackingMDRef> DIFileCache;
   llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache;
   /// Cache declarations relevant to DW_TAG_imported_declarations (C++
   /// using declarations and global alias variables) that aren't covered
@@ -190,7 +189,15 @@ class CGDebugInfo {
   llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F);
   /// Get structure or union type.
   llvm::DIType *CreateType(const RecordType *Tyg);
-  llvm::DIType *CreateTypeDefinition(const RecordType *Ty);
+
+  /// Create definition for the specified 'Ty'.
+  ///
+  /// \returns A pair of 'llvm::DIType's. The first is the definition
+  /// of the 'Ty'. The second is the type specified by the preferred_name
+  /// attribute on 'Ty', which can be nullptr if no such attribute
+  /// exists.
+  std::pair<llvm::DIType *, llvm::DIType *>
+  CreateTypeDefinition(const RecordType *Ty);
   llvm::DICompositeType *CreateLimitedType(const RecordType *Ty);
   void CollectContainingType(const CXXRecordDecl *RD,
                              llvm::DICompositeType *CT);
@@ -274,6 +281,12 @@ class CGDebugInfo {
       llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
       llvm::DINode::DIFlags StartingFlags);

+  /// Helper function that returns the llvm::DIType that the
+  /// PreferredNameAttr attribute on \ref RD refers to. If no such
+  /// attribute exists, returns nullptr.
+  llvm::DIType *GetPreferredNameType(const CXXRecordDecl *RD,
+                                     llvm::DIFile *Unit);
+
   struct TemplateArgs {
     const TemplateParameterList *TList;
     llvm::ArrayRef<TemplateArgument> Args;
@@ -320,9 +333,15 @@ class CGDebugInfo {
   }

   /// Create new bit field member.
-  llvm::DIType *createBitFieldType(const FieldDecl *BitFieldDecl,
-                                   llvm::DIScope *RecordTy,
-                                   const RecordDecl *RD);
+  llvm::DIDerivedType *createBitFieldType(const FieldDecl *BitFieldDecl,
+                                          llvm::DIScope *RecordTy,
+                                          const RecordDecl *RD);
+
+  /// Create an anonymous zero-size separator for a bit-field declaration if
+  /// needed on the target.
+  llvm::DIDerivedType *createBitFieldSeparatorIfNeeded(
+      const FieldDecl *BitFieldDecl, const llvm::DIDerivedType *BitFieldDI,
+      llvm::ArrayRef<llvm::Metadata *> PreviousFieldsDI, const RecordDecl *RD);

   /// Helpers for collecting fields of a record.
   /// @{
@@ -487,10 +506,9 @@ public:

   /// Emit call to \c llvm.dbg.declare for an argument variable
   /// declaration.
-  llvm::DILocalVariable *EmitDeclareOfArgVariable(const VarDecl *Decl,
-                                                  llvm::Value *AI,
-                                                  unsigned ArgNo,
-                                                  CGBuilderTy &Builder);
+  llvm::DILocalVariable *
+  EmitDeclareOfArgVariable(const VarDecl *Decl, llvm::Value *AI, unsigned ArgNo,
+                           CGBuilderTy &Builder, bool UsePointerValue = false);

   /// Emit call to \c llvm.dbg.declare for the block-literal argument
   /// to a block invocation function.
@@ -811,7 +829,13 @@ public:
   ApplyDebugLocation(ApplyDebugLocation &&Other) : CGF(Other.CGF) {
     Other.CGF = nullptr;
   }
-  ApplyDebugLocation &operator=(ApplyDebugLocation &&) = default;
+
+  // Define move assignment operator.
+ ApplyDebugLocation &operator=(ApplyDebugLocation &&Other) { + CGF = Other.CGF; + Other.CGF = nullptr; + return *this; + } ~ApplyDebugLocation(); diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index ceaddc4e694a..b0d6eb05acc2 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -292,7 +292,8 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( if (AS != ExpectedAS) { Addr = getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, AS, ExpectedAS, - LTy->getPointerTo(getContext().getTargetAddressSpace(ExpectedAS))); + llvm::PointerType::get(getLLVMContext(), + getContext().getTargetAddressSpace(ExpectedAS))); } setStaticLocalDeclAddress(&D, Addr); @@ -394,13 +395,15 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, OldGV->eraseFromParent(); } - GV->setConstant(CGM.isTypeConstant(D.getType(), true)); + bool NeedsDtor = + D.needsDestruction(getContext()) == QualType::DK_cxx_destructor; + + GV->setConstant(CGM.isTypeConstant(D.getType(), true, !NeedsDtor)); GV->setInitializer(Init); emitter.finalize(GV); - if (D.needsDestruction(getContext()) == QualType::DK_cxx_destructor && - HaveInsertPoint()) { + if (NeedsDtor && HaveInsertPoint()) { // We have a constant initializer, but a nontrivial destructor. We still // need to perform a guarded "initialization" in order to register the // destructor. @@ -467,6 +470,9 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, else if (D.hasAttr<UsedAttr>()) CGM.addUsedOrCompilerUsedGlobal(var); + if (CGM.getCodeGenOpts().KeepPersistentStorageVariables) + CGM.addUsedOrCompilerUsedGlobal(var); + // We may have to cast the constant because of the initializer // mismatch above. // @@ -578,6 +584,16 @@ namespace { } }; + struct KmpcAllocFree final : EHScopeStack::Cleanup { + std::pair<llvm::Value *, llvm::Value *> AddrSizePair; + KmpcAllocFree(const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) + : AddrSizePair(AddrSizePair) {} + void Emit(CodeGenFunction &CGF, Flags EmissionFlags) override { + auto &RT = CGF.CGM.getOpenMPRuntime(); + RT.getKmpcFreeShared(CGF, AddrSizePair); + } + }; + struct ExtendGCLifetime final : EHScopeStack::Cleanup { const VarDecl &Var; ExtendGCLifetime(const VarDecl *var) : Var(*var) {} @@ -724,8 +740,8 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, // Handle a formal type change to avoid asserting. auto srcAddr = srcLV.getAddress(CGF); if (needsCast) { - srcAddr = CGF.Builder.CreateElementBitCast( - srcAddr, destLV.getAddress(CGF).getElementType()); + srcAddr = + srcAddr.withElementType(destLV.getAddress(CGF).getElementType()); } // If it was an l-value, use objc_copyWeak. 
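The KmpcAllocFree cleanup introduced above pairs a __kmpc_alloc_shared allocation with the matching __kmpc_free_shared when its scope unwinds; the EmitAutoVarAlloca hunk further below wires it up for escaped variable-length arrays in OpenMP device code. A hedged example of user code that would take this path when offloaded (illustrative only):

// Hypothetical translation unit compiled with -fopenmp for a GPU target.
// The VLA escapes through the call, so on the device its storage comes
// from __kmpc_alloc_shared rather than an alloca; the cleanup emits the
// matching __kmpc_free_shared when the scope ends.
extern void consume(int *p, int n);

void kernel(int n) {
#pragma omp target
  {
    int vla[n];      // delayed-globalized on the device
    consume(vla, n); // the escape forces the shared allocation
  }                  // scope exit: __kmpc_free_shared
}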
@@ -1170,7 +1186,7 @@ static Address createUnnamedGlobalForMemcpyFrom(CodeGenModule &CGM, llvm::Constant *Constant, CharUnits Align) { Address SrcPtr = CGM.createUnnamedGlobalFrom(D, Constant, Align); - return Builder.CreateElementBitCast(SrcPtr, CGM.Int8Ty); + return SrcPtr.withElementType(CGM.Int8Ty); } static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, @@ -1204,7 +1220,7 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, bool valueAlreadyCorrect = constant->isNullValue() || isa<llvm::UndefValue>(constant); if (!valueAlreadyCorrect) { - Loc = Builder.CreateElementBitCast(Loc, Ty); + Loc = Loc.withElementType(Ty); emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder, IsAutoInit); } @@ -1403,9 +1419,6 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( else { // Create an artificial VarDecl to generate debug info for. IdentifierInfo *NameIdent = VLAExprNames[NameIdx++]; - assert(cast<llvm::PointerType>(VlaSize.NumElts->getType()) - ->isOpaqueOrPointeeTypeMatches(SizeTy) && - "Number of VLA elements must be SizeTy"); auto QT = getContext().getIntTypeForBitwidth( SizeTy->getScalarSizeInBits(), false); auto *ArtificialDecl = VarDecl::Create( @@ -1481,10 +1494,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // emit it as a global instead. // Exception is if a variable is located in non-constant address space // in OpenCL. + bool NeedsDtor = + D.needsDestruction(getContext()) == QualType::DK_cxx_destructor; if ((!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) && (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && - !isEscapingByRef && CGM.isTypeConstant(Ty, true))) { + !isEscapingByRef && CGM.isTypeConstant(Ty, true, !NeedsDtor))) { EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); // Signal this condition to later callbacks. @@ -1581,28 +1596,59 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { } else { EnsureInsertPoint(); - if (!DidCallStackSave) { - // Save the stack. - Address Stack = - CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); + // Delayed globalization for variable length declarations. This ensures that + // the expression representing the length has been emitted and can be used + // by the definition of the VLA. Since this is an escaped declaration, in + // OpenMP we have to use a call to __kmpc_alloc_shared(). The matching + // deallocation call to __kmpc_free_shared() is emitted later. + bool VarAllocated = false; + if (getLangOpts().OpenMPIsTargetDevice) { + auto &RT = CGM.getOpenMPRuntime(); + if (RT.isDelayedVariableLengthDecl(*this, &D)) { + // Emit call to __kmpc_alloc_shared() instead of the alloca. + std::pair<llvm::Value *, llvm::Value *> AddrSizePair = + RT.getKmpcAllocShared(*this, &D); - llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); - llvm::Value *V = Builder.CreateCall(F); - Builder.CreateStore(V, Stack); + // Save the address of the allocation: + LValue Base = MakeAddrLValue(AddrSizePair.first, D.getType(), + CGM.getContext().getDeclAlign(&D), + AlignmentSource::Decl); + address = Base.getAddress(*this); - DidCallStackSave = true; + // Push a cleanup block to emit the call to __kmpc_free_shared in the + // appropriate location at the end of the scope of the + // __kmpc_alloc_shared functions: + pushKmpcAllocFree(NormalCleanup, AddrSizePair); - // Push a cleanup block and restore the stack there. - // FIXME: in general circumstances, this should be an EH cleanup. 
- pushStackRestore(NormalCleanup, Stack); + // Mark variable as allocated: + VarAllocated = true; + } } - auto VlaSize = getVLASize(Ty); - llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type); + if (!VarAllocated) { + if (!DidCallStackSave) { + // Save the stack. + Address Stack = + CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); + + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); + llvm::Value *V = Builder.CreateCall(F); + Builder.CreateStore(V, Stack); - // Allocate memory for the array. - address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts, - &AllocaAddr); + DidCallStackSave = true; + + // Push a cleanup block and restore the stack there. + // FIXME: in general circumstances, this should be an EH cleanup. + pushStackRestore(NormalCleanup, Stack); + } + + auto VlaSize = getVLASize(Ty); + llvm::Type *llvmTy = ConvertTypeForMem(VlaSize.Type); + + // Allocate memory for the array. + address = CreateTempAlloca(llvmTy, alignment, "vla", VlaSize.NumElts, + &AllocaAddr); + } // If we have debug info enabled, properly describe the VLA dimensions for // this type by registering the vla size expression for each of the @@ -1788,7 +1834,7 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); llvm::Value *BaseSizeInChars = llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); - Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); + Address Begin = Loc.withElementType(Int8Ty); llvm::Value *End = Builder.CreateInBoundsGEP( Begin.getElementType(), Begin.getPointer(), SizeVal, "vla.end"); llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); @@ -1919,7 +1965,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { return EmitStoreThroughLValue(RValue::get(constant), lv, true); } - emitStoresForConstant(CGM, D, Builder.CreateElementBitCast(Loc, CGM.Int8Ty), + emitStoresForConstant(CGM, D, Loc.withElementType(CGM.Int8Ty), type.isVolatileQualified(), Builder, constant, /*IsAutoInit=*/false); } @@ -2139,6 +2185,11 @@ void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) { EHStack.pushCleanup<CallStackRestore>(Kind, SPMem); } +void CodeGenFunction::pushKmpcAllocFree( + CleanupKind Kind, std::pair<llvm::Value *, llvm::Value *> AddrSizePair) { + EHStack.pushCleanup<KmpcAllocFree>(Kind, AddrSizePair); +} + void CodeGenFunction::pushLifetimeExtendedDestroy(CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, @@ -2451,7 +2502,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, assert((isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D)) && "Invalid argument to EmitParmDecl"); - Arg.getAnyValue()->setName(D.getName()); + // Set the name of the parameter's initial value to make IR easier to + // read. Don't modify the names of globals. + if (!isa<llvm::GlobalValue>(Arg.getAnyValue())) + Arg.getAnyValue()->setName(D.getName()); QualType Ty = D.getType(); @@ -2476,17 +2530,30 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, Address AllocaPtr = Address::invalid(); bool DoStore = false; bool IsScalar = hasScalarEvaluationKind(Ty); + bool UseIndirectDebugAddress = false; + // If we already have a pointer to the argument, reuse the input pointer. if (Arg.isIndirect()) { - // If we have a prettier pointer type at this point, bitcast to that. 
DeclPtr = Arg.getIndirectAddress(); - DeclPtr = Builder.CreateElementBitCast(DeclPtr, ConvertTypeForMem(Ty), - D.getName()); + DeclPtr = DeclPtr.withElementType(ConvertTypeForMem(Ty)); // Indirect argument is in alloca address space, which may be different // from the default address space. auto AllocaAS = CGM.getASTAllocaAddressSpace(); auto *V = DeclPtr.getPointer(); AllocaPtr = DeclPtr; + + // For truly ABI indirect arguments -- those that are not `byval` -- store + // the address of the argument on the stack to preserve debug information. + ABIArgInfo ArgInfo = CurFnInfo->arguments()[ArgNo - 1].info; + if (ArgInfo.isIndirect()) + UseIndirectDebugAddress = !ArgInfo.getIndirectByVal(); + if (UseIndirectDebugAddress) { + auto PtrTy = getContext().getPointerType(Ty); + AllocaPtr = CreateMemTemp(PtrTy, getContext().getTypeAlignInChars(PtrTy), + D.getName() + ".indirect_addr"); + EmitStoreOfScalar(V, AllocaPtr, /* Volatile */ false, PtrTy); + } + auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS; auto DestLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default; @@ -2494,9 +2561,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, assert(getContext().getTargetAddressSpace(SrcLangAS) == CGM.getDataLayout().getAllocaAddrSpace()); auto DestAS = getContext().getTargetAddressSpace(DestLangAS); - auto *T = DeclPtr.getElementType()->getPointerTo(DestAS); - DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast( - *this, V, SrcLangAS, DestLangAS, T, true)); + auto *T = llvm::PointerType::get(getLLVMContext(), DestAS); + DeclPtr = + DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast( + *this, V, SrcLangAS, DestLangAS, T, true), + DeclPtr.isKnownNonNull()); } // Push a destructor cleanup for this parameter if the ABI requires it. @@ -2603,7 +2672,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (CGM.getCodeGenOpts().hasReducedDebugInfo() && !CurFuncIsThunk && !NoDebugInfo) { llvm::DILocalVariable *DILocalVar = DI->EmitDeclareOfArgVariable( - &D, AllocaPtr.getPointer(), ArgNo, Builder); + &D, AllocaPtr.getPointer(), ArgNo, Builder, UseIndirectDebugAddress); if (const auto *Var = dyn_cast_or_null<ParmVarDecl>(&D)) DI->getParamDbgMappings().insert({Var, DILocalVar}); } diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index dcd811ea257b..be8fb6c274db 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -122,8 +122,8 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, if (CGF.getContext().getLangOpts().OpenCL) { auto DestAS = CGM.getTargetCodeGenInfo().getAddrSpaceOfCxaAtexitPtrParam(); - auto DestTy = CGF.getTypes().ConvertType(Type)->getPointerTo( - CGM.getContext().getTargetAddressSpace(DestAS)); + auto DestTy = llvm::PointerType::get( + CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(DestAS)); auto SrcAS = D.getType().getQualifiers().getAddressSpace(); if (DestAS == SrcAS) Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy); @@ -132,12 +132,11 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // of the global destructor function should be adjusted accordingly. Argument = llvm::ConstantPointerNull::get(DestTy); } else { - Argument = llvm::ConstantExpr::getBitCast( - Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); + Argument = Addr.getPointer(); } // Otherwise, the standard logic requires a helper function. 
} else { - Addr = Addr.getElementBitCast(CGF.ConvertTypeForMem(Type)); + Addr = Addr.withElementType(CGF.ConvertTypeForMem(Type)); Func = CodeGenFunction(CGM) .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind), CGF.needsEHCleanup(DtorKind), &D); @@ -199,8 +198,8 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, unsigned ActualAddrSpace = GV->getAddressSpace(); llvm::Constant *DeclPtr = GV; if (ActualAddrSpace != ExpectedAddrSpace) { - llvm::PointerType *PTy = llvm::PointerType::getWithSamePointeeType( - GV->getType(), ExpectedAddrSpace); + llvm::PointerType *PTy = + llvm::PointerType::get(getLLVMContext(), ExpectedAddrSpace); DeclPtr = llvm::ConstantExpr::getAddrSpaceCast(DeclPtr, PTy); } @@ -214,9 +213,11 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, &D, DeclAddr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(), PerformInit, this); } + bool NeedsDtor = + D.needsDestruction(getContext()) == QualType::DK_cxx_destructor; if (PerformInit) EmitDeclInit(*this, D, DeclAddr); - if (CGM.isTypeConstant(D.getType(), true)) + if (CGM.isTypeConstant(D.getType(), true, !NeedsDtor)) EmitDeclInvariant(*this, D, DeclPtr); else EmitDeclDestroy(*this, D, DeclAddr); @@ -642,7 +643,7 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() { This is arranged to be run only once regardless of how many times the module might be included transitively. This is arranged by using a guard variable. - If there are no initalizers at all (and also no imported modules) we reduce + If there are no initializers at all (and also no imported modules) we reduce this to an empty function (since the Itanium ABI requires that this function be available to a caller, which might be produced by a different implementation). @@ -878,13 +879,15 @@ CodeGenModule::EmitCXXGlobalInitFunc() { // Include the filename in the symbol name. Including "sub_" matches gcc // and makes sure these symbols appear lexicographically behind the symbols - // with priority emitted above. + // with priority emitted above. Module implementation units behave the same + // way as a non-modular TU with imports. llvm::Function *Fn; - if (CXX20ModuleInits && getContext().getModuleForCodeGen()) { + if (CXX20ModuleInits && getContext().getCurrentNamedModule() && + !getContext().getCurrentNamedModule()->isModuleImplementation()) { SmallString<256> InitFnName; llvm::raw_svector_ostream Out(InitFnName); cast<ItaniumMangleContext>(getCXXABI().getMangleContext()) - .mangleModuleInitializer(getContext().getModuleForCodeGen(), Out); + .mangleModuleInitializer(getContext().getCurrentNamedModule(), Out); Fn = CreateGlobalInitOrCleanUpFunction( FTy, llvm::Twine(InitFnName), FI, SourceLocation(), false, llvm::GlobalVariable::ExternalLinkage); diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 6fa7871588f7..9cb7d4c7731d 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -401,7 +401,7 @@ void CodeGenFunction::EmitAnyExprToExn(const Expr *e, Address addr) { // __cxa_allocate_exception returns a void*; we need to cast this // to the appropriate type for the object. llvm::Type *ty = ConvertTypeForMem(e->getType()); - Address typedAddr = Builder.CreateElementBitCast(addr, ty); + Address typedAddr = addr.withElementType(ty); // FIXME: this isn't quite right!
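A small illustration of the module-initializer naming rule discussed above (a sketch; the mangled spellings are Itanium-ABI details rather than text from this patch): only a module interface unit gets the mangled module initializer, while an implementation unit is emitted like an ordinary TU with imports.

```cpp
// M.cppm (interface unit): dynamic initialization runs from the mangled
// module initializer, roughly _ZGIW1M for a module named M.
export module M;
int compute();
int global = compute();

// M.cpp (implementation unit, "module M;"): gets a conventional
// _GLOBAL__sub_I_<filename>-style initializer, just like a non-modular TU.
```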
If there's a final unelided call // to a copy constructor, then according to [except.terminate]p1 we @@ -646,7 +646,7 @@ void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // Under async exceptions, catch(...) needs to catch HW exceptions too // Mark scope with SehTryBegin as a SEH __try scope if (getLangOpts().EHAsynch) - EmitRuntimeCallOrInvoke(getSehTryBeginFn(CGM)); + EmitSehTryScopeBegin(); } } } @@ -1842,7 +1842,7 @@ Address CodeGenFunction::recoverAddrOfEscapedLocal(CodeGenFunction &ParentCGF, llvm::Value *ChildVar = Builder.CreateBitCast(RecoverCall, ParentVar.getType()); ChildVar->setName(ParentVar.getName()); - return ParentVar.withPointer(ChildVar); + return ParentVar.withPointer(ChildVar, KnownNonNull); } void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, @@ -2101,7 +2101,6 @@ void CodeGenFunction::EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF, // pointer is stored in the second field. So, GEP 20 bytes backwards and // load the pointer. SEHInfo = Builder.CreateConstInBoundsGEP1_32(Int8Ty, EntryFP, -20); - SEHInfo = Builder.CreateBitCast(SEHInfo, Int8PtrTy->getPointerTo()); SEHInfo = Builder.CreateAlignedLoad(Int8PtrTy, SEHInfo, getPointerAlign()); SEHCodeSlotStack.push_back(recoverAddrOfEscapedLocal( ParentCGF, ParentCGF.SEHCodeSlotStack.back(), ParentFP)); @@ -2114,10 +2113,9 @@ void CodeGenFunction::EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF, // CONTEXT *ContextRecord; // }; // int exceptioncode = exception_pointers->ExceptionRecord->ExceptionCode; - llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo(); + llvm::Type *RecordTy = llvm::PointerType::getUnqual(getLLVMContext()); llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy); - llvm::Value *Ptrs = Builder.CreateBitCast(SEHInfo, PtrsTy->getPointerTo()); - llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, Ptrs, 0); + llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, SEHInfo, 0); Rec = Builder.CreateAlignedLoad(RecordTy, Rec, getPointerAlign()); llvm::Value *Code = Builder.CreateAlignedLoad(Int32Ty, Rec, getIntAlign()); assert(!SEHCodeSlotStack.empty() && "emitting EH code outside of __except"); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index c26dd1b23321..ed6095f7cfeb 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -33,13 +33,16 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" +#include "llvm/Passes/OptimizationLevel.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/xxhash.h" #include "llvm/Transforms/Utils/SanitizerStats.h" #include <optional> @@ -52,18 +55,6 @@ using namespace CodeGen; // Miscellaneous Helper Methods //===--------------------------------------------------------------------===// -llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { - unsigned addressSpace = - cast<llvm::PointerType>(value->getType())->getAddressSpace(); - - llvm::PointerType *destType = Int8PtrTy; - if (addressSpace) - destType = llvm::Type::getInt8PtrTy(getLLVMContext(), addressSpace); - - if (value->getType() == destType) return value; - return Builder.CreateBitCast(value, destType); -} - /// CreateTempAlloca - This creates an alloca and inserts it into the entry /// block.
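A hypothetical SEH snippet (assuming a Windows target, e.g. clang-cl) showing what EmitSEHExceptionCodeSave() services: the filter's GetExceptionCode() is materialized from the EXCEPTION_POINTERS record that the code above recovers through the parent frame pointer.

```cpp
#include <windows.h>

static int filter(unsigned long code) {
  return code == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER
                                            : EXCEPTION_CONTINUE_SEARCH;
}

int main() {
  __try {
    *(volatile int *)0 = 1; // raise an access violation
  } __except (filter(GetExceptionCode())) {
    return 1;               // reached only for the filtered exception code
  }
  return 0;
}
```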
Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, @@ -72,7 +63,7 @@ Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, llvm::Value *ArraySize) { auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); Alloca->setAlignment(Align.getAsAlign()); - return Address(Alloca, Ty, Align); + return Address(Alloca, Ty, Align, KnownNonNull); } /// CreateTempAlloca - This creates a alloca and inserts it into the entry @@ -102,7 +93,7 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); } - return Address(V, Ty, Align); + return Address(V, Ty, Align, KnownNonNull); } /// CreateTempAlloca - This creates an alloca and inserts it into the entry @@ -151,7 +142,7 @@ Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, Result = Address( Builder.CreateBitCast(Result.getPointer(), VectorTy->getPointerTo()), - VectorTy, Result.getAlignment()); + VectorTy, Result.getAlignment(), KnownNonNull); } return Result; } @@ -401,7 +392,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, QualType Ty = Inner->getType(); if (CGF.CGM.getCodeGenOpts().MergeAllConstants && (Ty->isArrayType() || Ty->isRecordType()) && - CGF.CGM.isTypeConstant(Ty, true)) + CGF.CGM.isTypeConstant(Ty, true, false)) if (auto Init = ConstantEmitter(CGF).tryEmitAbstract(Inner, Ty)) { auto AS = CGF.CGM.GetGlobalConstantAddressSpace(); auto *GV = new llvm::GlobalVariable( @@ -541,13 +532,17 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Avoid creating a conditional cleanup just to hold an llvm.lifetime.end // marker. Instead, start the lifetime of a conditional temporary earlier // so that it's unconditional. Don't do this with sanitizers which need - // more precise lifetime marks. + // more precise lifetime marks. However when inside an "await.suspend" + // block, we should always avoid conditional cleanup because it creates + // boolean marker that lives across await_suspend, which can destroy coro + // frame. ConditionalEvaluation *OldConditional = nullptr; CGBuilderTy::InsertPoint OldIP; if (isInConditionalBranch() && !E->getType().isDestructedType() && - !SanOpts.has(SanitizerKind::HWAddress) && - !SanOpts.has(SanitizerKind::Memory) && - !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) { + ((!SanOpts.has(SanitizerKind::HWAddress) && + !SanOpts.has(SanitizerKind::Memory) && + !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) || + inSuspendBlock())) { OldConditional = OutermostConditional; OutermostConditional = nullptr; @@ -1035,11 +1030,10 @@ void CodeGenModule::EmitExplicitCastExprType(const ExplicitCastExpr *E, // LValue Expression Emission //===----------------------------------------------------------------------===// -/// EmitPointerWithAlignment - Given an expression of pointer type, try to -/// derive a more accurate bound on the alignment of the pointer. -Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, - LValueBaseInfo *BaseInfo, - TBAAAccessInfo *TBAAInfo) { +static Address EmitPointerWithAlignment(const Expr *E, LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo, + KnownNonNull_t IsKnownNonNull, + CodeGenFunction &CGF) { // We allow this with ObjC object pointers because of fragile ABIs. 
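The extra boolean passed to isTypeConstant() above feeds the path that promotes constant-initialized temporaries to globals under -fmerge-all-constants. A minimal sketch of the source pattern (the promotion is an optimization, not a guarantee):

```cpp
void take(const int (&r)[3]);

void call() {
  take({1, 2, 3}); // the temporary array may be emitted as a private,
}                  // unnamed constant global instead of a stack object
```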
assert(E->getType()->isPointerType() || E->getType()->isObjCObjectPointerType()); @@ -1048,7 +1042,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Casts: if (const CastExpr *CE = dyn_cast<CastExpr>(E)) { if (const auto *ECE = dyn_cast<ExplicitCastExpr>(CE)) - CGM.EmitExplicitCastExprType(ECE, this); + CGF.CGM.EmitExplicitCastExprType(ECE, &CGF); switch (CE->getCastKind()) { // Non-converting casts (but not C's implicit conversion from void*). @@ -1061,49 +1055,51 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, LValueBaseInfo InnerBaseInfo; TBAAAccessInfo InnerTBAAInfo; - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), - &InnerBaseInfo, - &InnerTBAAInfo); + Address Addr = CGF.EmitPointerWithAlignment( + CE->getSubExpr(), &InnerBaseInfo, &InnerTBAAInfo, IsKnownNonNull); if (BaseInfo) *BaseInfo = InnerBaseInfo; if (TBAAInfo) *TBAAInfo = InnerTBAAInfo; if (isa<ExplicitCastExpr>(CE)) { LValueBaseInfo TargetTypeBaseInfo; TBAAAccessInfo TargetTypeTBAAInfo; - CharUnits Align = CGM.getNaturalPointeeTypeAlignment( + CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment( E->getType(), &TargetTypeBaseInfo, &TargetTypeTBAAInfo); if (TBAAInfo) - *TBAAInfo = CGM.mergeTBAAInfoForCast(*TBAAInfo, - TargetTypeTBAAInfo); + *TBAAInfo = + CGF.CGM.mergeTBAAInfoForCast(*TBAAInfo, TargetTypeTBAAInfo); // If the source l-value is opaque, honor the alignment of the // casted-to type. if (InnerBaseInfo.getAlignmentSource() != AlignmentSource::Decl) { if (BaseInfo) BaseInfo->mergeForCast(TargetTypeBaseInfo); - Addr = Address(Addr.getPointer(), Addr.getElementType(), Align); + Addr = Address(Addr.getPointer(), Addr.getElementType(), Align, + IsKnownNonNull); } } - if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && + if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast) && CE->getCastKind() == CK_BitCast) { if (auto PT = E->getType()->getAs<PointerType>()) - EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr, - /*MayBeNull=*/true, - CodeGenFunction::CFITCK_UnrelatedCast, - CE->getBeginLoc()); + CGF.EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr, + /*MayBeNull=*/true, + CodeGenFunction::CFITCK_UnrelatedCast, + CE->getBeginLoc()); } - llvm::Type *ElemTy = ConvertTypeForMem(E->getType()->getPointeeType()); - Addr = Builder.CreateElementBitCast(Addr, ElemTy); + llvm::Type *ElemTy = + CGF.ConvertTypeForMem(E->getType()->getPointeeType()); + Addr = Addr.withElementType(ElemTy); if (CE->getCastKind() == CK_AddressSpaceConversion) - Addr = Builder.CreateAddrSpaceCast(Addr, ConvertType(E->getType())); + Addr = CGF.Builder.CreateAddrSpaceCast(Addr, + CGF.ConvertType(E->getType())); return Addr; } break; // Array-to-pointer decay. case CK_ArrayToPointerDecay: - return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo, TBAAInfo); + return CGF.EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo, TBAAInfo); // Derived-to-base conversions. case CK_UncheckedDerivedToBase: @@ -1112,13 +1108,15 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // conservatively pretend that the complete object is of the base class // type. 
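A sketch of the "honor the alignment of the casted-to type" rule above: when the source pointer is opaque (a void* here), the explicit cast is treated as a promise about alignment.

```cpp
struct alignas(16) Vec4 { float v[4]; };

float first(void *p) {
  auto *q = reinterpret_cast<Vec4 *>(p); // explicit cast: 16-byte alignment
  return q->v[0];                        // may be assumed for this load
}
```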
if (TBAAInfo) - *TBAAInfo = CGM.getTBAAAccessInfo(E->getType()); - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo); + *TBAAInfo = CGF.CGM.getTBAAAccessInfo(E->getType()); + Address Addr = CGF.EmitPointerWithAlignment( + CE->getSubExpr(), BaseInfo, nullptr, + (KnownNonNull_t)(IsKnownNonNull || + CE->getCastKind() == CK_UncheckedDerivedToBase)); auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); - return GetAddressOfBaseClass(Addr, Derived, - CE->path_begin(), CE->path_end(), - ShouldNullCheckClassCastValue(CE), - CE->getExprLoc()); + return CGF.GetAddressOfBaseClass( + Addr, Derived, CE->path_begin(), CE->path_end(), + CGF.ShouldNullCheckClassCastValue(CE), CE->getExprLoc()); } // TODO: Is there any reason to treat base-to-derived conversions @@ -1131,10 +1129,10 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Unary &. if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { if (UO->getOpcode() == UO_AddrOf) { - LValue LV = EmitLValue(UO->getSubExpr()); + LValue LV = CGF.EmitLValue(UO->getSubExpr(), IsKnownNonNull); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); - return LV.getAddress(*this); + return LV.getAddress(CGF); } } @@ -1146,10 +1144,10 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, case Builtin::BIaddressof: case Builtin::BI__addressof: case Builtin::BI__builtin_addressof: { - LValue LV = EmitLValue(Call->getArg(0)); + LValue LV = CGF.EmitLValue(Call->getArg(0), IsKnownNonNull); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); - return LV.getAddress(*this); + return LV.getAddress(CGF); } } } @@ -1158,9 +1156,21 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Otherwise, use the alignment of the type. CharUnits Align = - CGM.getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, TBAAInfo); - llvm::Type *ElemTy = ConvertTypeForMem(E->getType()->getPointeeType()); - return Address(EmitScalarExpr(E), ElemTy, Align); + CGF.CGM.getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, TBAAInfo); + llvm::Type *ElemTy = CGF.ConvertTypeForMem(E->getType()->getPointeeType()); + return Address(CGF.EmitScalarExpr(E), ElemTy, Align, IsKnownNonNull); +} + +/// EmitPointerWithAlignment - Given an expression of pointer type, try to +/// derive a more accurate bound on the alignment of the pointer. +Address CodeGenFunction::EmitPointerWithAlignment( + const Expr *E, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo, + KnownNonNull_t IsKnownNonNull) { + Address Addr = + ::EmitPointerWithAlignment(E, BaseInfo, TBAAInfo, IsKnownNonNull, *this); + if (IsKnownNonNull && !Addr.isKnownNonNull()) + Addr.setKnownNonNull(); + return Addr; } llvm::Value *CodeGenFunction::EmitNonNullRValueCheck(RValue RV, QualType T) { @@ -1270,7 +1280,16 @@ LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) { /// type of the same size of the lvalue's type. If the lvalue has a variable /// length type, this is not possible. 
/// -LValue CodeGenFunction::EmitLValue(const Expr *E) { +LValue CodeGenFunction::EmitLValue(const Expr *E, + KnownNonNull_t IsKnownNonNull) { + LValue LV = EmitLValueHelper(E, IsKnownNonNull); + if (IsKnownNonNull && !LV.isKnownNonNull()) + LV.setKnownNonNull(); + return LV; +} + +LValue CodeGenFunction::EmitLValueHelper(const Expr *E, + KnownNonNull_t IsKnownNonNull) { ApplyDebugLocation DL(*this, E); switch (E->getStmtClass()) { default: return EmitUnsupportedLValue(E, "l-value expression"); @@ -1298,7 +1317,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::UserDefinedLiteralClass: return EmitCallExprLValue(cast<CallExpr>(E)); case Expr::CXXRewrittenBinaryOperatorClass: - return EmitLValue(cast<CXXRewrittenBinaryOperator>(E)->getSemanticForm()); + return EmitLValue(cast<CXXRewrittenBinaryOperator>(E)->getSemanticForm(), + IsKnownNonNull); case Expr::VAArgExprClass: return EmitVAArgExprLValue(cast<VAArgExpr>(E)); case Expr::DeclRefExprClass: @@ -1311,12 +1331,13 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { ->getPointeeType(); return MakeNaturalAlignAddrLValue(Result, RetType); } - return EmitLValue(cast<ConstantExpr>(E)->getSubExpr()); + return EmitLValue(cast<ConstantExpr>(E)->getSubExpr(), IsKnownNonNull); } case Expr::ParenExprClass: - return EmitLValue(cast<ParenExpr>(E)->getSubExpr()); + return EmitLValue(cast<ParenExpr>(E)->getSubExpr(), IsKnownNonNull); case Expr::GenericSelectionExprClass: - return EmitLValue(cast<GenericSelectionExpr>(E)->getResultExpr()); + return EmitLValue(cast<GenericSelectionExpr>(E)->getResultExpr(), + IsKnownNonNull); case Expr::PredefinedExprClass: return EmitPredefinedLValue(cast<PredefinedExpr>(E)); case Expr::StringLiteralClass: @@ -1340,15 +1361,16 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::ExprWithCleanupsClass: { const auto *cleanups = cast<ExprWithCleanups>(E); RunCleanupsScope Scope(*this); - LValue LV = EmitLValue(cleanups->getSubExpr()); + LValue LV = EmitLValue(cleanups->getSubExpr(), IsKnownNonNull); if (LV.isSimple()) { // Defend against branches out of gnu statement expressions surrounded by // cleanups. Address Addr = LV.getAddress(*this); llvm::Value *V = Addr.getPointer(); Scope.ForceCleanup({&V}); - return LValue::MakeAddr(Addr.withPointer(V), LV.getType(), getContext(), - LV.getBaseInfo(), LV.getTBAAInfo()); + return LValue::MakeAddr(Addr.withPointer(V, Addr.isKnownNonNull()), + LV.getType(), getContext(), LV.getBaseInfo(), + LV.getTBAAInfo()); } // FIXME: Is it possible to create an ExprWithCleanups that produces a // bitfield lvalue or some other non-simple lvalue? 
@@ -1358,12 +1380,12 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::CXXDefaultArgExprClass: { auto *DAE = cast<CXXDefaultArgExpr>(E); CXXDefaultArgExprScope Scope(*this, DAE); - return EmitLValue(DAE->getExpr()); + return EmitLValue(DAE->getExpr(), IsKnownNonNull); } case Expr::CXXDefaultInitExprClass: { auto *DIE = cast<CXXDefaultInitExpr>(E); CXXDefaultInitExprScope Scope(*this, DIE); - return EmitLValue(DIE->getExpr()); + return EmitLValue(DIE->getExpr(), IsKnownNonNull); } case Expr::CXXTypeidExprClass: return EmitCXXTypeidLValue(cast<CXXTypeidExpr>(E)); @@ -1395,11 +1417,12 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::BinaryConditionalOperatorClass: return EmitConditionalOperatorLValue(cast<BinaryConditionalOperator>(E)); case Expr::ChooseExprClass: - return EmitLValue(cast<ChooseExpr>(E)->getChosenSubExpr()); + return EmitLValue(cast<ChooseExpr>(E)->getChosenSubExpr(), IsKnownNonNull); case Expr::OpaqueValueExprClass: return EmitOpaqueValueLValue(cast<OpaqueValueExpr>(E)); case Expr::SubstNonTypeTemplateParmExprClass: - return EmitLValue(cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement()); + return EmitLValue(cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement(), + IsKnownNonNull); case Expr::ImplicitCastExprClass: case Expr::CStyleCastExprClass: case Expr::CXXFunctionalCastExprClass: @@ -1691,7 +1714,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, bool isNontemporal) { if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr.getPointer())) if (GV->isThreadLocal()) - Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV)); + Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV), + NotKnownNonNull); if (const auto *ClangVecTy = Ty->getAs<VectorType>()) { // Boolean vectors use `iN` as storage type. @@ -1719,10 +1743,9 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, if (!CGM.getCodeGenOpts().PreserveVec3Type && VTy->getNumElements() == 3) { - // Bitcast to vec4 type. llvm::VectorType *vec4Ty = llvm::FixedVectorType::get(VTy->getElementType(), 4); - Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4"); + Address Cast = Addr.withElementType(vec4Ty); // Now load value. 
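The vec3 handling above is reached by source like the following sketch: clang widens 3-element vector memory accesses to 4 elements unless the cc1-level -fpreserve-vec3-type option is given.

```cpp
typedef float float3 __attribute__((ext_vector_type(3)));

float first(float3 *p) {
  float3 v = *p; // loaded as <4 x float>, then shuffled down to <3 x float>
  return v.x;
}
```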
llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4"); @@ -1743,7 +1766,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, if (isNontemporal) { llvm::MDNode *Node = llvm::MDNode::get( Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); - Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + Load->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); } CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); @@ -1806,7 +1829,7 @@ static Address MaybeConvertMatrixAddress(Address Addr, CodeGenFunction &CGF, auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(), ArrayTy->getNumElements()); - return Address(CGF.Builder.CreateElementBitCast(Addr, VectorTy)); + return Addr.withElementType(VectorTy); } auto *VectorTy = dyn_cast<llvm::VectorType>(Addr.getElementType()); if (VectorTy && !IsVector) { @@ -1814,7 +1837,7 @@ static Address MaybeConvertMatrixAddress(Address Addr, CodeGenFunction &CGF, VectorTy->getElementType(), cast<llvm::FixedVectorType>(VectorTy)->getNumElements()); - return Address(CGF.Builder.CreateElementBitCast(Addr, ArrayTy)); + return Addr.withElementType(ArrayTy); } return Addr; @@ -1839,7 +1862,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool isInit, bool isNontemporal) { if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr.getPointer())) if (GV->isThreadLocal()) - Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV)); + Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV), + NotKnownNonNull); llvm::Type *SrcTy = Value->getType(); if (const auto *ClangVecTy = Ty->getAs<VectorType>()) { @@ -1861,7 +1885,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4); } if (Addr.getElementType() != SrcTy) { - Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); + Addr = Addr.withElementType(SrcTy); } } } @@ -1881,7 +1905,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, llvm::MDNode *Node = llvm::MDNode::get(Store->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); - Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + Store->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); } CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); @@ -2043,9 +2067,7 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { QualType EQT = LV.getType()->castAs<VectorType>()->getElementType(); llvm::Type *VectorElementTy = CGM.getTypes().ConvertType(EQT); - Address CastToPointerElement = - Builder.CreateElementBitCast(VectorAddress, VectorElementTy, - "conv.ptr.element"); + Address CastToPointerElement = VectorAddress.withElementType(VectorElementTy); const llvm::Constant *Elts = LV.getExtVectorElts(); unsigned ix = getAccessedFieldNo(0, Elts); @@ -2488,7 +2510,7 @@ static LValue EmitThreadPrivateVarDeclLValue( Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc); - Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy); + Addr = Addr.withElementType(RealVarTy); return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } @@ -2566,7 +2588,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T); // Check if the variable is marked as declare target with link clause in // device codegen. 
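For the declare-target-link lookup above, a minimal sketch, assuming an OpenMP offloading toolchain (the standalone form of the directive is used):

```cpp
int big_table[1024];
#pragma omp declare target link(big_table)

int get(int i) {
  int r;
#pragma omp target map(from : r)
  { r = big_table[i]; } // the device-side access is routed through the
  return r;             // link pointer that the OpenMP runtime fills in
}
```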
- if (CGF.getLangOpts().OpenMPIsDevice) { + if (CGF.getLangOpts().OpenMPIsTargetDevice) { Address Addr = emitDeclTargetVarDeclLValue(CGF, VD, T); if (Addr.isValid()) return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); @@ -2848,8 +2870,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // Handle threadlocal function locals. if (VD->getTLSKind() != VarDecl::TLS_None) - addr = - addr.withPointer(Builder.CreateThreadLocalAddress(addr.getPointer())); + addr = addr.withPointer( + Builder.CreateThreadLocalAddress(addr.getPointer()), NotKnownNonNull); // Check for OpenMP threadprivate variables. if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && @@ -3188,7 +3210,7 @@ enum class CheckRecoverableKind { static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) { assert(Kind.countPopulation() == 1); - if (Kind == SanitizerKind::Function || Kind == SanitizerKind::Vptr) + if (Kind == SanitizerKind::Vptr) return CheckRecoverableKind::AlwaysRecoverable; else if (Kind == SanitizerKind::Return || Kind == SanitizerKind::Unreachable) return CheckRecoverableKind::Unrecoverable; @@ -3333,7 +3355,7 @@ void CodeGenFunction::EmitCheck( CGM.getDataLayout().getDefaultGlobalsAddressSpace()); InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - Args.push_back(EmitCastToVoidPtr(InfoPtr)); + Args.push_back(InfoPtr); ArgTypes.push_back(Args.back()->getType()); } @@ -3606,7 +3628,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. llvm::Type *NewTy = ConvertType(E->getType()); - Addr = Builder.CreateElementBitCast(Addr, NewTy); + Addr = Addr.withElementType(NewTy); // Note that VLA pointers are always decayed, so we don't need to do // anything here. @@ -3625,7 +3647,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, if (BaseInfo) *BaseInfo = LV.getBaseInfo(); if (TBAAInfo) *TBAAInfo = CGM.getTBAAAccessInfo(EltType); - return Builder.CreateElementBitCast(Addr, ConvertTypeForMem(EltType)); + return Addr.withElementType(ConvertTypeForMem(EltType)); } /// isSimpleArrayDecayOperand - If the specified expr is a simple decay from an @@ -3871,18 +3893,14 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // correctly, so we need to cast to i8*. FIXME: is this actually // true? A lot of other things in the fragile ABI would break... llvm::Type *OrigBaseElemTy = Addr.getElementType(); - Addr = Builder.CreateElementBitCast(Addr, Int8Ty); // Do the GEP. CharUnits EltAlign = getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize); llvm::Value *EltPtr = - emitArraySubscriptGEP(*this, Addr.getElementType(), Addr.getPointer(), - ScaledIdx, false, SignedIndices, E->getExprLoc()); - Addr = Address(EltPtr, Addr.getElementType(), EltAlign); - - // Cast back. - Addr = Builder.CreateElementBitCast(Addr, OrigBaseElemTy); + emitArraySubscriptGEP(*this, Int8Ty, Addr.getPointer(), ScaledIdx, + false, SignedIndices, E->getExprLoc()); + Addr = Address(EltPtr, OrigBaseElemTy, EltAlign); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. 
While correct, it is @@ -3960,7 +3978,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, // If the array type was an incomplete type, we need to make sure // the decay ends up being the right type. llvm::Type *NewTy = CGF.ConvertType(BaseTy); - Addr = CGF.Builder.CreateElementBitCast(Addr, NewTy); + Addr = Addr.withElementType(NewTy); // Note that VLA pointers are always decayed, so we don't need to do // anything here. @@ -3970,8 +3988,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, Addr = CGF.Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } - return CGF.Builder.CreateElementBitCast(Addr, - CGF.ConvertTypeForMem(ElTy)); + return Addr.withElementType(CGF.ConvertTypeForMem(ElTy)); } LValueBaseInfo TypeBaseInfo; TBAAAccessInfo TypeTBAAInfo; @@ -4067,6 +4084,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } } else { auto *CAT = C.getAsConstantArrayType(ArrayTy); + assert(CAT && "unexpected type for array initializer"); ConstLength = CAT->getSize(); } if (Length) { @@ -4287,7 +4305,7 @@ static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, CGF.getContext().getFieldOffset(Field)); if (Offset.isZero()) return Base; - Base = CGF.Builder.CreateElementBitCast(Base, CGF.Int8Ty); + Base = Base.withElementType(CGF.Int8Ty); return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset); } @@ -4375,8 +4393,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; // Get the access type. llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), SS); - if (Addr.getElementType() != FieldIntTy) - Addr = Builder.CreateElementBitCast(Addr, FieldIntTy); + Addr = Addr.withElementType(FieldIntTy); if (UseVolatile) { const unsigned VolatileOffset = Info.VolatileStorageOffset.getQuantity(); if (VolatileOffset) @@ -4463,8 +4480,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, } if (FieldType->isReferenceType()) - addr = Builder.CreateElementBitCast( - addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); + addr = addr.withElementType(CGM.getTypes().ConvertTypeForMem(FieldType)); } else { if (!IsInPreservedAIRegion && (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) @@ -4489,11 +4505,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, } // Make sure that the address is pointing to the right type. This is critical - // for both unions and structs. A union needs a bitcast, a struct element - // will need a bitcast if the LLVM type laid out doesn't match the desired - // type. - addr = Builder.CreateElementBitCast( - addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); + // for both unions and structs. + addr = addr.withElementType(CGM.getTypes().ConvertTypeForMem(FieldType)); if (field->hasAttr<AnnotateAttr>()) addr = EmitFieldAnnotations(field, addr); @@ -4520,7 +4533,7 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, // Make sure that the address is pointing to the right type. llvm::Type *llvmType = ConvertTypeForMem(FieldType); - V = Builder.CreateElementBitCast(V, llvmType, Field->getName()); + V = V.withElementType(llvmType); // TODO: Generate TBAA information that describes this access as a structure // member access and not just an access to an object of the field's type. 
This @@ -4811,7 +4824,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { if (V.isValid()) { llvm::Type *T = ConvertTypeForMem(E->getType()); if (V.getElementType() != T) - LV.setAddress(Builder.CreateElementBitCast(V, T)); + LV.setAddress(V.withElementType(T)); } } return LV; @@ -4870,8 +4883,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { CGM.EmitExplicitCastExprType(CE, this); LValue LV = EmitLValue(E->getSubExpr()); - Address V = Builder.CreateElementBitCast( - LV.getAddress(*this), + Address V = LV.getAddress(*this).withElementType( ConvertTypeForMem(CE->getTypeAsWritten()->getPointeeType())); if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) @@ -4895,8 +4907,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); - Address V = Builder.CreateElementBitCast(LV.getAddress(*this), - ConvertType(E->getType())); + Address V = LV.getAddress(*this).withElementType(ConvertType(E->getType())); return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); } @@ -5106,7 +5117,7 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { functionType = ptrType->getPointeeType(); } else { functionType = E->getType(); - calleePtr = EmitLValue(E).getPointer(*this); + calleePtr = EmitLValue(E, KnownNonNull).getPointer(*this); } assert(functionType->isFunctionType()); @@ -5206,8 +5217,8 @@ CodeGenFunction::EmitCXXTypeidLValue(const CXXTypeidExpr *E) { } Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) { - return Builder.CreateElementBitCast(CGM.GetAddrOfMSGuidDecl(E->getGuidDecl()), - ConvertType(E->getType())); + return CGM.GetAddrOfMSGuidDecl(E->getGuidDecl()) + .withElementType(ConvertType(E->getType())); } LValue CodeGenFunction::EmitCXXUuidofLValue(const CXXUuidofExpr *E) { @@ -5308,33 +5319,56 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl().getDecl(); + assert((!isa_and_present<FunctionDecl>(TargetDecl) || + !cast<FunctionDecl>(TargetDecl)->isImmediateFunction()) && + "trying to emit a call to an immediate function"); + CalleeType = getContext().getCanonicalType(CalleeType); auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType(); CGCallee Callee = OrigCallee; - if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function) && - (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { + if (SanOpts.has(SanitizerKind::Function) && + (!TargetDecl || !isa<FunctionDecl>(TargetDecl)) && + !isa<FunctionNoProtoType>(PointeeType)) { if (llvm::Constant *PrefixSig = CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { SanitizerScope SanScope(this); - // Remove any (C++17) exception specifications, to allow calling e.g. a - // noexcept function through a non-noexcept pointer. - auto ProtoTy = - getContext().getFunctionTypeWithExceptionSpec(PointeeType, EST_None); - llvm::Constant *FTRTTIConst = - CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true); + auto *TypeHash = getUBSanFunctionTypeHash(PointeeType); + llvm::Type *PrefixSigType = PrefixSig->getType(); llvm::StructType *PrefixStructTy = llvm::StructType::get( CGM.getLLVMContext(), {PrefixSigType, Int32Ty}, /*isPacked=*/true); llvm::Value *CalleePtr = Callee.getFunctionPointer(); + // On 32-bit Arm, the low bit of a function pointer indicates whether + // it's using the Arm or Thumb instruction set. 
The actual first + // instruction lives at the same address either way, so we must clear + // that low bit before using the function address to find the prefix + // structure. + // + // This applies to both Arm and Thumb target triples, because + // either one could be used in an interworking context where it + // might be passed function pointers of both types. + llvm::Value *AlignedCalleePtr; + if (CGM.getTriple().isARM() || CGM.getTriple().isThumb()) { + llvm::Value *CalleeAddress = + Builder.CreatePtrToInt(CalleePtr, IntPtrTy); + llvm::Value *Mask = llvm::ConstantInt::get(IntPtrTy, ~1); + llvm::Value *AlignedCalleeAddress = + Builder.CreateAnd(CalleeAddress, Mask); + AlignedCalleePtr = + Builder.CreateIntToPtr(AlignedCalleeAddress, CalleePtr->getType()); + } else { + AlignedCalleePtr = CalleePtr; + } + llvm::Value *CalleePrefixStruct = Builder.CreateBitCast( - CalleePtr, llvm::PointerType::getUnqual(PrefixStructTy)); + AlignedCalleePtr, llvm::PointerType::getUnqual(PrefixStructTy)); llvm::Value *CalleeSigPtr = - Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 0); + Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 0); llvm::Value *CalleeSig = Builder.CreateAlignedLoad(PrefixSigType, CalleeSigPtr, getIntAlign()); llvm::Value *CalleeSigMatch = Builder.CreateICmpEQ(CalleeSig, PrefixSig); @@ -5344,19 +5378,17 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee Builder.CreateCondBr(CalleeSigMatch, TypeCheck, Cont); EmitBlock(TypeCheck); - llvm::Value *CalleeRTTIPtr = - Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 1); - llvm::Value *CalleeRTTIEncoded = - Builder.CreateAlignedLoad(Int32Ty, CalleeRTTIPtr, getPointerAlign()); - llvm::Value *CalleeRTTI = - DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded); - llvm::Value *CalleeRTTIMatch = - Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst); + llvm::Value *CalleeTypeHash = Builder.CreateAlignedLoad( + Int32Ty, + Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 1), + getPointerAlign()); + llvm::Value *CalleeTypeHashMatch = + Builder.CreateICmpEQ(CalleeTypeHash, TypeHash); llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()), EmitCheckTypeDescriptor(CalleeType)}; - EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function), + EmitCheck(std::make_pair(CalleeTypeHashMatch, SanitizerKind::Function), SanitizerHandler::FunctionTypeMismatch, StaticData, - {CalleePtr, CalleeRTTI, FTRTTIConst}); + {CalleePtr}); Builder.CreateBr(Cont); EmitBlock(Cont); @@ -5549,6 +5581,48 @@ void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) { cast<llvm::Instruction>(Val)->setMetadata(llvm::LLVMContext::MD_fpmath, Node); } +void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) { + llvm::Type *EltTy = Val->getType()->getScalarType(); + if (!EltTy->isFloatTy()) + return; + + if ((getLangOpts().OpenCL && + !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || + (getLangOpts().HIP && getLangOpts().CUDAIsDevice && + !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { + // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 3ulp + // + // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt + // build option allows an application to specify that single precision + // floating-point divide (x/y and 1/x) and sqrt used in the program + // source are correctly rounded. 
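A hedged example of what the rewritten -fsanitize=function check catches (the diagnostic wording belongs to the runtime, not this patch): the hash of the callee's declared function type, loaded from just before the Thumb-bit-cleared function entry, is compared against the hash the call site expects.

```cpp
// Compile with -fsanitize=function.
void callee(int) {}

int main() {
  auto *fp = reinterpret_cast<void (*)(float)>(&callee);
  fp(1.0f); // type hashes differ: reported as a function-type mismatch
  return 0; // instead of silently calling through the wrong type
}
```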
+ // + // TODO: CUDA has a prec-sqrt flag + SetFPAccuracy(Val, 3.0f); + } +} + +void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) { + llvm::Type *EltTy = Val->getType()->getScalarType(); + if (!EltTy->isFloatTy()) + return; + + if ((getLangOpts().OpenCL && + !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || + (getLangOpts().HIP && getLangOpts().CUDAIsDevice && + !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { + // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp + // + // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt + // build option allows an application to specify that single precision + // floating-point divide (x/y and 1/x) and sqrt used in the program + // source are correctly rounded. + // + // TODO: CUDA has a prec-div flag + SetFPAccuracy(Val, 2.5f); + } +} + namespace { struct LValueOrRValue { LValue LV; diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 34e535a78dd6..810b28f25fa1 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -85,8 +85,6 @@ public: void EmitCopy(QualType type, const AggValueSlot &dest, const AggValueSlot &src); - void EmitMoveFromReturnSlot(const Expr *E, RValue Src); - void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, QualType ArrayQTy, Expr *ExprToVisit, ArrayRef<Expr *> Args, Expr *ArrayFiller); @@ -131,7 +129,14 @@ public: EnsureDest(E->getType()); if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) { - CGF.EmitAggregateStore(Result, Dest.getAddress(), + Address StoreDest = Dest.getAddress(); + // The emitted value is guaranteed to have the same size as the + // destination but can have a different type. Just do a bitcast in this + // case to avoid incorrect GEPs. 
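The two helpers above encode the OpenCL minimum-accuracy bounds (2.5 ulp for single-precision division, 3 ulp for sqrt) as !fpmath metadata. A sketch, assuming an OpenCL or HIP device compile without the correctly-rounded divide/sqrt option, and -fno-math-errno so sqrt lowers to the intrinsic:

```cpp
float recip(float x) {
  return 1.0f / x;           // fdiv float ..., !fpmath !{float 2.500000e+00}
}

float root(float x) {
  return __builtin_sqrtf(x); // llvm.sqrt.f32 ..., !fpmath !{float 3.000000e+00}
}
```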
+ if (Result->getType() != StoreDest.getType()) + StoreDest = StoreDest.withElementType(Result->getType()); + + CGF.EmitAggregateStore(Result, StoreDest, E->getType().isVolatileQualified()); return; } @@ -525,8 +530,8 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, Emitter.tryEmitForInitializer(ExprToVisit, AS, ArrayQTy)) { auto GV = new llvm::GlobalVariable( CGM.getModule(), C->getType(), - CGM.isTypeConstant(ArrayQTy, /* ExcludeCtorDtor= */ true), - llvm::GlobalValue::PrivateLinkage, C, "constinit", + /* isConstant= */ true, llvm::GlobalValue::PrivateLinkage, C, + "constinit", /* InsertBefore= */ nullptr, llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(AS)); Emitter.finalize(GV); @@ -746,8 +751,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { // GCC union extension QualType Ty = E->getSubExpr()->getType(); - Address CastPtr = - Builder.CreateElementBitCast(Dest.getAddress(), CGF.ConvertType(Ty)); + Address CastPtr = Dest.getAddress().withElementType(CGF.ConvertType(Ty)); EmitInitializationToLValue(E->getSubExpr(), CGF.MakeAddrLValue(CastPtr, Ty)); break; @@ -762,9 +766,8 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { LValue SourceLV = CGF.EmitLValue(E->getSubExpr()); Address SourceAddress = - Builder.CreateElementBitCast(SourceLV.getAddress(CGF), CGF.Int8Ty); - Address DestAddress = - Builder.CreateElementBitCast(Dest.getAddress(), CGF.Int8Ty); + SourceLV.getAddress(CGF).withElementType(CGF.Int8Ty); + Address DestAddress = Dest.getAddress().withElementType(CGF.Int8Ty); llvm::Value *SizeVal = llvm::ConstantInt::get( CGF.SizeTy, CGF.getContext().getTypeSizeInChars(E->getType()).getQuantity()); @@ -1651,11 +1654,19 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), ExprToVisit->getType()); // Handle initialization of an array. - if (ExprToVisit->getType()->isArrayType()) { + if (ExprToVisit->getType()->isConstantArrayType()) { auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType()); EmitArrayInit(Dest.getAddress(), AType, ExprToVisit->getType(), ExprToVisit, InitExprs, ArrayFiller); return; + } else if (ExprToVisit->getType()->isVariableArrayType()) { + // A variable array type that has an initializer can only do empty + // initialization. And because this feature is not exposed as an extension + // in C++, we can safely memset the array memory to zero. + assert(InitExprs.size() == 0 && + "you can only use an empty initializer with VLAs"); + CGF.EmitNullInitialization(Dest.getAddress(), ExprToVisit->getType()); + return; } assert(ExprToVisit->getType()->isRecordType() && @@ -2011,8 +2022,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, // Okay, it seems like a good idea to use an initial memset, emit the call. llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity()); - Address Loc = Slot.getAddress(); - Loc = CGF.Builder.CreateElementBitCast(Loc, CGF.Int8Ty); + Address Loc = Slot.getAddress().withElementType(CGF.Int8Ty); CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false); // Tell the AggExprEmitter that the slot is known zero. @@ -2176,8 +2186,8 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, // we need to use a different call here. We use isVolatile to indicate when // either the source or the destination is volatile. 
- DestPtr = Builder.CreateElementBitCast(DestPtr, Int8Ty); - SrcPtr = Builder.CreateElementBitCast(SrcPtr, Int8Ty); + DestPtr = DestPtr.withElementType(Int8Ty); + SrcPtr = SrcPtr.withElementType(Int8Ty); // Don't do any of the memmove_collectable tests if GC isn't set. if (CGM.getLangOpts().getGC() == LangOptions::NonGC) { diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index b889a4e05ee1..4d3f3e9603d9 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -33,10 +33,12 @@ struct MemberCallInfo { } static MemberCallInfo -commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, +commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, CallArgList &Args, CallArgList *RtlArgs) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); + assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && @@ -44,7 +46,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, // Push the this ptr. const CXXRecordDecl *RD = - CGF.CGM.getCXXABI().getThisArgumentTypeForMethod(MD); + CGF.CGM.getCXXABI().getThisArgumentTypeForMethod(GD); Args.add(RValue::get(This), CGF.getTypes().DeriveThisType(RD, MD)); // If there is an implicit parameter (e.g. VTT), emit it. @@ -110,7 +112,7 @@ RValue CodeGenFunction::EmitCXXDestructorCall( } CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam, + commonEmitCXXMemberOrOperatorCall(*this, Dtor, This, ImplicitParam, ImplicitParamTy, CE, Args, nullptr); return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee, ReturnValueSlot(), Args, nullptr, CE && CE == MustTailCall, @@ -285,7 +287,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); CallArgList Args; commonEmitCXXMemberOrOperatorCall( - *this, Ctor, This.getPointer(*this), /*ImplicitParam=*/nullptr, + *this, {Ctor, Ctor_Complete}, This.getPointer(*this), + /*ImplicitParam=*/nullptr, /*ImplicitParamTy=*/QualType(), CE, Args, nullptr); EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false, @@ -443,9 +446,9 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Emit the 'this' pointer. 
Address This = Address::invalid(); if (BO->getOpcode() == BO_PtrMemI) - This = EmitPointerWithAlignment(BaseExpr); + This = EmitPointerWithAlignment(BaseExpr, nullptr, nullptr, KnownNonNull); else - This = EmitLValue(BaseExpr).getAddress(*this); + This = EmitLValue(BaseExpr, KnownNonNull).getAddress(*this); EmitTypeCheck(TCK_MemberCall, E->getExprLoc(), This.getPointer(), QualType(MPT->getClass(), 0)); @@ -499,7 +502,7 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, if (Base->isEmpty()) return; - DestPtr = CGF.Builder.CreateElementBitCast(DestPtr, CGF.Int8Ty); + DestPtr = DestPtr.withElementType(CGF.Int8Ty); const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(Base); CharUnits NVSize = Layout.getNonVirtualSize(); @@ -552,8 +555,7 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, std::max(Layout.getNonVirtualAlignment(), DestPtr.getAlignment()); NullVariable->setAlignment(Align.getAsAlign()); - Address SrcPtr = - Address(CGF.EmitCastToVoidPtr(NullVariable), CGF.Int8Ty, Align); + Address SrcPtr(NullVariable, CGF.Int8Ty, Align); // Get and call the appropriate llvm.memcpy overload. for (std::pair<CharUnits, CharUnits> Store : Stores) { @@ -765,7 +767,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // wider than that, check whether it's already too big, and if so, // overflow. else if (numElementsWidth > sizeWidth && - numElementsWidth - sizeWidth > count.countLeadingZeros()) + numElementsWidth - sizeWidth > count.countl_zero()) hasAnyOverflow = true; // Okay, compute a count at the right width. @@ -826,8 +828,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // going to have to do a comparison for (2), and this happens to // take care of (1), too. if (numElementsWidth > sizeWidth) { - llvm::APInt threshold(numElementsWidth, 1); - threshold <<= sizeWidth; + llvm::APInt threshold = + llvm::APInt::getOneBitSet(numElementsWidth, sizeWidth); llvm::Value *thresholdV = llvm::ConstantInt::get(numElementsType, threshold); @@ -1074,7 +1076,7 @@ void CodeGenFunction::EmitNewArrayInitializer( if (const ConstantArrayType *CAT = dyn_cast_or_null<ConstantArrayType>( AllocType->getAsArrayTypeUnsafe())) { ElementTy = ConvertTypeForMem(AllocType); - CurPtr = Builder.CreateElementBitCast(CurPtr, ElementTy); + CurPtr = CurPtr.withElementType(ElementTy); InitListElements *= getContext().getConstantArrayElementCount(CAT); } @@ -1131,7 +1133,7 @@ void CodeGenFunction::EmitNewArrayInitializer( } // Switch back to initializing one base element at a time. 
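The APInt::getOneBitSet() call above builds the same threshold the old shift produced. A worked sketch of the overflow rule it implements (widths chosen for illustration; the caller must pass a 128-bit count):

```cpp
#include "llvm/ADT/APInt.h"

// numElementsWidth = 128, sizeWidth = 64: any element count >= 2^64 cannot
// be represented in size_t, so the allocation size must be flagged.
bool overflowsSizeT(const llvm::APInt &Count128) {
  llvm::APInt Threshold = llvm::APInt::getOneBitSet(128, 64); // == 2^64
  return Count128.uge(Threshold);
}
```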
- CurPtr = Builder.CreateElementBitCast(CurPtr, BeginPtr.getElementType()); + CurPtr = CurPtr.withElementType(BeginPtr.getElementType()); } // If all elements have already been initialized, skip any further @@ -1654,7 +1656,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { CharUnits allocationAlign = allocAlign; if (!E->passAlignment() && allocator->isReplaceableGlobalAllocationFunction()) { - unsigned AllocatorAlign = llvm::PowerOf2Floor(std::min<uint64_t>( + unsigned AllocatorAlign = llvm::bit_floor(std::min<uint64_t>( Target.getNewAlign(), getContext().getTypeSize(allocType))); allocationAlign = std::max( allocationAlign, getContext().toCharUnitsFromBits(AllocatorAlign)); @@ -1713,7 +1715,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { } llvm::Type *elementTy = ConvertTypeForMem(allocType); - Address result = Builder.CreateElementBitCast(allocation, elementTy); + Address result = allocation.withElementType(elementTy); // Passing pointer through launder.invariant.group to avoid propagation of // vptrs information which may be included in previous type. @@ -2071,6 +2073,7 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { Builder.CreateCondBr(IsNull, DeleteEnd, DeleteNotNull); EmitBlock(DeleteNotNull); + Ptr.setKnownNonNull(); QualType DeleteTy = E->getDestroyedType(); @@ -2103,7 +2106,8 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { Ptr = Address(Builder.CreateInBoundsGEP(Ptr.getElementType(), Ptr.getPointer(), GEP, "del.first"), - ConvertTypeForMem(DeleteTy), Ptr.getAlignment()); + ConvertTypeForMem(DeleteTy), Ptr.getAlignment(), + Ptr.isKnownNonNull()); } assert(ConvertTypeForMem(DeleteTy) == Ptr.getElementType()); @@ -2190,13 +2194,12 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, } llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { - llvm::Type *StdTypeInfoPtrTy = - ConvertType(E->getType())->getPointerTo(); + llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext()); if (E->isTypeOperand()) { llvm::Constant *TypeInfo = CGM.GetAddrOfRTTIDescriptor(E->getTypeOperand(getContext())); - return Builder.CreateBitCast(TypeInfo, StdTypeInfoPtrTy); + return TypeInfo; } // C++ [expr.typeid]p2: @@ -2206,12 +2209,10 @@ llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { // type) to which the glvalue refers. // If the operand is already most derived object, no need to look up vtable. 
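With opaque pointers, the RTTI descriptor above can be returned directly, with no bitcast to a std::type_info pointer type. For reference, the two typeid forms this code distinguishes:

```cpp
#include <typeinfo>

struct B { virtual ~B() = default; };
struct D : B {};

const std::type_info &staticTI() { return typeid(D); } // constant descriptor

const std::type_info &dynamicTI(B &b) {
  return typeid(b); // polymorphic glvalue: descriptor loaded via the vtable
}
```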
if (E->isPotentiallyEvaluated() && !E->isMostDerived(getContext())) - return EmitTypeidFromVTable(*this, E->getExprOperand(), - StdTypeInfoPtrTy); + return EmitTypeidFromVTable(*this, E->getExprOperand(), PtrTy); QualType OperandTy = E->getExprOperand()->getType(); - return Builder.CreateBitCast(CGM.GetAddrOfRTTIDescriptor(OperandTy), - StdTypeInfoPtrTy); + return CGM.GetAddrOfRTTIDescriptor(OperandTy); } static llvm::Value *EmitDynamicCastToNull(CodeGenFunction &CGF, @@ -2225,8 +2226,8 @@ static llvm::Value *EmitDynamicCastToNull(CodeGenFunction &CGF, if (!CGF.CGM.getCXXABI().EmitBadCastCall(CGF)) return nullptr; - CGF.EmitBlock(CGF.createBasicBlock("dynamic_cast.end")); - return llvm::UndefValue::get(DestLTy); + CGF.Builder.ClearInsertionPoint(); + return llvm::PoisonValue::get(DestLTy); } llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, @@ -2239,17 +2240,16 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, // C++ [expr.dynamic.cast]p7: // If T is "pointer to cv void," then the result is a pointer to the most // derived object pointed to by v. - const PointerType *DestPTy = DestTy->getAs<PointerType>(); - - bool isDynamicCastToVoid; + bool IsDynamicCastToVoid = DestTy->isVoidPointerType(); QualType SrcRecordTy; QualType DestRecordTy; - if (DestPTy) { - isDynamicCastToVoid = DestPTy->getPointeeType()->isVoidType(); + if (IsDynamicCastToVoid) { + SrcRecordTy = SrcTy->getPointeeType(); + // No DestRecordTy. + } else if (const PointerType *DestPTy = DestTy->getAs<PointerType>()) { SrcRecordTy = SrcTy->castAs<PointerType>()->getPointeeType(); DestRecordTy = DestPTy->getPointeeType(); } else { - isDynamicCastToVoid = false; SrcRecordTy = SrcTy; DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType(); } @@ -2262,18 +2262,30 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, EmitTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(), SrcRecordTy); - if (DCE->isAlwaysNull()) - if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) + if (DCE->isAlwaysNull()) { + if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) { + // Expression emission is expected to retain a valid insertion point. + if (!Builder.GetInsertBlock()) + EmitBlock(createBasicBlock("dynamic_cast.unreachable")); return T; + } + } assert(SrcRecordTy->isRecordType() && "source type must be a record type!"); + // If the destination is effectively final, the cast succeeds if and only + // if the dynamic type of the pointer is exactly the destination type. + bool IsExact = !IsDynamicCastToVoid && + CGM.getCodeGenOpts().OptimizationLevel > 0 && + DestRecordTy->getAsCXXRecordDecl()->isEffectivelyFinal() && + CGM.getCXXABI().shouldEmitExactDynamicCast(DestRecordTy); + // C++ [expr.dynamic.cast]p4: // If the value of v is a null pointer value in the pointer case, the result // is the null pointer value of type T. 
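A sketch of the "effectively final" fast path introduced above (the gating on optimization level and ABI is the patch's; this example only illustrates the source shape that benefits):

```cpp
struct Base { virtual ~Base() = default; };
struct Derived final : Base { int x = 0; };

int get(Base *b) {
  // At -O1 and above this can lower to a null check plus a single vptr
  // comparison against Derived's vtable, with no __dynamic_cast call.
  if (auto *d = dynamic_cast<Derived *>(b))
    return d->x;
  return 0;
}
```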
bool ShouldNullCheckSrcValue = - CGM.getCXXABI().shouldDynamicCastCallBeNullChecked(SrcTy->isPointerType(), - SrcRecordTy); + IsExact || CGM.getCXXABI().shouldDynamicCastCallBeNullChecked( + SrcTy->isPointerType(), SrcRecordTy); llvm::BasicBlock *CastNull = nullptr; llvm::BasicBlock *CastNotNull = nullptr; @@ -2289,30 +2301,38 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, } llvm::Value *Value; - if (isDynamicCastToVoid) { - Value = CGM.getCXXABI().EmitDynamicCastToVoid(*this, ThisAddr, SrcRecordTy, - DestTy); + if (IsDynamicCastToVoid) { + Value = CGM.getCXXABI().emitDynamicCastToVoid(*this, ThisAddr, SrcRecordTy); + } else if (IsExact) { + // If the destination type is effectively final, this pointer points to the + // right type if and only if its vptr has the right value. + Value = CGM.getCXXABI().emitExactDynamicCast( + *this, ThisAddr, SrcRecordTy, DestTy, DestRecordTy, CastEnd, CastNull); } else { assert(DestRecordTy->isRecordType() && "destination type must be a record type!"); - Value = CGM.getCXXABI().EmitDynamicCastCall(*this, ThisAddr, SrcRecordTy, + Value = CGM.getCXXABI().emitDynamicCastCall(*this, ThisAddr, SrcRecordTy, DestTy, DestRecordTy, CastEnd); - CastNotNull = Builder.GetInsertBlock(); } + CastNotNull = Builder.GetInsertBlock(); + llvm::Value *NullValue = nullptr; if (ShouldNullCheckSrcValue) { EmitBranch(CastEnd); EmitBlock(CastNull); + NullValue = EmitDynamicCastToNull(*this, DestTy); + CastNull = Builder.GetInsertBlock(); + EmitBranch(CastEnd); } EmitBlock(CastEnd); - if (ShouldNullCheckSrcValue) { + if (CastNull) { llvm::PHINode *PHI = Builder.CreatePHI(Value->getType(), 2); PHI->addIncoming(Value, CastNotNull); - PHI->addIncoming(llvm::Constant::getNullValue(Value->getType()), CastNull); + PHI->addIncoming(NullValue, CastNull); Value = PHI; } diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 7a14a418c7b6..2dd1a991ec97 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -488,15 +488,14 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_LValueBitCast: { LValue origLV = CGF.EmitLValue(Op); - Address V = origLV.getAddress(CGF); - V = Builder.CreateElementBitCast(V, CGF.ConvertType(DestTy)); + Address V = origLV.getAddress(CGF).withElementType(CGF.ConvertType(DestTy)); return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy), Op->getExprLoc()); } case CK_LValueToRValueBitCast: { LValue SourceLVal = CGF.EmitLValue(Op); - Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(CGF), - CGF.ConvertTypeForMem(DestTy)); + Address Addr = SourceLVal.getAddress(CGF).withElementType( + CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); return EmitLoadOfLValue(DestLV, Op->getExprLoc()); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index c38feaaca35a..353ee56839f3 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -932,12 +932,12 @@ tryEmitGlobalCompoundLiteral(ConstantEmitter &emitter, return ConstantAddress::invalid(); } - auto GV = new llvm::GlobalVariable(CGM.getModule(), C->getType(), - CGM.isTypeConstant(E->getType(), true), - llvm::GlobalValue::InternalLinkage, - C, ".compoundliteral", nullptr, - llvm::GlobalVariable::NotThreadLocal, - CGM.getContext().getTargetAddressSpace(addressSpace)); + auto GV = new llvm::GlobalVariable( + CGM.getModule(), C->getType(), + 
CGM.isTypeConstant(E->getType(), true, false), + llvm::GlobalValue::InternalLinkage, C, ".compoundliteral", nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(addressSpace)); emitter.finalize(GV); GV->setAlignment(Align.getAsAlign()); CGM.setAddrOfConstantCompoundLiteral(E, GV); @@ -1215,11 +1215,6 @@ public: return Visit(E->getSubExpr(), T); } - llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E, - QualType T) { - return Visit(E->getSubExpr(), T); - } - llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) { auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType()); assert(CAT && "can't emit array init for non-constant-bound array"); @@ -1322,7 +1317,12 @@ public: assert(CGM.getContext().hasSameUnqualifiedType(Ty, Arg->getType()) && "argument to copy ctor is of wrong type"); - return Visit(Arg, Ty); + // Look through the temporary; it's just converting the value to an + // lvalue to pass it to the constructor. + if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(Arg)) + return Visit(MTE->getSubExpr(), Ty); + // Don't try to support arbitrary lvalue-to-rvalue conversions for now. + return nullptr; } return CGM.EmitNullConstant(Ty); @@ -1340,6 +1340,7 @@ public: std::string Str; CGM.getContext().getObjCEncodingForType(E->getEncodedType(), Str); const ConstantArrayType *CAT = CGM.getContext().getAsConstantArrayType(T); + assert(CAT && "String data not of constant array type!"); // Resize the string to the right size, adding zeros at the end, or // truncating as needed. @@ -1570,7 +1571,7 @@ namespace { } void setLocation(llvm::GlobalVariable *placeholder) { - assert(Locations.find(placeholder) == Locations.end() && + assert(!Locations.contains(placeholder) && "already found location for placeholder!"); // Lazily fill in IndexValues with the values from Indices. @@ -1653,29 +1654,22 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { InConstantContext = D.hasConstantInitialization(); QualType destType = D.getType(); + const Expr *E = D.getInit(); + assert(E && "No initializer to emit"); + + if (!destType->isReferenceType()) { + QualType nonMemoryDestType = getNonMemoryType(CGM, destType); + if (llvm::Constant *C = ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), + nonMemoryDestType)) + return emitForMemory(C, destType); + } // Try to emit the initializer. Note that this can allow some things that // are not allowed by tryEmitPrivateForMemory alone. - if (auto value = D.evaluateValue()) { + if (APValue *value = D.evaluateValue()) return tryEmitPrivateForMemory(*value, destType); - } - - // FIXME: Implement C++11 [basic.start.init]p2: if the initializer of a - // reference is a constant expression, and the reference binds to a temporary, - // then constant initialization is performed. ConstExprEmitter will - // incorrectly emit a prvalue constant in this case, and the calling code - // interprets that as the (pointer) value of the reference, rather than the - // desired value of the referee. - if (destType->isReferenceType()) - return nullptr; - const Expr *E = D.getInit(); - assert(E && "No initializer to emit"); - - auto nonMemoryDestType = getNonMemoryType(CGM, destType); - auto C = - ConstExprEmitter(*this).Visit(const_cast<Expr*>(E), nonMemoryDestType); - return (C ? emitForMemory(C, destType) : nullptr); + return nullptr; } llvm::Constant * @@ -1730,7 +1724,7 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, } // Zero-extend bool. 
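// Illustrative aside (hypothetical declarations): evaluation produces an i1
// for boolean constants, which must be widened to bool's in-memory type
// (typically i8); bit-precise integers already carry their exact width, so
// the added guard below keeps _BitInt values off this path.
bool Flag = true;          // i1 true -> zero-extended for storage
unsigned _BitInt(1) B = 1; // Clang extension in C++; keeps _BitInt lowering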
- if (C->getType()->isIntegerTy(1)) { + if (C->getType()->isIntegerTy(1) && !destType->isBitIntType()) { llvm::Type *boolTy = CGM.getTypes().ConvertTypeForMem(destType); return llvm::ConstantExpr::getZExt(C, boolTy); } @@ -1742,6 +1736,10 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, QualType destType) { assert(!destType->isVoidType() && "can't emit a void constant"); + if (llvm::Constant *C = + ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType)) + return C; + Expr::EvalResult Result; bool Success = false; @@ -1751,13 +1749,10 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, else Success = E->EvaluateAsRValue(Result, CGM.getContext(), InConstantContext); - llvm::Constant *C; if (Success && !Result.HasSideEffects) - C = tryEmitPrivate(Result.Val, destType); - else - C = ConstExprEmitter(*this).Visit(const_cast<Expr*>(E), destType); + return tryEmitPrivate(Result.Val, destType); - return C; + return nullptr; } llvm::Constant *CodeGenModule::getNullPointer(llvm::PointerType *T, QualType QT) { @@ -1832,9 +1827,6 @@ private: return C; llvm::Type *origPtrTy = C->getType(); - unsigned AS = origPtrTy->getPointerAddressSpace(); - llvm::Type *charPtrTy = CGM.Int8Ty->getPointerTo(AS); - C = llvm::ConstantExpr::getBitCast(C, charPtrTy); C = llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset()); C = llvm::ConstantExpr::getPointerCast(C, origPtrTy); return C; @@ -1944,15 +1936,8 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { } // Handle typeid(T). - if (TypeInfoLValue TI = base.dyn_cast<TypeInfoLValue>()) { - llvm::Type *StdTypeInfoPtrTy = - CGM.getTypes().ConvertType(base.getTypeInfoType())->getPointerTo(); - llvm::Constant *TypeInfo = - CGM.GetAddrOfRTTIDescriptor(QualType(TI.getType(), 0)); - if (TypeInfo->getType() != StdTypeInfoPtrTy) - TypeInfo = llvm::ConstantExpr::getBitCast(TypeInfo, StdTypeInfoPtrTy); - return TypeInfo; - } + if (TypeInfoLValue TI = base.dyn_cast<TypeInfoLValue>()) + return CGM.GetAddrOfRTTIDescriptor(QualType(TI.getType(), 0)); // Otherwise, it must be an expression. return Visit(base.get<const Expr*>()); @@ -1986,7 +1971,7 @@ static ConstantLValue emitConstantObjCStringLiteral(const StringLiteral *S, QualType T, CodeGenModule &CGM) { auto C = CGM.getObjCRuntime().GenerateConstantString(S); - return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(T)); + return C.withElementType(CGM.getTypes().ConvertTypeForMem(T)); } ConstantLValue @@ -2189,6 +2174,11 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, llvm::ArrayType *Desired = cast<llvm::ArrayType>(CGM.getTypes().ConvertType(DestType)); + + // Fix the type of incomplete arrays if the initializer isn't empty. 
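// Illustrative aside (hypothetical declarations): a case where DestType is
// an incomplete array completed by its initializer; the IR array type is
// re-derived from the element count below.
extern const int Lut[];         // incomplete array type at first sight
const int Lut[] = {1, 2, 3, 4}; // emitted as [4 x i32]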
+ if (DestType->isIncompleteArrayType() && !Elts.empty()) + Desired = llvm::ArrayType::get(Desired->getElementType(), Elts.size()); + return EmitArrayConstant(CGM, Desired, CommonElementType, NumElements, Elts, Filler); } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index a0dcb978b1ac..fe1a59b21f38 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -814,13 +814,21 @@ public: Value *(ScalarExprEmitter::*F)(const BinOpInfo &)); QualType getPromotionType(QualType Ty) { + const auto &Ctx = CGF.getContext(); if (auto *CT = Ty->getAs<ComplexType>()) { QualType ElementType = CT->getElementType(); - if (ElementType.UseExcessPrecision(CGF.getContext())) - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + if (ElementType.UseExcessPrecision(Ctx)) + return Ctx.getComplexType(Ctx.FloatTy); } - if (Ty.UseExcessPrecision(CGF.getContext())) - return CGF.getContext().FloatTy; + + if (Ty.UseExcessPrecision(Ctx)) { + if (auto *VT = Ty->getAs<VectorType>()) { + unsigned NumElements = VT->getNumElements(); + return Ctx.getVectorType(Ctx.FloatTy, NumElements, VT->getVectorKind()); + } + return Ctx.FloatTy; + } + return QualType(); } @@ -1861,6 +1869,23 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { return Visit(E->getInit(0)); } + if (isa<llvm::ScalableVectorType>(VType)) { + if (NumInitElements == 0) { + // C++11 value-initialization for the vector. + return EmitNullValue(E->getType()); + } + + if (NumInitElements == 1) { + Expr *InitVector = E->getInit(0); + + // Initialize from another scalable vector of the same type. + if (InitVector->getType() == E->getType()) + return Visit(InitVector); + } + + llvm_unreachable("Unexpected initialization of a scalable vector!"); + } + unsigned ResElts = cast<llvm::FixedVectorType>(VType)->getNumElements(); // Loop over initializers collecting the Value for each, and remembering @@ -2038,15 +2063,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_LValueBitCast: case CK_ObjCObjectLValueCast: { Address Addr = EmitLValue(E).getAddress(CGF); - Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy)); + Addr = Addr.withElementType(CGF.ConvertTypeForMem(DestTy)); LValue LV = CGF.MakeAddrLValue(Addr, DestTy); return EmitLoadOfLValue(LV, CE->getExprLoc()); } case CK_LValueToRValueBitCast: { LValue SourceLVal = CGF.EmitLValue(E); - Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(CGF), - CGF.ConvertTypeForMem(DestTy)); + Address Addr = SourceLVal.getAddress(CGF).withElementType( + CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); return EmitLoadOfLValue(DestLV, CE->getExprLoc()); @@ -2098,7 +2123,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // Update heapallocsite metadata when there is an explicit pointer cast. 
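// Illustrative aside (hypothetical snippet): with CodeView debug info
// enabled, an explicit cast of an allocator call's result records the
// pointee type on the call via !heapallocsite metadata, e.g.:
#include <cstdlib>
struct Node { int V; Node *Next; };
Node *newNode() {
  return (Node *)std::malloc(sizeof(Node)); // malloc call tagged for Node
}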
if (auto *CI = dyn_cast<llvm::CallBase>(Src)) { - if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) { + if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE) && + !isa<CastExpr>(E)) { QualType PointeeType = DestTy->getPointeeType(); if (!PointeeType.isNull()) CGF.getDebugInfo()->addHeapAllocSiteMetadata(CI, PointeeType, @@ -2126,7 +2152,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Value *UndefVec = llvm::UndefValue::get(DstTy); llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); llvm::Value *Result = Builder.CreateInsertVector( - DstTy, UndefVec, Src, Zero, "castScalableSve"); + DstTy, UndefVec, Src, Zero, "cast.scalable"); if (NeedsBitCast) Result = Builder.CreateBitCast(Result, OrigType); return Result; @@ -2150,7 +2176,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } if (ScalableSrc->getElementType() == FixedDst->getElementType()) { llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); - return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve"); + return Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed"); } } } @@ -2168,8 +2194,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Address Addr = CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-value"); LValue LV = CGF.MakeAddrLValue(Addr, E->getType()); CGF.EmitStoreOfScalar(Src, LV); - Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy), - "castFixedSve"); + Addr = Addr.withElementType(CGF.ConvertTypeForMem(DestTy)); LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); return EmitLoadOfLValue(DestLV, CE->getExprLoc()); @@ -2681,15 +2706,13 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (type->isFunctionType()) { llvm::Value *amt = Builder.getInt32(amount); - value = CGF.EmitCastToVoidPtr(value); if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(CGF.Int8Ty, value, amt, "incdec.funcptr"); else - value = CGF.EmitCheckedInBoundsGEP(CGF.Int8Ty, value, amt, - /*SignedIndices=*/false, - isSubtraction, E->getExprLoc(), - "incdec.funcptr"); - value = Builder.CreateBitCast(value, input->getType()); + value = + CGF.EmitCheckedInBoundsGEP(CGF.Int8Ty, value, amt, + /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "incdec.funcptr"); // For everything else, we can just do a simple increment. } else { @@ -2800,7 +2823,6 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Objective-C pointer types. 
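// Aside on the incdec.funcptr path above (hypothetical snippet; a GNU
// extension, warned about under -Wpointer-arith): ++/-- on a function
// pointer steps by exactly one byte, now emitted as a bare i8 GEP with no
// surrounding casts. The Objective-C pointer case continues below.
using VoidFn = void (*)();
VoidFn bumpFn(VoidFn FP) {
  ++FP;
  return FP;
}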
} else { const ObjCObjectPointerType *OPT = type->castAs<ObjCObjectPointerType>(); - value = CGF.EmitCastToVoidPtr(value); CharUnits size = CGF.getContext().getTypeSizeInChars(OPT->getObjectType()); if (!isInc) size = -size; @@ -3456,21 +3478,7 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { llvm::Value *Val; CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); - if ((CGF.getLangOpts().OpenCL && - !CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || - (CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice && - !CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { - // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp - // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt - // build option allows an application to specify that single precision - // floating-point divide (x/y and 1/x) and sqrt used in the program - // source are correctly rounded. - llvm::Type *ValTy = Val->getType(); - if (ValTy->isFloatTy() || - (isa<llvm::VectorType>(ValTy) && - cast<llvm::VectorType>(ValTy)->getElementType()->isFloatTy())) - CGF.SetFPAccuracy(Val, 2.5); - } + CGF.SetDivFPAccuracy(Val); return Val; } else if (Ops.isFixedPointOp()) @@ -3711,11 +3719,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // Explicitly handle GNU void* and function pointer arithmetic extensions. The // GNU void* casts amount to no-ops since our void* type is i8*, but this is // future proof. - if (elementType->isVoidType() || elementType->isFunctionType()) { - Value *result = CGF.EmitCastToVoidPtr(pointer); - result = CGF.Builder.CreateGEP(CGF.Int8Ty, result, index, "add.ptr"); - return CGF.Builder.CreateBitCast(result, pointer->getType()); - } + if (elementType->isVoidType() || elementType->isFunctionType()) + return CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr"); llvm::Type *elemTy = CGF.ConvertTypeForMem(elementType); if (CGF.getLangOpts().isSignedOverflowDefined()) @@ -3734,8 +3739,6 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, static Value* buildFMulAdd(llvm::Instruction *MulOp, Value *Addend, const CodeGenFunction &CGF, CGBuilderTy &Builder, bool negMul, bool negAdd) { - assert(!(negMul && negAdd) && "Only one of negMul and negAdd should be set."); - Value *MulOp0 = MulOp->getOperand(0); Value *MulOp1 = MulOp->getOperand(1); if (negMul) @@ -3780,31 +3783,70 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, if (!op.FPFeatures.allowFPContractWithinStatement()) return nullptr; + Value *LHS = op.LHS; + Value *RHS = op.RHS; + + // Peek through fneg to look for fmul. Make sure fneg has no users, and that + // it is the only use of its operand. + bool NegLHS = false; + if (auto *LHSUnOp = dyn_cast<llvm::UnaryOperator>(LHS)) { + if (LHSUnOp->getOpcode() == llvm::Instruction::FNeg && + LHSUnOp->use_empty() && LHSUnOp->getOperand(0)->hasOneUse()) { + LHS = LHSUnOp->getOperand(0); + NegLHS = true; + } + } + + bool NegRHS = false; + if (auto *RHSUnOp = dyn_cast<llvm::UnaryOperator>(RHS)) { + if (RHSUnOp->getOpcode() == llvm::Instruction::FNeg && + RHSUnOp->use_empty() && RHSUnOp->getOperand(0)->hasOneUse()) { + RHS = RHSUnOp->getOperand(0); + NegRHS = true; + } + } + // We have a potentially fusable op. Look for a mul on one of the operands. // Also, make sure that the mul result isn't used directly. In that case, // there's no point creating a muladd operation. 
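// Illustrative aside (hypothetical functions): with contraction enabled
// (e.g. -ffp-contract=on), each return below may fuse into a single
// llvm.fmuladd; the fneg peeking added here extends this to the negated
// multiply in the middle case.
float mad(float A, float B, float C)  { return A * B + C; }
float nmad(float A, float B, float C) { return -(A * B) + C; } // via NegLHS
float msub(float A, float B, float C) { return A * B - C; }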
- if (auto *LHSBinOp = dyn_cast<llvm::BinaryOperator>(op.LHS)) { + if (auto *LHSBinOp = dyn_cast<llvm::BinaryOperator>(LHS)) { if (LHSBinOp->getOpcode() == llvm::Instruction::FMul && - LHSBinOp->use_empty()) - return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, false, isSub); + (LHSBinOp->use_empty() || NegLHS)) { + // If we looked through fneg, erase it. + if (NegLHS) + cast<llvm::Instruction>(op.LHS)->eraseFromParent(); + return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, NegLHS, isSub); + } } - if (auto *RHSBinOp = dyn_cast<llvm::BinaryOperator>(op.RHS)) { + if (auto *RHSBinOp = dyn_cast<llvm::BinaryOperator>(RHS)) { if (RHSBinOp->getOpcode() == llvm::Instruction::FMul && - RHSBinOp->use_empty()) - return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false); + (RHSBinOp->use_empty() || NegRHS)) { + // If we looked through fneg, erase it. + if (NegRHS) + cast<llvm::Instruction>(op.RHS)->eraseFromParent(); + return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub ^ NegRHS, false); + } } - if (auto *LHSBinOp = dyn_cast<llvm::CallBase>(op.LHS)) { + if (auto *LHSBinOp = dyn_cast<llvm::CallBase>(LHS)) { if (LHSBinOp->getIntrinsicID() == llvm::Intrinsic::experimental_constrained_fmul && - LHSBinOp->use_empty()) - return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, false, isSub); + (LHSBinOp->use_empty() || NegLHS)) { + // If we looked through fneg, erase it. + if (NegLHS) + cast<llvm::Instruction>(op.LHS)->eraseFromParent(); + return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, NegLHS, isSub); + } } - if (auto *RHSBinOp = dyn_cast<llvm::CallBase>(op.RHS)) { + if (auto *RHSBinOp = dyn_cast<llvm::CallBase>(RHS)) { if (RHSBinOp->getIntrinsicID() == llvm::Intrinsic::experimental_constrained_fmul && - RHSBinOp->use_empty()) - return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false); + (RHSBinOp->use_empty() || NegRHS)) { + // If we looked through fneg, erase it. + if (NegRHS) + cast<llvm::Instruction>(op.RHS)->eraseFromParent(); + return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub ^ NegRHS, false); + } } return nullptr; @@ -5098,7 +5140,7 @@ LValue CodeGenFunction::EmitObjCIsaExpr(const ObjCIsaExpr *E) { } // Cast the address to Class*. - Addr = Builder.CreateElementBitCast(Addr, ConvertType(E->getType())); + Addr = Addr.withElementType(ConvertType(E->getType())); return MakeAddrLValue(Addr, E->getType()); } diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index c39e0cc75f2d..75fb06de9384 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -125,7 +125,7 @@ packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) { } } -bool containsNonScalarVarargs(CodeGenFunction *CGF, CallArgList Args) { +bool containsNonScalarVarargs(CodeGenFunction *CGF, const CallArgList &Args) { return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) { return !A.getRValue(*CGF).isScalar(); }); @@ -189,7 +189,7 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { /* ParamsToSkip = */ 0); SmallVector<llvm::Value *, 8> Args; - for (auto A : CallArgs) { + for (const auto &A : CallArgs) { // We don't know how to emit non-scalar varargs. 
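// Illustrative aside (hypothetical snippet): on this device printf path only
// scalar variadic arguments are supported; an aggregate like the one in the
// comment below would be diagnosed rather than silently miscompiled.
struct Pair { int A, B; };
void printfDemo() {
  __builtin_printf("%d\n", 42); // fine: scalar vararg
  // passing Pair{1, 2} as a vararg would hit "non-scalar arg to printf"
}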
if (!A.getRValue(*this).isScalar()) { CGM.ErrorUnsupported(E, "non-scalar arg to printf"); @@ -202,7 +202,10 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); - auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args); + + bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal == + clang::TargetOptions::AMDGPUPrintfKind::Buffered); + auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered); Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); return RValue::get(Printf); } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 5882f491d597..e9fa273f21cc 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -175,7 +175,7 @@ void CGHLSLRuntime::finishCodeGen() { for (auto &Buf : Buffers) { layoutBuffer(Buf, DL); GlobalVariable *GV = replaceBuffer(Buf); - M.getGlobalList().push_back(GV); + M.insertGlobalVariable(GV); llvm::hlsl::ResourceClass RC = Buf.IsCBuffer ? llvm::hlsl::ResourceClass::CBuffer : llvm::hlsl::ResourceClass::SRV; diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp index 0abf39ad1f28..3d2b1b8b2f78 100644 --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -323,11 +323,11 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, template <size_t N, size_t... Ints> static std::array<Address, N> getParamAddrs(std::index_sequence<Ints...> IntSeq, std::array<CharUnits, N> Alignments, - FunctionArgList Args, + const FunctionArgList &Args, CodeGenFunction *CGF) { return std::array<Address, N>{ {Address(CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[Ints])), - CGF->VoidPtrTy, Alignments[Ints])...}}; + CGF->VoidPtrTy, Alignments[Ints], KnownNonNull)...}}; } // Template classes that are used as bases for classes that emit special @@ -365,9 +365,8 @@ template <class Derived> struct GenFuncBase { llvm::ConstantInt::get(NumElts->getType(), BaseEltSize); llvm::Value *SizeInBytes = CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts); - Address BC = CGF.Builder.CreateElementBitCast(DstAddr, CGF.CGM.Int8Ty); - llvm::Value *DstArrayEnd = - CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BC.getPointer(), SizeInBytes); + llvm::Value *DstArrayEnd = CGF.Builder.CreateInBoundsGEP( + CGF.Int8Ty, DstAddr.getPointer(), SizeInBytes); DstArrayEnd = CGF.Builder.CreateBitCast( DstArrayEnd, CGF.CGM.Int8PtrPtrTy, "dstarray.end"); llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock(); @@ -426,9 +425,9 @@ template <class Derived> struct GenFuncBase { assert(Addr.isValid() && "invalid address"); if (Offset.getQuantity() == 0) return Addr; - Addr = CGF->Builder.CreateElementBitCast(Addr, CGF->CGM.Int8Ty); + Addr = Addr.withElementType(CGF->CGM.Int8Ty); Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity()); - return CGF->Builder.CreateElementBitCast(Addr, CGF->CGM.Int8PtrTy); + return Addr.withElementType(CGF->CGM.Int8PtrTy); } Address getAddrWithOffset(Address Addr, CharUnits StructFieldOffset, @@ -491,8 +490,7 @@ template <class Derived> struct GenFuncBase { for (unsigned I = 0; I < N; ++I) { Alignments[I] = Addrs[I].getAlignment(); - Ptrs[I] = CallerCGF.Builder.CreateElementBitCast( - Addrs[I], CallerCGF.CGM.Int8PtrTy).getPointer(); + Ptrs[I] = Addrs[I].getPointer(); } if (llvm::Function *F = @@ -522,20 +520,19 @@ 
struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>, Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], this->Start); // Emit memcpy. - if (Size.getQuantity() >= 16 || !llvm::isPowerOf2_32(Size.getQuantity())) { + if (Size.getQuantity() >= 16 || + !llvm::has_single_bit<uint32_t>(Size.getQuantity())) { llvm::Value *SizeVal = llvm::ConstantInt::get(this->CGF->SizeTy, Size.getQuantity()); - DstAddr = - this->CGF->Builder.CreateElementBitCast(DstAddr, this->CGF->Int8Ty); - SrcAddr = - this->CGF->Builder.CreateElementBitCast(SrcAddr, this->CGF->Int8Ty); + DstAddr = DstAddr.withElementType(this->CGF->Int8Ty); + SrcAddr = SrcAddr.withElementType(this->CGF->Int8Ty); this->CGF->Builder.CreateMemCpy(DstAddr, SrcAddr, SizeVal, false); } else { llvm::Type *Ty = llvm::Type::getIntNTy( this->CGF->getLLVMContext(), Size.getQuantity() * this->CGF->getContext().getCharWidth()); - DstAddr = this->CGF->Builder.CreateElementBitCast(DstAddr, Ty); - SrcAddr = this->CGF->Builder.CreateElementBitCast(SrcAddr, Ty); + DstAddr = DstAddr.withElementType(Ty); + SrcAddr = SrcAddr.withElementType(Ty); llvm::Value *SrcVal = this->CGF->Builder.CreateLoad(SrcAddr, false); this->CGF->Builder.CreateStore(SrcVal, DstAddr, false); } @@ -555,19 +552,17 @@ struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>, QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0); llvm::Type *Ty = this->CGF->ConvertType(RT); Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset); - LValue DstBase = this->CGF->MakeAddrLValue( - this->CGF->Builder.CreateElementBitCast(DstAddr, Ty), FT); + LValue DstBase = + this->CGF->MakeAddrLValue(DstAddr.withElementType(Ty), FT); DstLV = this->CGF->EmitLValueForField(DstBase, FD); Address SrcAddr = this->getAddrWithOffset(Addrs[SrcIdx], Offset); - LValue SrcBase = this->CGF->MakeAddrLValue( - this->CGF->Builder.CreateElementBitCast(SrcAddr, Ty), FT); + LValue SrcBase = + this->CGF->MakeAddrLValue(SrcAddr.withElementType(Ty), FT); SrcLV = this->CGF->EmitLValueForField(SrcBase, FD); } else { llvm::Type *Ty = this->CGF->ConvertTypeForMem(FT); - Address DstAddr = - this->CGF->Builder.CreateElementBitCast(Addrs[DstIdx], Ty); - Address SrcAddr = - this->CGF->Builder.CreateElementBitCast(Addrs[SrcIdx], Ty); + Address DstAddr = Addrs[DstIdx].withElementType(Ty); + Address SrcAddr = Addrs[SrcIdx].withElementType(Ty); DstLV = this->CGF->MakeAddrLValue(DstAddr, FT); SrcLV = this->CGF->MakeAddrLValue(SrcAddr, FT); } @@ -665,7 +660,7 @@ struct GenDefaultInitialize llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity()); Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); - Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty); + Address Loc = DstAddr.withElementType(CGF->Int8Ty); CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal, IsVolatile); } @@ -817,8 +812,7 @@ void CodeGenFunction::destroyNonTrivialCStruct(CodeGenFunction &CGF, // such structure. void CodeGenFunction::defaultInitNonTrivialCStructVar(LValue Dst) { GenDefaultInitialize Gen(getContext()); - Address DstPtr = - Builder.CreateElementBitCast(Dst.getAddress(*this), CGM.Int8PtrTy); + Address DstPtr = Dst.getAddress(*this).withElementType(CGM.Int8PtrTy); Gen.setCGF(this); QualType QT = Dst.getType(); QT = Dst.isVolatile() ? 
QT.withVolatile() : QT; @@ -831,7 +825,7 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, std::array<Address, N> Addrs) { auto SetArtificialLoc = ApplyDebugLocation::CreateArtificial(CGF); for (unsigned I = 0; I < N; ++I) - Addrs[I] = CGF.Builder.CreateElementBitCast(Addrs[I], CGF.CGM.Int8PtrTy); + Addrs[I] = Addrs[I].withElementType(CGF.CGM.Int8PtrTy); QT = IsVolatile ? QT.withVolatile() : QT; Gen.callFunc(FuncName, QT, Addrs, CGF); } diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 7df2088a81d7..46c37eaea82b 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -140,7 +140,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy); llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc()); cast<llvm::LoadInst>(Ptr)->setMetadata( - CGM.getModule().getMDKindID("invariant.load"), + llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(getLLVMContext(), std::nullopt)); return Builder.CreateBitCast(Ptr, ConvertType(E->getType())); } @@ -1190,7 +1190,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // Perform an atomic load. This does not impose ordering constraints. Address ivarAddr = LV.getAddress(*this); - ivarAddr = Builder.CreateElementBitCast(ivarAddr, bitcastType); + ivarAddr = ivarAddr.withElementType(bitcastType); llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load"); load->setAtomic(llvm::AtomicOrdering::Unordered); @@ -1204,8 +1204,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, bitcastType = llvm::Type::getIntNTy(getLLVMContext(), retTySize); ivarVal = Builder.CreateTrunc(load, bitcastType); } - Builder.CreateStore(ivarVal, - Builder.CreateElementBitCast(ReturnValue, bitcastType)); + Builder.CreateStore(ivarVal, ReturnValue.withElementType(bitcastType)); // Make sure we don't do an autorelease. AutoreleaseResult = false; @@ -1485,15 +1484,13 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, // Currently, all atomic accesses have to be through integer // types, so there's no point in trying to pick a prettier type. - llvm::Type *bitcastType = - llvm::Type::getIntNTy(getLLVMContext(), - getContext().toBits(strategy.getIvarSize())); + llvm::Type *castType = llvm::Type::getIntNTy( + getLLVMContext(), getContext().toBits(strategy.getIvarSize())); // Cast both arguments to the chosen operation type. - argAddr = Builder.CreateElementBitCast(argAddr, bitcastType); - ivarAddr = Builder.CreateElementBitCast(ivarAddr, bitcastType); + argAddr = argAddr.withElementType(castType); + ivarAddr = ivarAddr.withElementType(castType); - // This bitcast load is likely to cause some nasty IR. llvm::Value *load = Builder.CreateLoad(argAddr); // Perform an atomic store. There are no memory ordering requirements. @@ -2205,18 +2202,7 @@ static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr, if (!fn) fn = getARCIntrinsic(IntID, CGF.CGM); - // Cast the argument to 'id*'. - llvm::Type *origType = addr.getElementType(); - addr = CGF.Builder.CreateElementBitCast(addr, CGF.Int8PtrTy); - - // Call the function. - llvm::Value *result = CGF.EmitNounwindRuntimeCall(fn, addr.getPointer()); - - // Cast the result back to a dereference of the original type. 
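// Illustrative aside (portable analogue, not the patch's code): the atomic
// property accessors above move the ivar through an integer of its exact
// width using loads/stores with no ordering constraints; the closest
// standard C++ spelling is a relaxed atomic access, which guarantees only
// freedom from tearing.
#include <atomic>
int readIvarAnalogue(const std::atomic<int> &Ivar) {
  return Ivar.load(std::memory_order_relaxed);
}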
- if (origType != CGF.Int8PtrTy) - result = CGF.Builder.CreateBitCast(result, origType); - - return result; + return CGF.EmitNounwindRuntimeCall(fn, addr.getPointer()); } /// Perform an operation having the following signature: @@ -2661,9 +2647,6 @@ void CodeGenFunction::EmitARCDestroyWeak(Address addr) { if (!fn) fn = getARCIntrinsic(llvm::Intrinsic::objc_destroyWeak, CGM); - // Cast the argument to 'id*'. - addr = Builder.CreateElementBitCast(addr, Int8PtrTy); - EmitNounwindRuntimeCall(fn, addr.getPointer()); } diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index c7b193e34ea0..09b6c3ac6adf 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -46,17 +46,13 @@ namespace { /// types and the function declaration into a module if they're not used, and /// avoids constructing the type more than once if it's used more than once. class LazyRuntimeFunction { - CodeGenModule *CGM; - llvm::FunctionType *FTy; - const char *FunctionName; - llvm::FunctionCallee Function; + CodeGenModule *CGM = nullptr; + llvm::FunctionType *FTy = nullptr; + const char *FunctionName = nullptr; + llvm::FunctionCallee Function = nullptr; public: - /// Constructor leaves this class uninitialized, because it is intended to - /// be used as a field in another class and not all of the types that are - /// used as arguments will necessarily be available at construction time. - LazyRuntimeFunction() - : CGM(nullptr), FunctionName(nullptr), Function(nullptr) {} + LazyRuntimeFunction() = default; /// Initialises the lazy function with the name, return type, and the types /// of the arguments. diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index c739d3742f80..32f4f411347a 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -3809,15 +3809,9 @@ llvm::Constant *CGObjCMac::EmitIvarList(const ObjCImplementationDecl *ID, ivarList.fillPlaceholderWithInt(countSlot, ObjCTypes.IntTy, count); llvm::GlobalVariable *GV; - if (ForClass) - GV = - CreateMetadataVar("OBJC_CLASS_VARIABLES_" + ID->getName(), ivarList, - "__OBJC,__class_vars,regular,no_dead_strip", - CGM.getPointerAlign(), true); - else - GV = CreateMetadataVar("OBJC_INSTANCE_VARIABLES_" + ID->getName(), ivarList, - "__OBJC,__instance_vars,regular,no_dead_strip", - CGM.getPointerAlign(), true); + GV = CreateMetadataVar("OBJC_INSTANCE_VARIABLES_" + ID->getName(), ivarList, + "__OBJC,__instance_vars,regular,no_dead_strip", + CGM.getPointerAlign(), true); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListPtrTy); } @@ -5023,11 +5017,8 @@ void CGObjCMac::EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, } void CGObjCMac::EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, - Address DestPtr, - Address SrcPtr, + Address DestPtr, Address SrcPtr, llvm::Value *size) { - SrcPtr = CGF.Builder.CreateElementBitCast(SrcPtr, CGF.Int8Ty); - DestPtr = CGF.Builder.CreateElementBitCast(DestPtr, CGF.Int8Ty); llvm::Value *args[] = { DestPtr.getPointer(), SrcPtr.getPointer(), size }; CGF.EmitNounwindRuntimeCall(ObjCTypes.GcMemmoveCollectableFn(), args); } @@ -5293,12 +5284,7 @@ llvm::Constant *CGObjCCommonMac::GetClassName(StringRef RuntimeName) { } llvm::Function *CGObjCCommonMac::GetMethodDefinition(const ObjCMethodDecl *MD) { - llvm::DenseMap<const ObjCMethodDecl*, llvm::Function*>::iterator - I = MethodDefinitions.find(MD); - if (I != MethodDefinitions.end()) - return I->second; - - return nullptr; + return MethodDefinitions.lookup(MD); } /// 
GetIvarLayoutName - Returns a unique constant for the given @@ -7229,7 +7215,7 @@ CGObjCNonFragileABIMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, CGF.getSizeAlign(), "ivar"); if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) cast<llvm::LoadInst>(IvarOffsetValue) - ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), + ->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(VMContext, std::nullopt)); } @@ -7431,7 +7417,7 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, GV->eraseFromParent(); } GV = NewGV; - CGM.getModule().getGlobalList().push_back(GV); + CGM.getModule().insertGlobalVariable(GV); } assert(GV->getLinkage() == L); @@ -7629,7 +7615,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitSelector(CodeGenFunction &CGF, Address Addr = EmitSelectorAddr(Sel); llvm::LoadInst* LI = CGF.Builder.CreateLoad(Addr); - LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), + LI->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(VMContext, std::nullopt)); return LI; } @@ -7701,12 +7687,8 @@ void CGObjCNonFragileABIMac::EmitObjCStrongCastAssign( } void CGObjCNonFragileABIMac::EmitGCMemmoveCollectable( - CodeGen::CodeGenFunction &CGF, - Address DestPtr, - Address SrcPtr, - llvm::Value *Size) { - SrcPtr = CGF.Builder.CreateElementBitCast(SrcPtr, CGF.Int8Ty); - DestPtr = CGF.Builder.CreateElementBitCast(DestPtr, CGF.Int8Ty); + CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, + llvm::Value *Size) { llvm::Value *args[] = { DestPtr.getPointer(), SrcPtr.getPointer(), Size }; CGF.EmitNounwindRuntimeCall(ObjCTypes.GcMemmoveCollectableFn(), args); } diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp index 9097a8cf7009..634a3d5a938d 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -107,10 +107,10 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, CGF.CGM.getContext().toBits(StorageSize), CharUnits::fromQuantity(0))); - Address Addr = Address(V, CGF.Int8Ty, Alignment); - Addr = CGF.Builder.CreateElementBitCast(Addr, - llvm::Type::getIntNTy(CGF.getLLVMContext(), - Info->StorageSize)); + Address Addr = + Address(V, llvm::Type::getIntNTy(CGF.getLLVMContext(), Info->StorageSize), + Alignment); + return LValue::MakeBitfield(Addr, *Info, IvarTy, LValueBaseInfo(AlignmentSource::Decl), TBAAAccessInfo()); @@ -364,14 +364,14 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, CallArgList &callArgs) { unsigned ProgramAS = CGM.getDataLayout().getProgramAddressSpace(); + llvm::PointerType *signatureType = + llvm::PointerType::get(CGM.getLLVMContext(), ProgramAS); + // If there's a method, use information from that. if (method) { const CGFunctionInfo &signature = CGM.getTypes().arrangeObjCMessageSendSignature(method, callArgs[0].Ty); - llvm::PointerType *signatureType = - CGM.getTypes().GetFunctionType(signature)->getPointerTo(ProgramAS); - const CGFunctionInfo &signatureForCall = CGM.getTypes().arrangeCall(signature, callArgs); @@ -382,9 +382,6 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, const CGFunctionInfo &argsInfo = CGM.getTypes().arrangeUnprototypedObjCMessageSend(resultType, callArgs); - // Derive the signature to call from that. 
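// Condensed usage sketch of the metadata change in the hunks above: the
// !invariant.load kind is now attached via its fixed enum ID instead of a
// runtime string lookup, telling the optimizer the loaded location never
// changes once dereferenceable.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <optional>
void markInvariantLoad(llvm::LoadInst *LI) {
  LI->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(LI->getContext(), std::nullopt));
}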
- llvm::PointerType *signatureType = - CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo(ProgramAS); return MessageSendInfo(argsInfo, signatureType); } diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp index ab8de7ecf50c..dc2330a29976 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -31,8 +31,11 @@ void CGOpenCLRuntime::EmitWorkGroupLocalVarDecl(CodeGenFunction &CGF, } llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { - assert(T->isOpenCLSpecificType() && - "Not an OpenCL specific type!"); + assert(T->isOpenCLSpecificType() && "Not an OpenCL specific type!"); + + // Check if the target has a specific translation for this type first. + if (llvm::Type *TransTy = CGM.getTargetCodeGenInfo().getOpenCLType(CGM, T)) + return TransTy; switch (cast<BuiltinType>(T)->getKind()) { default: @@ -75,6 +78,9 @@ llvm::PointerType *CGOpenCLRuntime::getPointerType(const Type *T, } llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) { + if (llvm::Type *PipeTy = CGM.getTargetCodeGenInfo().getOpenCLType(CGM, T)) + return PipeTy; + if (T->isReadOnly()) return getPipeType(T, "opencl.pipe_ro_t", PipeROTy); else @@ -91,12 +97,18 @@ llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T, StringRef Name, return PipeTy; } -llvm::PointerType *CGOpenCLRuntime::getSamplerType(const Type *T) { - if (!SamplerTy) - SamplerTy = llvm::PointerType::get(llvm::StructType::create( - CGM.getLLVMContext(), "opencl.sampler_t"), - CGM.getContext().getTargetAddressSpace( - CGM.getContext().getOpenCLTypeAddrSpace(T))); +llvm::Type *CGOpenCLRuntime::getSamplerType(const Type *T) { + if (SamplerTy) + return SamplerTy; + + if (llvm::Type *TransTy = CGM.getTargetCodeGenInfo().getOpenCLType( + CGM, CGM.getContext().OCLSamplerTy.getTypePtr())) + SamplerTy = TransTy; + else + SamplerTy = llvm::PointerType::get( + llvm::StructType::create(CGM.getLLVMContext(), "opencl.sampler_t"), + CGM.getContext().getTargetAddressSpace( + CGM.getContext().getOpenCLTypeAddrSpace(T))); return SamplerTy; } @@ -149,14 +161,13 @@ static const BlockExpr *getBlockExpr(const Expr *E) { void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block, llvm::Type *BlockTy) { - assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() && - "Block expression emitted twice"); + assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice"); assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function"); assert(Block->getType()->isPointerTy() && "Invalid block literal type"); EnqueuedBlockMap[E].InvokeFunc = InvokeF; EnqueuedBlockMap[E].BlockArg = Block; EnqueuedBlockMap[E].BlockTy = BlockTy; - EnqueuedBlockMap[E].Kernel = nullptr; + EnqueuedBlockMap[E].KernelHandle = nullptr; } llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { @@ -171,11 +182,10 @@ CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { // to get the block literal. const BlockExpr *Block = getBlockExpr(E); - assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && - "Block expression not emitted"); + assert(EnqueuedBlockMap.contains(Block) && "Block expression not emitted"); // Do not emit the block wrapper again if it has been emitted. 
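// Illustrative aside (hypothetical standalone sketch): like getSamplerType
// above, the enqueued-block map below memoizes its result, so each block
// expression's kernel wrapper is created at most once and then reused.
#include <string>
#include <unordered_map>
int emitOnce(const std::string &Key) {
  static std::unordered_map<std::string, int> Cache;
  auto [It, Inserted] = Cache.try_emplace(Key, 0);
  if (Inserted)
    It->second = 1; // stand-in for "build the wrapper kernel"
  return It->second;
}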
- if (EnqueuedBlockMap[Block].Kernel) { + if (EnqueuedBlockMap[Block].KernelHandle) { return EnqueuedBlockMap[Block]; } @@ -183,9 +193,6 @@ CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { CGF, EnqueuedBlockMap[Block].InvokeFunc, EnqueuedBlockMap[Block].BlockTy); // The common part of the post-processing of the kernel goes here. - F->addFnAttr(llvm::Attribute::NoUnwind); - F->setCallingConv( - CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel)); - EnqueuedBlockMap[Block].Kernel = F; + EnqueuedBlockMap[Block].KernelHandle = F; return EnqueuedBlockMap[Block]; } diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h index 900644b3b93b..df8084d6008b 100644 --- a/clang/lib/CodeGen/CGOpenCLRuntime.h +++ b/clang/lib/CodeGen/CGOpenCLRuntime.h @@ -38,13 +38,13 @@ protected: CodeGenModule &CGM; llvm::Type *PipeROTy; llvm::Type *PipeWOTy; - llvm::PointerType *SamplerTy; + llvm::Type *SamplerTy; llvm::StringMap<llvm::PointerType *> CachedTys; /// Structure for enqueued block information. struct EnqueuedBlockInfo { llvm::Function *InvokeFunc; /// Block invoke function. - llvm::Function *Kernel; /// Enqueued block kernel. + llvm::Value *KernelHandle; /// Enqueued block kernel reference. llvm::Value *BlockArg; /// The first argument to enqueued block kernel. llvm::Type *BlockTy; /// Type of the block argument. }; @@ -70,7 +70,7 @@ public: virtual llvm::Type *getPipeType(const PipeType *T); - llvm::PointerType *getSamplerType(const Type *T); + llvm::Type *getSamplerType(const Type *T); // Returns a value which indicates the size in bytes of the pipe // element. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 2284aa1d1eb6..a52ec8909b12 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -498,11 +498,6 @@ enum OpenMPOffloadingRequiresDirFlags : int64_t { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) }; -enum OpenMPOffloadingReservedDeviceIDs { - /// Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, -}; } // anonymous namespace /// Describes ident structure that describes a source location. 
@@ -689,8 +684,7 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + SrcAddr = SrcAddr.withElementType(DestAddr.getElementType()); llvm::Value *SrcBegin = nullptr; if (DRD) @@ -911,8 +905,8 @@ void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, QualType PrivateType = getPrivateType(N); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); if (needCleanups(N)) { - PrivateAddr = CGF.Builder.CreateElementBitCast( - PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + PrivateAddr = + PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType)); CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); } } @@ -931,8 +925,7 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), - CGF.ConvertTypeForMem(ElTy)), + BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)), BaseLV.getType(), BaseLV.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); } @@ -963,7 +956,7 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( Addr, OriginalBaseAddress.getType()); - return OriginalBaseAddress.withPointer(Addr); + return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull); } static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { @@ -1059,16 +1052,15 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { + : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, - hasRequiresUnifiedSharedMemory(), + llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice, + isGPU(), hasRequiresUnifiedSharedMemory(), CGM.getLangOpts().OpenMPOffloadMandatory); - // Initialize Types used in OpenMPIRBuilder from OMPKinds.def - OMPBuilder.initialize(); + OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice + ? 
CGM.getLangOpts().OMPHostIRFile + : StringRef{}); OMPBuilder.setConfig(Config); - OffloadEntriesInfoManager.setConfig(Config); - loadOffloadInfoMetadata(); } void CGOpenMPRuntime::clear() { @@ -1262,20 +1254,38 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); } +std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const { + std::string Suffix = getName({"omp_outlined"}); + return (Name + Suffix).str(); +} + +std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const { + return getOutlinedHelperName(CGF.CurFn->getName()); +} + +std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const { + std::string Suffix = getName({"omp", "reduction", "reduction_func"}); + return (Name + Suffix).str(); +} + llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF), + CodeGen); } llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF), + CodeGen); } llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( @@ -1368,8 +1378,8 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, unsigned Flags, bool EmitLoc) { uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; - if ((!EmitLoc && - CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) || + if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() == + llvm::codegenoptions::NoDebugInfo) || Loc.isInvalid()) { SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); } else { @@ -1589,71 +1599,94 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { return CGM.CreateRuntimeFunction(FnTy, Name); } -/// Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. 
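// Illustrative aside (hypothetical result strings): the helpers above derive
// outlined-function names from the parent function plus a fixed suffix, so a
// parallel region in "foo" yields roughly "foo.omp_outlined" and its
// reduction combiner "foo.omp.reduction.reduction_func"; the exact separator
// produced by getName is target-dependent ('.' or '_').
#include <string>
std::string outlinedHelperName(const std::string &Parent) {
  return Parent + ".omp_outlined"; // assumes '.' separators
}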
-static llvm::TargetRegionEntryInfo -getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - StringRef ParentName = "") { - SourceManager &SM = C.getSourceManager(); +llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind +convertDeviceClause(const VarDecl *VD) { + std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(VD); + if (!DevTy) + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone; - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) + switch ((int)*DevTy) { // Avoid -Wcovered-switch-default + case OMPDeclareTargetDeclAttr::DT_Host: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost; + break; + case OMPDeclareTargetDeclAttr::DT_NoHost: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost; + break; + case OMPDeclareTargetDeclAttr::DT_Any: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny; + break; + default: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone; + break; + } +} - assert(Loc.isValid() && "Source location is expected to be always valid."); +llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind +convertCaptureClause(const VarDecl *VD) { + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!MapType) + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; + switch ((int)*MapType) { // Avoid -Wcovered-switch-default + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; + break; + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter; + break; + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; + break; + default: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; + break; + } +} - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); +static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc( + CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, + SourceLocation BeginLoc, llvm::StringRef ParentName = "") { - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { - PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - SM.getDiagnostics().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - } + auto FileInfoCallBack = [&]() { + SourceManager &SM = CGM.getContext().getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { + PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false); + } + + return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine()); + }; - return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(), - PLoc.getLine()); + return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName); } Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { - if (CGM.getLangOpts().OpenMPSimd) - return Address::invalid(); - std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || - ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - HasRequiresUnifiedSharedMemory))) { - SmallString<64> PtrName; - { - llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)); - if (!VD->isExternallyVisible()) { - auto EntryInfo = getTargetEntryUniqueInfo( - CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc()); - OS << llvm::format("_%x", EntryInfo.FileID); - } - OS << "_decl_tgt_ref_ptr"; - } - llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); - QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); - llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); - if (!Ptr) { - Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName); + auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; - auto *GV = cast<llvm::GlobalVariable>(Ptr); - GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + auto LinkageForVariable = [&VD, this]() { + return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + }; - if (!CGM.getLangOpts().OpenMPIsDevice) - GV->setInitializer(CGM.GetAddrOfGlobal(VD)); - registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); - } - return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); - } - return Address::invalid(); + std::vector<llvm::GlobalVariable *> GeneratedRefs; + + llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem( + CGM.getContext().getPointerType(VD->getType())); + llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar( + convertCaptureClause(VD), convertDeviceClause(VD), + VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, + VD->isExternallyVisible(), + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, + VD->getCanonicalDecl()->getBeginLoc()), + CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, + CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal, + LinkageForVariable); + + if (!addr) + return Address::invalid(); + return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); } llvm::Constant * @@ -1742,9 +1775,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); - Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); - Arg = CtorCGF.Builder.CreateElementBitCast( - Arg, CtorCGF.ConvertTypeForMem(ASTTy)); + Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy), + VDAddr.getAlignment()); CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), /*IsInitializer=*/true); ArgVal = CtorCGF.EmitLoadOfScalar( @@ -1833,7 +1865,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { if (CGM.getLangOpts().OMPTargetTriples.empty() && - !CGM.getLangOpts().OpenMPIsDevice) + !CGM.getLangOpts().OpenMPIsTargetDevice) return false; std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); @@ -1841,12 +1873,12 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) - return CGM.getLangOpts().OpenMPIsDevice; + return CGM.getLangOpts().OpenMPIsTargetDevice; VD = VD->getDefinition(CGM.getContext()); assert(VD && "Unknown 
VarDecl"); if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) - return CGM.getLangOpts().OpenMPIsDevice; + return CGM.getLangOpts().OpenMPIsTargetDevice; QualType ASTTy = VD->getType(); SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); @@ -1854,16 +1886,16 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, // Produce the unique prefix to identify the new target regions. We use // the source location of the variable declaration which we know to not // conflict with any target region. - auto EntryInfo = - getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); + llvm::TargetRegionEntryInfo EntryInfo = + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName()); SmallString<128> Buffer, Out; - OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { llvm::Constant *Ctor; llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Generate function that re-emits the declaration's initializer into // the threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); @@ -1883,8 +1915,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::Constant *AddrInAS0 = Addr; if (Addr->getAddressSpace() != 0) AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( - Addr, llvm::PointerType::getWithSamePointeeType( - cast<llvm::PointerType>(Addr->getType()), 0)); + Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0)); CtorCGF.EmitAnyExprToMem(Init, Address(AddrInAS0, Addr->getValueType(), CGM.getContext().getDeclAlign(VD)), @@ -1905,14 +1936,14 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Out.clear(); auto CtorEntryInfo = EntryInfo; CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( CtorEntryInfo, Ctor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); } if (VD->getType().isDestructedType() != QualType::DK_none) { llvm::Constant *Dtor; llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Generate function that emits destructor call for the threadprivate // copy of the variable VD CodeGenFunction DtorCGF(CGM); @@ -1934,8 +1965,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::Constant *AddrInAS0 = Addr; if (Addr->getAddressSpace() != 0) AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( - Addr, llvm::PointerType::getWithSamePointeeType( - cast<llvm::PointerType>(Addr->getType()), 0)); + Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0)); DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), CGM.getContext().getDeclAlign(VD)), ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), @@ -1954,11 +1984,11 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Out.clear(); auto DtorEntryInfo = EntryInfo; DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( DtorEntryInfo, Dtor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); } - return CGM.getLangOpts().OpenMPIsDevice; + return CGM.getLangOpts().OpenMPIsTargetDevice; } Address 
CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, @@ -2131,7 +2161,11 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); std::string Name = getName({Prefix, "var"}); - return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); + llvm::GlobalVariable *G = OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); + llvm::Align PtrAlign = OMPBuilder.M.getDataLayout().getPointerABIAlignment(G->getAddressSpace()); + if (PtrAlign > llvm::Align(G->getAlignment())) + G->setAlignment(PtrAlign); + return G; } namespace { @@ -2778,7 +2812,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, const StaticRTInput &Values) { OpenMPSchedType ScheduleNum = getRuntimeSchedule( ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); - assert(isOpenMPWorksharingDirective(DKind) && + assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && "Expected loop-based or sections-based directive."); llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind) @@ -2803,7 +2837,7 @@ void CGOpenMPRuntime::emitDistributeStaticInit( llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction; bool isGPUDistribute = - CGM.getLangOpts().OpenMPIsDevice && + CGM.getLangOpts().OpenMPIsTargetDevice && (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); StaticInitFunction = createForStaticInitFunction( Values.IVSize, Values.IVSigned, isGPUDistribute); @@ -2828,7 +2862,8 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && + if (isOpenMPDistributeDirective(DKind) && + CGM.getLangOpts().OpenMPIsTargetDevice && (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( @@ -2947,7 +2982,7 @@ enum KmpTaskTFields { void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // If we are in simd mode or there are no entries, we don't need to do // anything. - if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) + if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty()) return; llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = @@ -2991,42 +3026,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { } }; - OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager, - ErrorReportFn); -} - -/// Loads all the offload entries information from the host IR -/// metadata. -void CGOpenMPRuntime::loadOffloadInfoMetadata() { - // If we are in target mode, load the metadata from the host IR. This code has - // to match the metadata creation in createOffloadEntriesAndInfoMetadata(). 
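// [Editor's note, not part of the commit] The host-IR handshake described in
// the comment above survives in OMPBuilder.loadOffloadInfoMetadata(); this
// commit only deletes the clang-side wrapper. A two-pass offload build looks
// roughly like this (cc1-level flag spellings, shown for orientation only):
//   clang -cc1 -fopenmp ... foo.c            // host pass: records the
//                                            // omp_offload.info metadata
//   clang -cc1 -fopenmp -fopenmp-is-target-device \
//         -fopenmp-host-ir-file-path=foo-host.bc ... foo.c
//                                            // device pass: reloads it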
- - if (!CGM.getLangOpts().OpenMPIsDevice) - return; - - if (CGM.getLangOpts().OMPHostIRFile.empty()) - return; - - auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); - if (auto EC = Buf.getError()) { - CGM.getDiags().Report(diag::err_cannot_open_file) - << CGM.getLangOpts().OMPHostIRFile << EC.message(); - return; - } - - llvm::LLVMContext C; - auto ME = expectedToErrorOrAndEmitErrors( - C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); - - if (auto EC = ME.getError()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); - CGM.getDiags().Report(DiagID) - << CGM.getLangOpts().OMPHostIRFile << EC.message(); - return; - } - - OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); + OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { @@ -4242,8 +4242,7 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); LValue Base = CGF.EmitLoadOfPointerLValue( - CGF.Builder.CreateElementBitCast( - DepobjLVal.getAddress(CGF), + DepobjLVal.getAddress(CGF).withElementType( CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), KmpDependInfoPtrTy->castAs<PointerType>()); Address DepObjAddr = CGF.Builder.CreateGEP( @@ -4670,7 +4669,7 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, llvm::PHINode *ElementPHI = CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); ElementPHI->addIncoming(Begin.getPointer(), EntryBB); - Begin = Begin.withPointer(ElementPHI); + Begin = Begin.withPointer(ElementPHI, KnownNonNull); Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // deps[i].flags = NewDepKind; @@ -5005,7 +5004,7 @@ static void emitReductionCombiner(CodeGenFunction &CGF, } llvm::Function *CGOpenMPRuntime::emitReductionFunction( - SourceLocation Loc, llvm::Type *ArgsElemType, + StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { ASTContext &C = CGM.getContext(); @@ -5020,7 +5019,7 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction( Args.push_back(&RHSArg); const auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - std::string Name = getName({"omp", "reduction", "reduction_func"}); + std::string Name = getReductionFuncName(ReducerName); auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -5215,9 +5214,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } // 2. Emit reduce_func(). - llvm::Function *ReductionFn = - emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy), - Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Function *ReductionFn = emitReductionFunction( + CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), + Privates, LHSExprs, RHSExprs, ReductionOps); // 3. 
Create static kmp_critical_name lock = { 0 }; std::string Name = getName({"reduction"}); @@ -5469,8 +5468,7 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); QualType PrivateType = RCG.getPrivateType(N); Address PrivateAddr = CGF.EmitLoadOfPointer( - CGF.Builder.CreateElementBitCast( - CGF.GetAddrOfLocalVar(&Param), + CGF.GetAddrOfLocalVar(&Param).withElementType( CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), C.getPointerType(PrivateType)->castAs<PointerType>()); llvm::Value *Size = nullptr; @@ -5558,17 +5556,16 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, LHSVD, // Pull out the pointer to the variable. CGF.EmitLoadOfPointer( - CGF.Builder.CreateElementBitCast( - CGF.GetAddrOfLocalVar(&ParamInOut), - CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), + CGF.GetAddrOfLocalVar(&ParamInOut) + .withElementType( + CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), C.getPointerType(LHSVD->getType())->castAs<PointerType>())); PrivateScope.addPrivate( RHSVD, // Pull out the pointer to the variable. CGF.EmitLoadOfPointer( - CGF.Builder.CreateElementBitCast( - CGF.GetAddrOfLocalVar(&ParamIn), - CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), + CGF.GetAddrOfLocalVar(&ParamIn).withElementType( + CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), C.getPointerType(RHSVD->getType())->castAs<PointerType>())); PrivateScope.Privatize(); // Emit the combiner body: @@ -5678,14 +5675,12 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( // ElemLVal.reduce_shar = &Shareds[Cnt]; LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); RCG.emitSharedOrigLValue(CGF, Cnt); - llvm::Value *CastedShared = - CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); - CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); + CGF.EmitStoreOfScalar(Shared, SharedLVal); // ElemLVal.reduce_orig = &Origs[Cnt]; LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); - llvm::Value *CastedOrig = - CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); - CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); + llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF); + CGF.EmitStoreOfScalar(Orig, OrigLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; llvm::Value *SizeVal; @@ -5702,21 +5697,19 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); // ElemLVal.reduce_init = init; LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); - llvm::Value *InitAddr = - CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt); CGF.EmitStoreOfScalar(InitAddr, InitLVal); // ElemLVal.reduce_fini = fini; LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); - llvm::Value *FiniAddr = Fini - ? CGF.EmitCastToVoidPtr(Fini) - : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + llvm::Value *FiniAddr = + Fini ? 
Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); // ElemLVal.reduce_comb = comb; LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); - llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + llvm::Value *CombAddr = emitReduceCombFunction( CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], - RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + RHSExprs[Cnt], Data.ReductionCopies[Cnt]); CGF.EmitStoreOfScalar(CombAddr, CombLVal); // ElemLVal.flags = 0; LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); @@ -6057,15 +6050,14 @@ void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, AllocatorTraitsLVal.getBaseInfo(), AllocatorTraitsLVal.getTBAAInfo()); - llvm::Value *Traits = - CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); + llvm::Value *Traits = Addr.getPointer(); llvm::Value *AllocatorVal = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_init_allocator), {ThreadId, MemSpaceHandle, NumTraits, Traits}); // Store to allocator. - CGF.EmitVarDecl(*cast<VarDecl>( + CGF.EmitAutoVarAlloca(*cast<VarDecl>( cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); AllocatorVal = @@ -6095,8 +6087,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - auto EntryInfo = - getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName); + llvm::TargetRegionEntryInfo EntryInfo = + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName); CodeGenFunction CGF(CGM, true); llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = @@ -6114,10 +6106,9 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); - OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo, - GenerateOutlinedFunction, DefaultValTeams, - DefaultValThreads, IsOffloadEntry, - OutlinedFn, OutlinedFnID); + OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, + DefaultValTeams, DefaultValThreads, + IsOffloadEntry, OutlinedFn, OutlinedFnID); if (OutlinedFn != nullptr) CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); @@ -6216,6 +6207,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( DefaultVal = -1; return nullptr; } + case OMPD_target_teams_loop: case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: @@ -6235,12 +6227,14 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + case OMPD_target_parallel_loop: case OMPD_target_simd: DefaultVal = 1; return nullptr; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_loop: case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: @@ -6306,7 +6300,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && + assert(!CGF.getLangOpts().OpenMPIsTargetDevice && 
"Clauses associated with the teams directive expected to be emitted " "only for the host!"); CGBuilderTy &Bld = CGF.Builder; @@ -6457,6 +6451,8 @@ const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( return ThreadLimit; } return nullptr; + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -6558,7 +6554,7 @@ const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && + assert(!CGF.getLangOpts().OpenMPIsTargetDevice && "Clauses associated with the teams directive expected to be emitted " "only for the host!"); OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); @@ -6659,6 +6655,8 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) return NumThreads; return Bld.getInt32(0); + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -6821,67 +6819,31 @@ public: const Expr *getMapExpr() const { return MapExpr; } }; - /// Class that associates information with a base pointer to be passed to the - /// runtime library. - class BasePointerInfo { - /// The base pointer. - llvm::Value *Ptr = nullptr; - /// The base declaration that refers to this device pointer, or null if - /// there is none. - const ValueDecl *DevPtrDecl = nullptr; - - public: - BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) - : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} - llvm::Value *operator*() const { return Ptr; } - const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } - void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } - }; - + using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; + using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy; + using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy; + using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy; + using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy; + using MapNonContiguousArrayTy = + llvm::OpenMPIRBuilder::MapNonContiguousArrayTy; using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; - using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; - using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; - using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; - using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; - using MapDimArrayTy = SmallVector<uint64_t, 4>; - using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; + using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>; /// This structure contains combined information generated for mappable /// clauses, including base pointers, pointers, sizes, map types, user-defined /// mappers, and non-contiguous information. 
- struct MapCombinedInfoTy { - struct StructNonContiguousInfo { - bool IsNonContiguous = false; - MapDimArrayTy Dims; - MapNonContiguousArrayTy Offsets; - MapNonContiguousArrayTy Counts; - MapNonContiguousArrayTy Strides; - }; + struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy { MapExprsArrayTy Exprs; - MapBaseValuesArrayTy BasePointers; - MapValuesArrayTy Pointers; - MapValuesArrayTy Sizes; - MapFlagsArrayTy Types; - MapMappersArrayTy Mappers; - StructNonContiguousInfo NonContigInfo; + MapValueDeclsArrayTy Mappers; + MapValueDeclsArrayTy DevicePtrDecls; /// Append arrays in \a CurInfo. void append(MapCombinedInfoTy &CurInfo) { Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); - BasePointers.append(CurInfo.BasePointers.begin(), - CurInfo.BasePointers.end()); - Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); - Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); - Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); + DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(), + CurInfo.DevicePtrDecls.end()); Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); - NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), - CurInfo.NonContigInfo.Dims.end()); - NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), - CurInfo.NonContigInfo.Offsets.end()); - NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), - CurInfo.NonContigInfo.Counts.end()); - NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), - CurInfo.NonContigInfo.Strides.end()); + llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo); } }; @@ -7163,6 +7125,7 @@ private: // double d; // int i[100]; // float *p; + // int **a = &i; // // struct S1 { // int i; @@ -7196,6 +7159,14 @@ private: // in unified shared memory mode or for local pointers // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // + // map((*a)[0:3]) + // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM + // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM + // + // map(**a) + // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM + // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM + // // map(s) // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM // @@ -7488,7 +7459,9 @@ private: bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && MapDecl && MapDecl->getType()->isLValueReferenceType(); - bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; + bool IsNonDerefPointer = IsPointer && + !(UO && UO->getOpcode() != UO_Deref) && !BO && + !IsNonContiguous; if (OASE) ++DimSize; @@ -7609,14 +7582,15 @@ private: .getAddress(CGF); } Size = CGF.Builder.CreatePtrDiff( - CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), - CGF.EmitCastToVoidPtr(LB.getPointer())); + CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer()); break; } } assert(Size && "Failed to determine structure size"); CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(LB.getPointer()); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( Size, CGF.Int64Ty, /*isSigned=*/true)); @@ -7628,10 +7602,12 @@ private: } CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 
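// [Editor's note] The tuple tables in the large example comment above
// correspond to user code of roughly this shape (names are illustrative):
//   double d; int i[100]; float *p;
//   #pragma omp target map(tofrom: d, i, p[1:24])
//   { /* ... */ }
// Each map operand yields one or more (base, begin, size, flags) tuples in
// the arrays being populated here.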
CombinedInfo.Pointers.push_back(LB.getPointer()); Size = CGF.Builder.CreatePtrDiff( CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(), - CGF.EmitCastToVoidPtr(LB.getPointer())); + LB.getPointer()); CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); CombinedInfo.Types.push_back(Flags); @@ -7645,6 +7621,8 @@ private: (Next == CE && MapType != OMPC_MAP_unknown)) { CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(LB.getPointer()); CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); @@ -8145,9 +8123,12 @@ private: auto &&UseDeviceDataCombinedInfoGen = [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, - CodeGenFunction &CGF) { + CodeGenFunction &CGF, bool IsDevAddr) { UseDeviceDataCombinedInfo.Exprs.push_back(VD); - UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD); + UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr); + UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD); + UseDeviceDataCombinedInfo.DevicePointers.emplace_back( + IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); UseDeviceDataCombinedInfo.Sizes.push_back( llvm::Constant::getNullValue(CGF.Int64Ty)); @@ -8187,7 +8168,7 @@ private: } else { Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); } - UseDeviceDataCombinedInfoGen(VD, Ptr, CGF); + UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr); } }; @@ -8214,6 +8195,7 @@ private: // item. if (CI != Data.end()) { if (IsDevAddr) { + CI->ForDeviceAddr = IsDevAddr; CI->ReturnDevicePointer = true; Found = true; break; @@ -8226,6 +8208,7 @@ private: PrevCI == CI->Components.rend() || isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || VarD->hasLocalStorage()) { + CI->ForDeviceAddr = IsDevAddr; CI->ReturnDevicePointer = true; Found = true; break; @@ -8316,8 +8299,9 @@ private: assert(RelevantVD && "No relevant declaration related with device pointer??"); - CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( - RelevantVD); + CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; + CurInfo.DevicePointers[CurrentBasePointersIdx] = + L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer; CurInfo.Types[CurrentBasePointersIdx] |= OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; } @@ -8356,7 +8340,10 @@ private: OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); } CurInfo.Exprs.push_back(L.VD); - CurInfo.BasePointers.emplace_back(BasePtr, L.VD); + CurInfo.BasePointers.emplace_back(BasePtr); + CurInfo.DevicePtrDecls.emplace_back(L.VD); + CurInfo.DevicePointers.emplace_back( + L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); CurInfo.Pointers.push_back(Ptr); CurInfo.Sizes.push_back( llvm::Constant::getNullValue(this->CGF.Int64Ty)); @@ -8367,7 +8354,8 @@ private: // individual members mapped. Emit an extra combined entry. if (PartialStruct.Base.isValid()) { CurInfo.NonContigInfo.Dims.push_back(0); - emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); + emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, + /*IsMapThis*/ !VD, VD); } // We need to append the results of this capture to what we already @@ -8433,7 +8421,7 @@ public: /// individual struct members. 
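/// [Editor's note] For example, given 'struct S2 { int a, b; } s;' and
/// 'map(tofrom: s.a, s.b)', the member entries are preceded by one combined
/// parent entry whose pointer is the lowest mapped member and whose size
/// extends through the highest one, letting the runtime attach both members
/// to a single device allocation.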
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, MapFlagsArrayTy &CurTypes, - const StructRangeInfoTy &PartialStruct, + const StructRangeInfoTy &PartialStruct, bool IsMapThis, const ValueDecl *VD = nullptr, bool NotTargetParams = true) const { if (CurTypes.size() == 1 && @@ -8450,12 +8438,14 @@ public: CombinedInfo.Exprs.push_back(VD); // Base is the base of the struct CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); // Pointer is the address of the lowest element llvm::Value *LB = LBAddr.getPointer(); const CXXMethodDecl *MD = CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr; const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; - bool HasBaseClass = RD ? RD->getNumBases() > 0 : false; + bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false; // There should not be a mapper for a combined entry. if (HasBaseClass) { // OpenMP 5.2 148:21: @@ -8571,6 +8561,8 @@ public: VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), @@ -8597,6 +8589,8 @@ public: VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( @@ -8608,6 +8602,8 @@ public: VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } @@ -8632,7 +8628,7 @@ public: OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) continue; - llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); + llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]); assert(BasePtr && "Unable to find base lambda address."); int TgtIdx = -1; for (unsigned J = I; J > 0; --J) { @@ -8674,15 +8670,15 @@ public: // pass its value. if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.emplace_back(Arg, VD); + CombinedInfo.BasePointers.emplace_back(Arg); + CombinedInfo.DevicePtrDecls.emplace_back(VD); + CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer); CombinedInfo.Pointers.push_back(Arg); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); CombinedInfo.Types.push_back( - (Cap->capturesVariable() - ? 
OpenMPOffloadMappingFlags::OMP_MAP_TO - : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); CombinedInfo.Mappers.push_back(nullptr); return; @@ -8916,6 +8912,8 @@ public: if (CI.capturesThis()) { CombinedInfo.Exprs.push_back(nullptr); CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(CV); const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); CombinedInfo.Sizes.push_back( @@ -8928,6 +8926,8 @@ public: const VarDecl *VD = CI.getCapturedVar(); CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(CV); if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are @@ -8959,6 +8959,8 @@ public: auto I = FirstPrivateDecls.find(VD); CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( CV, ElementType, CGF.getContext().getDeclAlign(VD), @@ -8984,74 +8986,6 @@ public: }; } // anonymous namespace -static void emitNonContiguousDescriptor( - CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, - CGOpenMPRuntime::TargetDataInfo &Info) { - CodeGenModule &CGM = CGF.CGM; - MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo - &NonContigInfo = CombinedInfo.NonContigInfo; - - // Build an array of struct descriptor_dim and then assign it to - // offload_args. - // - // struct descriptor_dim { - // uint64_t offset; - // uint64_t count; - // uint64_t stride - // }; - ASTContext &C = CGF.getContext(); - QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); - RecordDecl *RD; - RD = C.buildImplicitRecord("descriptor_dim"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, Int64Ty); - addFieldToRecordDecl(C, RD, Int64Ty); - addFieldToRecordDecl(C, RD, Int64Ty); - RD->completeDefinition(); - QualType DimTy = C.getRecordType(RD); - - enum { OffsetFD = 0, CountFD, StrideFD }; - // We need two index variable here since the size of "Dims" is the same as the - // size of Components, however, the size of offset, count, and stride is equal - // to the size of base declaration that is non-contiguous. - for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { - // Skip emitting ir if dimension size is 1 since it cannot be - // non-contiguous. 
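// [Editor's note] The descriptors above are what strided (non-contiguous)
// sections lower to; for example, with OpenMP 5.x strided array sections:
//   int m[16][16];
//   #pragma omp target update to(m[0:8:2][0:8:2])
// the runtime receives one descriptor_dim {offset, count, stride} triple per
// dimension instead of a single flat byte size.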
- if (NonContigInfo.Dims[I] == 1) - continue; - llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); - QualType ArrayTy = - C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); - Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); - for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { - unsigned RevIdx = EE - II - 1; - LValue DimsLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); - // Offset - LValue OffsetLVal = CGF.EmitLValueForField( - DimsLVal, *std::next(RD->field_begin(), OffsetFD)); - CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); - // Count - LValue CountLVal = CGF.EmitLValueForField( - DimsLVal, *std::next(RD->field_begin(), CountFD)); - CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); - // Stride - LValue StrideLVal = CGF.EmitLValueForField( - DimsLVal, *std::next(RD->field_begin(), StrideFD)); - CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); - } - // args[I] = &dims - Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty); - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.RTArgs.PointersArray, 0, I); - Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign()); - CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); - ++L; - } -} - // Try to extract the base declaration from a `this->x` expression if possible. static ValueDecl *getDeclFromThisExpr(const Expr *E) { if (!E) @@ -9108,196 +9042,45 @@ static void emitOffloadingArrays( CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous = false) { CodeGenModule &CGM = CGF.CGM; - ASTContext &Ctx = CGF.getContext(); // Reset the array information. Info.clearArrayInfo(); Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); - if (Info.NumberOfPtrs) { - // Detect if we have any capture size requiring runtime evaluation of the - // size so that a constant array could be eventually used. - - llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); - QualType PointerArrayType = Ctx.getConstantArrayType( - Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - - Info.RTArgs.BasePointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - Info.RTArgs.PointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); - Address MappersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); - Info.RTArgs.MappersArray = MappersArray.getPointer(); - - // If we don't have any VLA types or other types that require runtime - // evaluation, we can use a constant array for the map sizes, otherwise we - // need to fill up the arrays as we do for the pointers. 
- QualType Int64Ty = - Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); - SmallVector<llvm::Constant *> ConstSizes( - CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0)); - llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size()); - for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { - if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) { - if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) { - if (IsNonContiguous && - static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( - CombinedInfo.Types[I] & - OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG)) - ConstSizes[I] = llvm::ConstantInt::get( - CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]); - else - ConstSizes[I] = CI; - continue; - } - } - RuntimeSizes.set(I); - } - - if (RuntimeSizes.all()) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Info.RTArgs.SizesArray = - CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); - } else { - auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); - std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); - auto *SizesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name); - SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - if (RuntimeSizes.any()) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); - llvm::Value *GblConstPtr = - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - SizesArrayGbl, CGM.Int64Ty->getPointerTo()); - CGF.Builder.CreateMemCpy( - Buffer, - Address(GblConstPtr, CGM.Int64Ty, - CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth( - /*DestWidth=*/64, /*Signed=*/false))), - CGF.getTypeSize(SizeArrayType)); - Info.RTArgs.SizesArray = Buffer.getPointer(); - } else { - Info.RTArgs.SizesArray = SizesArrayGbl; - } - } - - // The map types are always constant so we don't need to generate code to - // fill arrays. Instead, we create an array constant. - SmallVector<uint64_t, 4> Mapping; - for (auto mapFlag : CombinedInfo.Types) - Mapping.push_back( - static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( - mapFlag)); - std::string MaptypesName = - CGM.getOpenMPRuntime().getName({"offload_maptypes"}); - auto *MapTypesArrayGbl = - OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); - Info.RTArgs.MapTypesArray = MapTypesArrayGbl; + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); - // The information types are only built if there is debug information - // requested. 
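// [Editor's note] When debug info is enabled, each map entry additionally
// gets a string in @.offload_mapnames derived from the map expression (for
// example "p[1:24]"), which libomptarget can surface in LIBOMPTARGET_INFO
// output; without debug info a null array is passed instead.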
- if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { - Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue( - llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); - } else { - auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { - return emitMappingInformation(CGF, OMPBuilder, MapExpr); - }; - SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); - llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); - std::string MapnamesName = - CGM.getOpenMPRuntime().getName({"offload_mapnames"}); - auto *MapNamesArrayGbl = - OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName); - Info.RTArgs.MapNamesArray = MapNamesArrayGbl; - } + auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + if (CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo) { + CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), + FillInfoMap); + } - // If there's a present map type modifier, it must not be applied to the end - // of a region, so generate a separate map type array in that case. - if (Info.separateBeginEndCalls()) { - bool EndMapTypesDiffer = false; - for (uint64_t &Type : Mapping) { - if (Type & - static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( - OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) { - Type &= - ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( - OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); - EndMapTypesDiffer = true; - } - } - if (EndMapTypesDiffer) { - MapTypesArrayGbl = - OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName); - Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl; - } + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); } + }; - for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { - llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; - llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.RTArgs.BasePointersArray, 0, I); - BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); - Address BPAddr(BP, BPVal->getType(), - Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(BPVal, BPAddr); - - if (Info.requiresDevicePointerInfo()) - if (const ValueDecl *DevVD = - CombinedInfo.BasePointers[I].getDevicePtrDecl()) - Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); - - llvm::Value *PVal = CombinedInfo.Pointers[I]; - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.RTArgs.PointersArray, 0, I); - P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); - Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(PVal, PAddr); - - if (RuntimeSizes.test(I)) { - llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - Info.RTArgs.SizesArray, - /*Idx0=*/0, - /*Idx1=*/I); - Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty)); - CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], - CGM.Int64Ty, - /*isSigned=*/true), - SAddr); - } - - // Fill up the mapper array. 
- llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); - if (CombinedInfo.Mappers[I]) { - MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); - Info.HasMapper = true; - } - Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); - CGF.Builder.CreateStore(MFunc, MAddr); + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); } - } - - if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || - Info.NumberOfPtrs == 0) - return; - - emitNonContiguousDescriptor(CGF, CombinedInfo, Info); + return MFunc; + }; + OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, + /*IsNonContiguous=*/true, DeviceAddrCB, + CustomMapperCB); } /// Check for inner distribute directive. @@ -9314,7 +9097,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPDistributeDirective(DKind)) + // For now, just treat 'target teams loop' as if it's distributed. + if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop) return NestedDir; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( @@ -9569,12 +9353,13 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Fill up the runtime mapper handle for all components. for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( - *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); llvm::Value *CurSizeArg = Info.Sizes[I]; llvm::Value *CurNameArg = - (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) + (CGM.getCodeGenOpts().getDebugInfo() == + llvm::codegenoptions::NoDebugInfo) ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); @@ -9797,7 +9582,8 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. 
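// [Editor's note] The trip count computed here serves kernels such as:
//   #pragma omp target teams distribute parallel for
//   for (int i = 0; i < N; ++i) a[i] = b[i];
// The host-side value of N is handed to the offload runtime so it can size
// the launch before the kernel runs; a constant 0 means "unknown".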
- if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) + if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) && + Kind != OMPD_target_teams_loop) TD = getNestedDistributeDirective(CGM.getContext(), D); if (!TD) return llvm::ConstantInt::get(CGF.Int64Ty, 0); @@ -9808,289 +9594,303 @@ llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( return llvm::ConstantInt::get(CGF.Int64Ty, 0); } -void CGOpenMPRuntime::emitTargetCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, +static void +emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + bool RequiresOuterTask, const CapturedStmt &CS, + bool OffloadingMandatory, CodeGenFunction &CGF) { + if (OffloadingMandatory) { + CGF.Builder.CreateUnreachable(); + } else { + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, + CapturedVars); + } +} + +static llvm::Value *emitDeviceID( llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, - llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, - const OMPLoopDirective &D)> - SizeEmitter) { - if (!CGF.HaveInsertPoint()) - return; + CodeGenFunction &CGF) { + // Emit device ID if any. + llvm::Value *DeviceID; + if (Device.getPointer()) { + assert((Device.getInt() == OMPC_DEVICE_unknown || + Device.getInt() == OMPC_DEVICE_device_num) && + "Expected device_num modifier."); + llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); + DeviceID = + CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } + return DeviceID; +} - const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && - CGM.getLangOpts().OpenMPOffloadMandatory; +llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D, + CodeGenFunction &CGF) { + llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0); - assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); + if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) { + CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF); + llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr( + DynMemClause->getSize(), /*IgnoreResultAssign=*/true); + DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty, + /*isSigned=*/false); + } + return DynCGroupMem; +} - const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || - D.hasClausesOfKind<OMPNowaitClause>() || - D.hasClausesOfKind<OMPInReductionClause>(); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - }; - emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); +static void emitTargetCallKernelLaunch( + CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, + const CapturedStmt &CS, bool OffloadingMandatory, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, + llvm::Value *&MapTypesArray, 
llvm::Value *&MapNamesArray, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter, + CodeGenFunction &CGF, CodeGenModule &CGM) { + llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); - CodeGenFunction::OMPTargetDataInfo InputInfo; - llvm::Value *MapTypesArray = nullptr; - llvm::Value *MapNamesArray = nullptr; - // Generate code for the host fallback function. - auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, - &CS, OffloadingMandatory](CodeGenFunction &CGF) { - if (OffloadingMandatory) { - CGF.Builder.CreateUnreachable(); - } else { - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); - } - }; - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray, - &MapNamesArray, SizeEmitter, - FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { - if (Device.getInt() == OMPC_DEVICE_ancestor) { - // Reverse offloading is not supported, so just execute on the host. - FallbackGen(CGF); - return; - } + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; - // On top of the arrays that were filled up, the target offloading call - // takes as arguments the device id as well as the host pointer. The host - // pointer is used by the runtime library to identify the current target - // region, so it only has to be unique and not necessarily point to - // anything. It could be the pointer to the outlined function that - // implements the target region, but we aren't using that so that the - // compiler doesn't need to keep that, and could therefore inline the host - // function if proven worthwhile during optimization. + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; - // From this point on, we need to have an ID of the target region defined. - assert(OutlinedFnID && "Invalid outlined function ID!"); - (void)OutlinedFnID; + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto *CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + MappableExprsHandler::MapCombinedInfoTy CurInfo; + MappableExprsHandler::StructRangeInfoTy PartialStruct; - // Emit device ID if any. - llvm::Value *DeviceID; - if (Device.getPointer()) { - assert((Device.getInt() == OMPC_DEVICE_unknown || - Device.getInt() == OMPC_DEVICE_device_num) && - "Expected device_num modifier."); - llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); - DeviceID = - CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurInfo.Exprs.push_back(nullptr); + CurInfo.BasePointers.push_back(*CV); + CurInfo.DevicePtrDecls.push_back(nullptr); + CurInfo.DevicePointers.push_back( + MappableExprsHandler::DeviceInfoTy::None); + CurInfo.Pointers.push_back(*CV); + CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); + // Copy to the device as an argument. No need to retrieve it. 
+ CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + CurInfo.Mappers.push_back(nullptr); } else { - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); + if (!CI->capturesThis()) + MappedVarSet.insert(CI->getCapturedVar()); + else + MappedVarSet.insert(nullptr); + if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); + // Generate correct mapping for variables captured by reference in + // lambdas. + if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, + CurInfo, LambdaPointers); } + // We expect to have at least an element of information for this capture. + assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && + "Non-existing map pointer for capture!"); + assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && + CurInfo.BasePointers.size() == CurInfo.Sizes.size() && + CurInfo.BasePointers.size() == CurInfo.Types.size() && + CurInfo.BasePointers.size() == CurInfo.Mappers.size() && + "Inconsistent map information sizes!"); - // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = - CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) { + CombinedInfo.append(PartialStruct.PreliminaryMapData); + MEHandler.emitCombinedEntry( + CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), + nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty()); + } - // Return value of the runtime offloading call. - llvm::Value *Return; + // We need to append the results of this capture to what we already have. + CombinedInfo.append(CurInfo); + } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures( + LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, + CombinedInfo.Types); + // Map any list items in a map clause that were not captures because they + // weren't referenced within the construct. + MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); - llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); - llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + CGOpenMPRuntime::TargetDataInfo Info; + // Fill up the arrays and create the arguments. 
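// [Editor's note] The builder call below materializes the familiar
// .offload_baseptrs/.offload_ptrs/.offload_sizes allocas and the constant
// @.offload_maptypes data that the deleted emitOffloadingArrays() body above
// used to emit by hand.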
+ emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); + bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); - // Source location for the ident struct - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + InputInfo.MappersArray = + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; - // Get tripcount for the target loop-based directive. - llvm::Value *NumIterations = - emitTargetNumIterationsCall(CGF, D, SizeEmitter); + auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, + RequiresOuterTask, &CS, OffloadingMandatory, Device, + OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { + bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; - llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0); - if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) { - CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF); - llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr( - DynMemClause->getSize(), /*IgnoreResultAssign=*/true); - DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty, - /*isSigned=*/false); + if (IsReverseOffloading) { + // Reverse offloading is not supported, so just execute on the host. + // FIXME: This fallback solution is incorrect since it ignores the + // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to + // assert here and ensure SEMA emits an error. + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return; } - llvm::Value *ZeroArray = - llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3)); - bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); - llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait); + unsigned NumTargetItems = InputInfo.NumberOfTargetItems; - llvm::Value *NumTeams3D = - CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0}); - llvm::Value *NumThreads3D = - CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0}); + llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer(); + llvm::Value *PointersArray = InputInfo.PointersArray.getPointer(); + llvm::Value *SizesArray = InputInfo.SizesArray.getPointer(); + llvm::Value *MappersArray = InputInfo.MappersArray.getPointer(); - // Arguments for the target kernel. 
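// [Editor's note] These values are packed into the runtime's kernel-args
// struct and handed to a launch entry point of roughly this shape (sketch,
// see openmp/libomptarget for the authoritative declaration):
//   int32_t __tgt_target_kernel(ident_t *Loc, int64_t DeviceId,
//                               int32_t NumTeams, int32_t ThreadLimit,
//                               void *HostPtr, KernelArgsTy *Args);
// A nonzero return value routes control to the host-fallback block emitted
// below.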
- SmallVector<llvm::Value *> KernelArgs{ - CGF.Builder.getInt32(/* Version */ 2), - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer(), - NumIterations, - Flags, - NumTeams3D, - NumThreads3D, - DynCGroupMem, + auto &&EmitTargetCallFallbackCB = + [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) + -> llvm::OpenMPIRBuilder::InsertPointTy { + CGF.Builder.restoreIP(IP); + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return CGF.Builder.saveIP(); }; - // The target region is an outlined function launched by the runtime - // via calls to __tgt_target_kernel(). - // - // Note that on the host and CPU targets, the runtime implementation of - // these calls simply call the outlined function without forking threads. - // The outlined functions themselves have runtime calls to - // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by - // the compiler in emitTeamsCall() and emitParallelCall(). - // - // In contrast, on the NVPTX target, the implementation of - // __tgt_target_teams() launches a GPU kernel with the requested number - // of teams and threads so no additional calls to the runtime are required. - // Check the error code and execute the host version if required. - CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel( - CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads, - OutlinedFnID, KernelArgs)); - - llvm::BasicBlock *OffloadFailedBlock = - CGF.createBasicBlock("omp_offload.failed"); - llvm::BasicBlock *OffloadContBlock = - CGF.createBasicBlock("omp_offload.cont"); - llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); - CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); - - CGF.EmitBlock(OffloadFailedBlock); - FallbackGen(CGF); + llvm::Value *DeviceID = emitDeviceID(Device, CGF); + llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = + OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); + llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); + llvm::Value *NumIterations = + OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); + llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); - CGF.EmitBranch(OffloadContBlock); + llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( + BasePointersArray, PointersArray, SizesArray, MapTypesArray, + nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); - CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); - }; + llvm::OpenMPIRBuilder::TargetKernelArgs Args( + NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, + DynCGGroupMem, HasNoWait); - // Notify that the host version must be executed. 
- auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { - FallbackGen(CGF); + CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( + CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, + DeviceID, RTLoc, AllocaIP)); }; - auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, - &MapNamesArray, &CapturedVars, RequiresOuterTask, - &CS](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); +} - // Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; +static void +emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + bool RequiresOuterTask, const CapturedStmt &CS, + bool OffloadingMandatory, CodeGenFunction &CGF) { - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto *CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - MappableExprsHandler::MapCombinedInfoTy CurInfo; - MappableExprsHandler::StructRangeInfoTy PartialStruct; + // Notify that the host version must be executed. + auto &&ElseGen = + [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) { + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + }; - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. - if (CI->capturesVariableArrayType()) { - CurInfo.Exprs.push_back(nullptr); - CurInfo.BasePointers.push_back(*CV); - CurInfo.Pointers.push_back(*CV); - CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); - // Copy to the device as an argument. No need to retrieve it. - CurInfo.Types.push_back( - OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | - OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | - OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); - CurInfo.Mappers.push_back(nullptr); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); - if (!CI->capturesThis()) - MappedVarSet.insert(CI->getCapturedVar()); - else - MappedVarSet.insert(nullptr); - if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); - // Generate correct mapping for variables captured by reference in - // lambdas. - if (CI->capturesVariable()) - MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, - CurInfo, LambdaPointers); - } - // We expect to have at least an element of information for this capture. 
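In the capture-mapping loop above, a VLA size capture is tagged OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM | OMP_MAP_IMPLICIT: passed by value as a target argument, with no user-written map clause behind it. OpenMPOffloadMappingFlags is an enum class, so this combination only works because the bitwise operators are overloaded for it. A self-contained sketch of that pattern; the enumerator values are made up for illustration, not the actual tgt_map_type encoding:

    #include <cstdint>
    #include <cstdio>

    enum class MapFlags : uint64_t {
      None = 0x0,
      TargetParam = 0x20, // entry is an argument of the target region
      Literal = 0x100,    // capture passed by value, nothing to copy back
      Implicit = 0x200,   // mapping was not written by the user
    };

    constexpr MapFlags operator|(MapFlags A, MapFlags B) {
      return static_cast<MapFlags>(static_cast<uint64_t>(A) |
                                   static_cast<uint64_t>(B));
    }

    int main() {
      MapFlags F = MapFlags::Literal | MapFlags::TargetParam | MapFlags::Implicit;
      std::printf("combined map type: 0x%llx\n",
                  static_cast<unsigned long long>(F));
    }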
- assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && - "Non-existing map pointer for capture!"); - assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && - CurInfo.BasePointers.size() == CurInfo.Sizes.size() && - CurInfo.BasePointers.size() == CurInfo.Types.size() && - CurInfo.BasePointers.size() == CurInfo.Mappers.size() && - "Inconsistent map information sizes!"); + if (RequiresOuterTask) { + CodeGenFunction::OMPTargetDataInfo InputInfo; + CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); + } else { + OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); + } +} - // If there is an entry in PartialStruct it means we have a struct with - // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) { - CombinedInfo.append(PartialStruct.PreliminaryMapData); - MEHandler.emitCombinedEntry( - CombinedInfo, CurInfo.Types, PartialStruct, nullptr, - !PartialStruct.PreliminaryMapData.BasePointers.empty()); - } +void CGOpenMPRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { + if (!CGF.HaveInsertPoint()) + return; - // We need to append the results of this capture to what we already have. - CombinedInfo.append(CurInfo); - } - // Adjust MEMBER_OF flags for the lambdas captures. - MEHandler.adjustMemberOfForLambdaCaptures( - LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, - CombinedInfo.Types); - // Map any list items in a map clause that were not captures because they - // weren't referenced within the construct. - MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); + const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice && + CGM.getLangOpts().OpenMPOffloadMandatory; - CGOpenMPRuntime::TargetDataInfo Info; - // Fill up the arrays and create the arguments. 
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - bool EmitDebug = - CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, - /*ForEndCall=*/false); + assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); - InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, - CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, - CGM.getPointerAlign()); - InputInfo.SizesArray = - Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); - InputInfo.MappersArray = - Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - MapTypesArray = Info.RTArgs.MapTypesArray; - MapNamesArray = Info.RTArgs.MapNamesArray; - if (RequiresOuterTask) - CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); - else - emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); + const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || + D.hasClausesOfKind<OMPNowaitClause>() || + D.hasClausesOfKind<OMPInReductionClause>(); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); }; + emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); - auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( - CodeGenFunction &CGF, PrePostActionTy &) { - if (RequiresOuterTask) { - CodeGenFunction::OMPTargetDataInfo InputInfo; - CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); - } else { - emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); - } + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; + llvm::Value *MapNamesArray = nullptr; + + auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars, + RequiresOuterTask, &CS, OffloadingMandatory, Device, + OutlinedFnID, &InputInfo, &MapTypesArray, + &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, + PrePostActionTy &) { + emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, + Device, OutlinedFnID, InputInfo, MapTypesArray, + MapNamesArray, SizeEmitter, CGF, CGM); }; + auto &&TargetElseGen = + [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) { + emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, + CS, OffloadingMandatory, CGF); + }; + // If we have a target function ID it means that we need to support // offloading, otherwise, just execute on the host. We need to execute on host // regardless of the conditional in the if clause if, e.g., the user do not @@ -10121,12 +9921,13 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (RequiresDeviceCodegen) { const auto &E = *cast<OMPExecutableDirective>(S); - auto EntryInfo = - getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName); + + llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( + CGM, OMPBuilder, E.getBeginLoc(), ParentName); // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. 
- if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo)) + if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) return; switch (E.getDirectiveKind()) { @@ -10173,6 +9974,14 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, CGM, ParentName, cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); break; + case OMPD_target_teams_loop: + CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E)); + break; + case OMPD_target_parallel_loop: + CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E)); + break; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: @@ -10272,10 +10081,10 @@ static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) { + if (!CGM.getLangOpts().OpenMPIsTargetDevice) { if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), - CGM.getLangOpts().OpenMPIsDevice)) + CGM.getLangOpts().OpenMPIsTargetDevice)) return true; return false; } @@ -10286,7 +10095,7 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { StringRef Name = CGM.getMangledName(GD); scanForTargetRegionsFunctions(FD->getBody(), Name); if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), - CGM.getLangOpts().OpenMPIsDevice)) + CGM.getLangOpts().OpenMPIsTargetDevice)) return true; } @@ -10297,10 +10106,10 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), - CGM.getLangOpts().OpenMPIsDevice)) + CGM.getLangOpts().OpenMPIsTargetDevice)) return true; - if (!CGM.getLangOpts().OpenMPIsDevice) + if (!CGM.getLangOpts().OpenMPIsTargetDevice) return false; // Check if there are Ctors/Dtors in this declaration and look for target @@ -10337,19 +10146,13 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { if (CGM.getLangOpts().OMPTargetTriples.empty() && - !CGM.getLangOpts().OpenMPIsDevice) - return; - - // If we have host/nohost variables, they do not need to be registered. - std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(VD); - if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any) + !CGM.getLangOpts().OpenMPIsTargetDevice) return; std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) { - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Register non-target variables being emitted in device code (debug info // may cause this). StringRef VarName = CGM.getMangledName(VD); @@ -10357,66 +10160,29 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, } return; } - // Register declare target variables. 
- llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags; - StringRef VarName; - int64_t VarSize; - llvm::GlobalValue::LinkageTypes Linkage; - if ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - !HasRequiresUnifiedSharedMemory) { - Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; - VarName = CGM.getMangledName(VD); - if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { - VarSize = - CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity(); - assert(VarSize != 0 && "Expected non-zero size of the variable"); - } else { - VarSize = 0; - } - Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); - // Temp solution to prevent optimizations of the internal variables. - if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { - // Do not create a "ref-variable" if the original is not also available - // on the host. - if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) - return; - std::string RefName = getName({VarName, "ref"}); - if (!CGM.GetGlobalValue(RefName)) { - llvm::Constant *AddrRef = - OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName); - auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); - GVAddrRef->setConstant(/*Val=*/true); - GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); - GVAddrRef->setInitializer(Addr); - CGM.addCompilerUsedGlobal(GVAddrRef); - } - } - } else { - assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - HasRequiresUnifiedSharedMemory)) && - "Declare target attribute must link or to with unified memory."); - if (*Res == OMPDeclareTargetDeclAttr::MT_Link) - Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; - else - Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; + auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; + auto LinkageForVariable = [&VD, this]() { + return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + }; - if (CGM.getLangOpts().OpenMPIsDevice) { - VarName = Addr->getName(); - Addr = nullptr; - } else { - VarName = getAddrOfDeclareTargetVar(VD).getName(); - Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); - } - VarSize = CGM.getPointerSize().getQuantity(); - Linkage = llvm::GlobalValue::WeakAnyLinkage; - } + std::vector<llvm::GlobalVariable *> GeneratedRefs; + OMPBuilder.registerTargetGlobalVariable( + convertCaptureClause(VD), convertDeviceClause(VD), + VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, + VD->isExternallyVisible(), + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, + VD->getCanonicalDecl()->getBeginLoc()), + CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, + CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, + CGM.getTypes().ConvertTypeForMem( + CGM.getContext().getPointerType(VD->getType())), + Addr); + + for (auto *ref : GeneratedRefs) + CGM.addCompilerUsedGlobal(ref); - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( - VarName, Addr, VarSize, Flags, Linkage); + return; } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { @@ -10514,19 +10280,19 @@ bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { 
SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; } } CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { - if (CGM.getLangOpts().OpenMPIsDevice) + if (CGM.getLangOpts().OpenMPIsTargetDevice) CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; } bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { - if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) + if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal) return true; const auto *D = cast<FunctionDecl>(GD.getDecl()); @@ -10549,10 +10315,9 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. if (CGM.getLangOpts().OMPTargetTriples.empty() || - CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || - (OffloadEntriesInfoManager.empty() && - !HasEmittedDeclareTargetRegion && - !HasEmittedTargetRegion)) + CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice || + (OMPBuilder.OffloadInfoManager.empty() && + !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion)) return nullptr; // Create and register the function that handles the requires directives. @@ -10573,9 +10338,8 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { // passed to the runtime. This avoids the runtime from throwing an error // for mismatching requires clauses across compilation units that don't // contain at least 1 target region. - assert((HasEmittedTargetRegion || - HasEmittedDeclareTargetRegion || - !OffloadEntriesInfoManager.empty()) && + assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || + !OMPBuilder.OffloadInfoManager.empty()) && "Target or declare target region expected."); if (HasRequiresUnifiedSharedMemory) Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; @@ -10652,140 +10416,94 @@ void CGOpenMPRuntime::emitTargetDataCalls( // off. PrePostActionTy NoPrivAction; - // Generate the code for the opening of the data environment. Capture all the - // arguments of the runtime call by reference because they are used in the - // closing of the region. - auto &&BeginThenGen = [this, &D, Device, &Info, - &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); + llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); + + llvm::Value *IfCondVal = nullptr; + if (IfCond) + IfCondVal = CGF.EvaluateExprAsBool(IfCond); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) { + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + auto GenMapInfoCB = + [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { + CGF.Builder.restoreIP(CodeGenIP); // Get map clause information. MappableExprsHandler MEHandler(D, CGF); MEHandler.generateAllInfo(CombinedInfo); - // Fill up the arrays and create the arguments. 
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, - /*IsNonContiguous=*/true); - - llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs; - bool EmitDebug = - CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info, - EmitDebug); - - // Emit device ID if any. - llvm::Value *DeviceID = nullptr; - if (Device) { - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - } else { - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + if (CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo) { + CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), + FillInfoMap); } - // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); - // - // Source location for the ident struct - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - PointerNum, - RTArgs.BasePointersArray, - RTArgs.PointersArray, - RTArgs.SizesArray, - RTArgs.MapTypesArray, - RTArgs.MapNamesArray, - RTArgs.MappersArray}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), - OffloadingArgs); - - // If device pointer privatization is required, emit the body of the region - // here. It will have to be duplicated: with and without privatization. - if (!Info.CaptureDeviceAddrMap.empty()) - CodeGen(CGF); + return CombinedInfo; }; - - // Generate code for the closing of the data region. - auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, - PrePostActionTy &) { - assert(Info.isValid() && "Invalid data environment closing arguments."); - - llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs; - bool EmitDebug = - CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info, - EmitDebug, - /*ForEndCall=*/true); - - // Emit device ID if any. - llvm::Value *DeviceID = nullptr; - if (Device) { - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - } else { - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; + auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { + CGF.Builder.restoreIP(CodeGenIP); + switch (BodyGenType) { + case BodyGenTy::Priv: + if (!Info.CaptureDeviceAddrMap.empty()) + CodeGen(CGF); + break; + case BodyGenTy::DupNoPriv: + if (!Info.CaptureDeviceAddrMap.empty()) { + CodeGen.setAction(NoPrivAction); + CodeGen(CGF); + } + break; + case BodyGenTy::NoPriv: + if (Info.CaptureDeviceAddrMap.empty()) { + CodeGen.setAction(NoPrivAction); + CodeGen(CGF); + } + break; } - - // Emit the number of elements in the offloading arrays. 
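Note how the new GenMapInfoCB above only materializes the per-entry name strings when debug info is on: it resizes CombinedInfo.Names and fills it from CombinedInfo.Exprs with llvm::transform. The same shape in portable C++, with a placeholder struct standing in for clang's MappingExprInfo:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct MapExprInfo { std::string Spelling; }; // stand-in for clang's type

    int main() {
      std::vector<MapExprInfo> Exprs = {{"a[0:n]"}, {"b"}};
      std::vector<std::string> Names;
      bool EmitDebug = true; // getDebugInfo() != NoDebugInfo in the real code

      if (EmitDebug) {
        // Resize first, then fill element-for-element, as the callback does.
        Names.resize(Exprs.size());
        std::transform(Exprs.begin(), Exprs.end(), Names.begin(),
                       [](const MapExprInfo &E) { return E.Spelling; });
      }
      for (const std::string &N : Names)
        std::printf("map name: %s\n", N.c_str());
    }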
- llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); - - // Source location for the ident struct - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - PointerNum, - RTArgs.BasePointersArray, - RTArgs.PointersArray, - RTArgs.SizesArray, - RTArgs.MapTypesArray, - RTArgs.MapNamesArray, - RTArgs.MappersArray}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), - OffloadingArgs); + return InsertPointTy(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); }; - // If we need device pointer privatization, we need to emit the body of the - // region with no privatization in the 'else' branch of the conditional. - // Otherwise, we don't have to do anything. - auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, - PrePostActionTy &) { - if (!Info.CaptureDeviceAddrMap.empty()) { - CodeGen.setAction(NoPrivAction); - CodeGen(CGF); + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); } }; - // We don't have to do anything to close the region if the if clause evaluates - // to false. - auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; - - if (IfCond) { - emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); - } else { - RegionCodeGenTy RCG(BeginThenGen); - RCG(CGF); - } + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; - // If we don't require privatization of device pointers, we emit the body in - // between the runtime calls. This avoids duplicating the body code. 
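The deleted Begin/End 'then'/'else' lambdas above are exactly what the new BodyCB replaces: OMPBuilder.createTargetData drives a single body callback and tells it which variant to emit via BodyGenTy, so the privatized, duplicated non-privatized, and plain cases no longer need separate codegen paths. A runnable sketch of that dispatch; the driver loop at the bottom is illustrative, since in reality the builder decides which phases run:

    #include <cstdio>

    enum class BodyGenTy { Priv, DupNoPriv, NoPriv };

    static void emitBody(BodyGenTy Kind, bool PrivatizesDevicePtrs) {
      switch (Kind) {
      case BodyGenTy::Priv: // body using privatized device pointers
        if (PrivatizesDevicePtrs)
          std::puts("emit privatized body");
        break;
      case BodyGenTy::DupNoPriv: // duplicated body, no privatization
        if (PrivatizesDevicePtrs)
          std::puts("emit duplicated non-privatized body");
        break;
      case BodyGenTy::NoPriv: // single body, nothing to privatize
        if (!PrivatizesDevicePtrs)
          std::puts("emit plain body");
        break;
      }
    }

    int main() {
      for (BodyGenTy K :
           {BodyGenTy::Priv, BodyGenTy::DupNoPriv, BodyGenTy::NoPriv})
        emitBody(K, /*PrivatizesDevicePtrs=*/true);
    }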
- if (Info.CaptureDeviceAddrMap.empty()) { - CodeGen.setAction(NoPrivAction); - CodeGen(CGF); - } + // Source location for the ident struct + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - if (IfCond) { - emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); - } else { - RegionCodeGenTy RCG(EndThenGen); - RCG(CGF); - } + CGF.Builder.restoreIP(OMPBuilder.createTargetData( + OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, + /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); } void CGOpenMPRuntime::emitTargetDataStandAloneCall( @@ -10939,8 +10657,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( /*IsNonContiguous=*/true); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - bool EmitDebug = - CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo; + bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo; OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, EmitDebug, /*ForEndCall=*/false); @@ -11158,7 +10876,7 @@ static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { if (Kind == ParamKindTy::Uniform) return false; - if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef) + if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef) return false; if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) && @@ -11654,8 +11372,10 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, llvm::ArrayRef(FiniArgs)); } -void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, - const OMPDependClause *C) { +template <typename T> +static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, + const T *C, llvm::Value *ULoc, + llvm::Value *ThreadID) { QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); @@ -11672,21 +11392,35 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, /*Volatile=*/false, Int64Ty); } llvm::Value *Args[] = { - emitUpdateLocation(CGF, C->getBeginLoc()), - getThreadID(CGF, C->getBeginLoc()), - CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; + ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; llvm::FunctionCallee RTLFn; - if (C->getDependencyKind() == OMPC_DEPEND_source) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + OMPDoacrossKind<T> ODK; + if (ODK.isSource(C)) { RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_doacross_post); } else { - assert(C->getDependencyKind() == OMPC_DEPEND_sink); + assert(ODK.isSink(C) && "Expect sink modifier."); RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_doacross_wait); } CGF.EmitRuntimeCall(RTLFn, Args); } +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + return EmitDoacrossOrdered<OMPDependClause>( + CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()), + getThreadID(CGF, C->getBeginLoc())); +} + +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDoacrossClause *C) { + return EmitDoacrossOrdered<OMPDoacrossClause>( + CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()), + getThreadID(CGF, C->getBeginLoc())); +} + void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef<llvm::Value *> Args) const { @@ -12375,14 +12109,16 @@ void 
CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( } llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12671,6 +12407,11 @@ void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDoacrossClause *C) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + const VarDecl * CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index e7c1a098c768..2ee2a39ba538 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -232,7 +232,7 @@ public: /// as those marked as `omp declare target`. class DisableAutoDeclareTargetRAII { CodeGenModule &CGM; - bool SavedShouldMarkAsGlobal; + bool SavedShouldMarkAsGlobal = false; public: DisableAutoDeclareTargetRAII(CodeGenModule &CGM); @@ -327,42 +327,6 @@ protected: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// Emits object of ident_t type with info for source location. - /// \param Flags Flags for OpenMP location. - /// \param EmitLoc emit source location with debug-info is off. - /// - llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - unsigned Flags = 0, bool EmitLoc = false); - - /// Emit the number of teams for a target directive. Inspect the num_teams - /// clause associated with a teams construct combined or closely nested - /// with the target directive. - /// - /// Emit a team of size one for directives such as 'target parallel' that - /// have no associated teams construct. - /// - /// Otherwise, return nullptr. - const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - int32_t &DefaultVal); - llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D); - /// Emit the number of threads for a target directive. Inspect the - /// thread_limit clause associated with a teams construct combined or closely - /// nested with the target directive. - /// - /// Emit the num_threads clause for directives such as 'target parallel' that - /// have no associated teams construct. - /// - /// Otherwise, return nullptr. - const Expr * - getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - int32_t &DefaultVal); - llvm::Value * - emitNumThreadsForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D); - /// Returns pointer to ident_t type. 
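Both emitDoacrossOrdered overloads above funnel into the templated EmitDoacrossOrdered, which asks the OMPDoacrossKind trait (specialized near the end of CGOpenMPRuntime.h in this patch) whether the clause is a source or a sink, so the legacy depend(source/sink) clause and the OpenMP 5.2 doacross clause share one implementation. A standalone sketch of the trait dispatch with simplified stand-in clause types:

    #include <cstdio>

    struct DependClause { bool IsSource; };   // stand-in for OMPDependClause
    struct DoacrossClause { bool IsSource; }; // stand-in for OMPDoacrossClause

    template <typename T> struct DoacrossKind {
      static bool isSource(const T *) { return false; }
    };
    template <> struct DoacrossKind<DependClause> {
      static bool isSource(const DependClause *C) { return C->IsSource; }
    };
    template <> struct DoacrossKind<DoacrossClause> {
      static bool isSource(const DoacrossClause *C) { return C->IsSource; }
    };

    // One templated emitter serves both clause kinds, as in the patch:
    // post for sources, wait for sinks.
    template <typename T> void emitOrdered(const T *C) {
      std::puts(DoacrossKind<T>::isSource(C) ? "__kmpc_doacross_post"
                                             : "__kmpc_doacross_wait");
    }

    int main() {
      DependClause DC{true};
      DoacrossClause XC{false};
      emitOrdered(&DC); // post
      emitOrdered(&XC); // wait
    }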
llvm::Type *getIdentTyPointerTy(); @@ -371,9 +335,11 @@ protected: llvm::Value *getThreadID(CodeGenFunction &CGF, SourceLocation Loc); /// Get the function name of an outlined region. - // The name can be customized depending on the target. - // - virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } + std::string getOutlinedHelperName(StringRef Name) const; + std::string getOutlinedHelperName(CodeGenFunction &CGF) const; + + /// Get the function name of a reduction function. + std::string getReductionFuncName(StringRef Name) const; /// Emits \p Callee function call with arguments \p Args with location \p Loc. void emitCall(CodeGenFunction &CGF, SourceLocation Loc, @@ -508,9 +474,6 @@ protected: /// kmp_int64 st; // stride /// }; QualType KmpDimTy; - /// Entity that registers the offloading constants that were emitted so - /// far. - llvm::OffloadEntriesInfoManager OffloadEntriesInfoManager; bool ShouldMarkAsGlobal = true; /// List of the emitted declarations. @@ -552,10 +515,6 @@ protected: /// Device routines are specific to the bool HasEmittedDeclareTargetRegion = false; - /// Loads all the offload entries information from the host IR - /// metadata. - void loadOffloadInfoMetadata(); - /// Start scanning from statement \a S and emit all target regions /// found along the way. /// \param S Starting statement. @@ -657,15 +616,6 @@ protected: llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data); - /// Return the trip count of loops associated with constructs / 'target teams - /// distribute' and 'teams distribute parallel for'. \param SizeEmitter Emits - /// the int64 value for the number of iterations of the associated loop. - llvm::Value *emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, - const OMPLoopDirective &D)> - SizeEmitter); - /// Emit update for lastprivate conditional data. void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, @@ -692,8 +642,72 @@ public: virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Emits object of ident_t type with info for source location. + /// \param Flags Flags for OpenMP location. + /// \param EmitLoc emit source location with debug-info is off. + /// + llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, + unsigned Flags = 0, bool EmitLoc = false); + + /// Emit the number of teams for a target directive. Inspect the num_teams + /// clause associated with a teams construct combined or closely nested + /// with the target directive. + /// + /// Emit a team of size one for directives such as 'target parallel' that + /// have no associated teams construct. + /// + /// Otherwise, return nullptr. + const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + int32_t &DefaultVal); + llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &D); + /// Emit the number of threads for a target directive. Inspect the + /// thread_limit clause associated with a teams construct combined or closely + /// nested with the target directive. + /// + /// Emit the num_threads clause for directives such as 'target parallel' that + /// have no associated teams construct. + /// + /// Otherwise, return nullptr. 
+  const Expr *
+  getNumThreadsExprForTargetDirective(CodeGenFunction &CGF,
+                                      const OMPExecutableDirective &D,
+                                      int32_t &DefaultVal);
+  llvm::Value *
+  emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
+                                   const OMPExecutableDirective &D);
+
+  /// Return the trip count of loops associated with the constructs 'target
+  /// teams distribute' and 'teams distribute parallel for'. \param SizeEmitter
+  /// Emits the int64 value for the number of iterations of the associated
+  /// loop.
+  llvm::Value *emitTargetNumIterationsCall(
+      CodeGenFunction &CGF, const OMPExecutableDirective &D,
+      llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                       const OMPLoopDirective &D)>
+          SizeEmitter);
+
   /// Returns true if the current target is a GPU.
-  virtual bool isTargetCodegen() const { return false; }
+  virtual bool isGPU() const { return false; }
+
+  /// Check if the variable length declaration is delayed.
+  virtual bool isDelayedVariableLengthDecl(CodeGenFunction &CGF,
+                                           const VarDecl *VD) const {
+    return false;
+  }
+
+  /// Get the call to __kmpc_alloc_shared.
+  virtual std::pair<llvm::Value *, llvm::Value *>
+  getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) {
+    llvm_unreachable("not implemented");
+  }
+
+  /// Get the call to __kmpc_free_shared.
+  virtual void getKmpcFreeShared(
+      CodeGenFunction &CGF,
+      const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {
+    llvm_unreachable("not implemented");
+  }
 
   /// Emits code for OpenMP 'if' clause using specified \a CodeGen
   /// function. Here is the logic:
@@ -732,26 +746,30 @@ public:
   /// Emits outlined function for the specified OpenMP parallel directive
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
   /// kmp_int32 BoundID, struct context_vars*).
+  /// \param CGF Reference to current CodeGenFunction.
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
   /// \param InnermostKind Kind of innermost directive (for simple directives it
   /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
   virtual llvm::Function *emitParallelOutlinedFunction(
-      const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
-      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
+      CodeGenFunction &CGF, const OMPExecutableDirective &D,
+      const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
+      const RegionCodeGenTy &CodeGen);
 
   /// Emits outlined function for the specified OpenMP teams directive
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
   /// kmp_int32 BoundID, struct context_vars*).
+  /// \param CGF Reference to current CodeGenFunction.
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
   /// \param InnermostKind Kind of innermost directive (for simple directives it
   /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
   virtual llvm::Function *emitTeamsOutlinedFunction(
-      const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
-      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
+      CodeGenFunction &CGF, const OMPExecutableDirective &D,
+      const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
+      const RegionCodeGenTy &CodeGen);
 
   /// Emits outlined function for the OpenMP task directive \a D. This
  /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
@@ -1185,18 +1203,17 @@ public:
                     bool HasCancel = false);
 
   /// Emits reduction function.
+  /// \param ReducerName Name of the function calling the reduction.
   /// \param ArgsElemType Array type containing pointers to reduction variables.
   /// \param Privates List of private copies for original reduction arguments.
   /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
   /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
   /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
   /// or 'operator binop(LHS, RHS)'.
-  llvm::Function *emitReductionFunction(SourceLocation Loc,
-                                        llvm::Type *ArgsElemType,
-                                        ArrayRef<const Expr *> Privates,
-                                        ArrayRef<const Expr *> LHSExprs,
-                                        ArrayRef<const Expr *> RHSExprs,
-                                        ArrayRef<const Expr *> ReductionOps);
+  llvm::Function *emitReductionFunction(
+      StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
+      ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
+      ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps);
 
   /// Emits single reduction combiner
   void emitSingleReductionCombiner(CodeGenFunction &CGF,
@@ -1441,9 +1458,9 @@ public:
                    bool SeparateBeginEndCalls)
         : llvm::OpenMPIRBuilder::TargetDataInfo(RequiresDevicePointerInfo,
                                                 SeparateBeginEndCalls) {}
-    /// Map between a declaration of a capture and the corresponding base
-    /// pointer address where the runtime returns the device pointers.
-    llvm::DenseMap<const ValueDecl *, Address> CaptureDeviceAddrMap;
+    /// Map between a declaration of a capture and the corresponding new
+    /// LLVM address where the runtime returns the device pointers.
+    llvm::DenseMap<const ValueDecl *, llvm::Value *> CaptureDeviceAddrMap;
   };
 
   /// Emit the target data mapping code associated with \a D.
@@ -1489,6 +1506,11 @@ public:
   virtual void emitDoacrossOrdered(CodeGenFunction &CGF,
                                    const OMPDependClause *C);
 
+  /// Emit code for doacross ordered directive with 'doacross' clause.
+  /// \param C 'doacross' clause with 'sink|source' dependence type.
+  virtual void emitDoacrossOrdered(CodeGenFunction &CGF,
                                   const OMPDoacrossClause *C);
+
   /// Translates the native parameter of outlined function if this is required
   /// for target.
   /// \param FD Field decl from captured record for the parameter.
@@ -1666,30 +1688,30 @@ public:
   /// Emits outlined function for the specified OpenMP parallel directive
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
   /// kmp_int32 BoundID, struct context_vars*).
+  /// \param CGF Reference to current CodeGenFunction.
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
   /// \param InnermostKind Kind of innermost directive (for simple directives it
   /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
-  llvm::Function *
-  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
-                               const VarDecl *ThreadIDVar,
-                               OpenMPDirectiveKind InnermostKind,
-                               const RegionCodeGenTy &CodeGen) override;
+  llvm::Function *emitParallelOutlinedFunction(
+      CodeGenFunction &CGF, const OMPExecutableDirective &D,
+      const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
+      const RegionCodeGenTy &CodeGen) override;
 
   /// Emits outlined function for the specified OpenMP teams directive
   /// \a D.
This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). + /// \param CGF Reference to current CodeGenFunction. /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitTeamsOutlinedFunction( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; /// Emits outlined function for the OpenMP task directive \a D. This /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* @@ -2242,6 +2264,11 @@ public: void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override; + /// Emit code for doacross ordered directive with 'doacross' clause. + /// \param C 'doacross' clause with 'sink|source' dependence type. + void emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDoacrossClause *C) override; + /// Translates the native parameter of outlined function if this is required /// for target. /// \param FD Field decl from captured record for the parameter. @@ -2264,6 +2291,34 @@ public: }; } // namespace CodeGen +// Utility for openmp doacross clause kind +namespace { +template <typename T> class OMPDoacrossKind { +public: + bool isSink(const T *) { return false; } + bool isSource(const T *) { return false; } +}; +template <> class OMPDoacrossKind<OMPDependClause> { +public: + bool isSink(const OMPDependClause *C) { + return C->getDependencyKind() == OMPC_DEPEND_sink; + } + bool isSource(const OMPDependClause *C) { + return C->getDependencyKind() == OMPC_DEPEND_source; + } +}; +template <> class OMPDoacrossKind<OMPDoacrossClause> { +public: + bool isSource(const OMPDoacrossClause *C) { + return C->getDependenceType() == OMPC_DOACROSS_source || + C->getDependenceType() == OMPC_DOACROSS_source_omp_cur_iteration; + } + bool isSink(const OMPDoacrossClause *C) { + return C->getDependenceType() == OMPC_DOACROSS_sink || + C->getDependenceType() == OMPC_DOACROSS_sink_omp_cur_iteration; + } +}; +} // namespace } // namespace clang #endif diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index e8c5f04db49f..62aacb9e24d6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -190,7 +190,7 @@ static RecordDecl *buildRecordForGlobalizedVars( IntegerLiteral::Create(C, Align, C.getIntTypeForBitwidth(32, /*Signed=*/0), SourceLocation()), - {}, AttributeCommonInfo::AS_GNU, AlignedAttr::GNU_aligned)); + {}, AlignedAttr::GNU_aligned)); } GlobalizedRD->addDecl(Field); MappedDeclsFields.try_emplace(VD, Field); @@ -205,6 +205,7 @@ class CheckVarsEscapingDeclContext final CodeGenFunction &CGF; llvm::SetVector<const ValueDecl *> EscapedDecls; llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls; + llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls; llvm::SmallPtrSet<const Decl *, 4> EscapedParameters; RecordDecl *GlobalizedRD = nullptr; llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; @@ -221,10 +222,12 @@ class 
CheckVarsEscapingDeclContext final if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>()) return; // Variables captured by value must be globalized. + bool IsCaptured = false; if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { // Check if need to capture the variable that was already captured by // value in the outer region. + IsCaptured = true; if (!IsForCombinedParallelRegion) { if (!FD->hasAttrs()) return; @@ -251,9 +254,14 @@ class CheckVarsEscapingDeclContext final VD->getType()->isReferenceType()) // Do not globalize variables with reference type. return; - if (VD->getType()->isVariablyModifiedType()) - EscapedVariableLengthDecls.insert(VD); - else + if (VD->getType()->isVariablyModifiedType()) { + // If not captured at the target region level then mark the escaped + // variable as delayed. + if (IsCaptured) + EscapedVariableLengthDecls.insert(VD); + else + DelayedVariableLengthDecls.insert(VD); + } else EscapedDecls.insert(VD); } @@ -485,10 +493,7 @@ public: const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const { assert(GlobalizedRD && "Record for globalized variables must be generated already."); - auto I = MappedDeclsFields.find(VD); - if (I == MappedDeclsFields.end()) - return nullptr; - return I->getSecond(); + return MappedDeclsFields.lookup(VD); } /// Returns the list of the escaped local variables/parameters. @@ -507,6 +512,12 @@ public: ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const { return EscapedVariableLengthDecls.getArrayRef(); } + + /// Returns the list of the delayed variables with the variably modified + /// types. + ArrayRef<const ValueDecl *> getDelayedVariableLengthDecls() const { + return DelayedVariableLengthDecls.getArrayRef(); + } }; } // anonymous namespace @@ -528,6 +539,7 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; unsigned LaneIDBits = llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); + assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device."); unsigned LaneIDMask = ~0u >> (32u - LaneIDBits); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), @@ -655,6 +667,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target: case OMPD_target_teams: return hasNestedSPMDDirective(Ctx, D); + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -859,13 +873,12 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) { - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true, - hasRequiresUnifiedSharedMemory(), + llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice, + isGPU(), hasRequiresUnifiedSharedMemory(), CGM.getLangOpts().OpenMPOffloadMandatory); OMPBuilder.setConfig(Config); - OffloadEntriesInfoManager.setConfig(Config); - if (!CGM.getLangOpts().OpenMPIsDevice) + if (!CGM.getLangOpts().OpenMPIsTargetDevice) llvm_unreachable("OpenMP can only handle device code."); llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); @@ -906,14 +919,15 @@ void CGOpenMPRuntimeGPU::emitNumTeamsClause(CodeGenFunction &CGF, SourceLocation Loc) {} llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { // Emit target region as a standalone region. bool PrevIsInTTDRegion = IsInTTDRegion; IsInTTDRegion = false; auto *OutlinedFun = cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen)); + CGF, D, ThreadIDVar, InnermostKind, CodeGen)); IsInTTDRegion = PrevIsInTTDRegion; if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) { llvm::Function *WrapperFun = @@ -963,8 +977,9 @@ getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, } llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { SourceLocation Loc = D.getBeginLoc(); const RecordDecl *GlobalizedRD = nullptr; @@ -1025,7 +1040,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( } Action(Loc, GlobalizedRD, MappedDeclsFields); CodeGen.setAction(Action); llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( - D, ThreadIDVar, InnermostKind, CodeGen); + CGF, D, ThreadIDVar, InnermostKind, CodeGen); return OutlinedFun; } @@ -1083,41 +1098,66 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, if (auto *DI = CGF.getDebugInfo()) VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation())); } - for (const auto *VD : I->getSecond().EscapedVariableLengthDecls) { - // Use actual memory size of the VLA object including the padding - // for alignment purposes. - llvm::Value *Size = CGF.getTypeSize(VD->getType()); - CharUnits Align = CGM.getContext().getDeclAlign(VD); - Size = Bld.CreateNUWAdd( - Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1)); - llvm::Value *AlignVal = - llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity()); - - Size = Bld.CreateUDiv(Size, AlignVal); - Size = Bld.CreateNUWMul(Size, AlignVal); - // Allocate space for this VLA object to be globalized. 
- llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())}; - llvm::CallBase *VoidPtr = - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_alloc_shared), - AllocArgs, VD->getName()); - VoidPtr->addRetAttr( - llvm::Attribute::get(CGM.getLLVMContext(), llvm::Attribute::Alignment, - CGM.getContext().getTargetInfo().getNewAlign())); - - I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back( - std::pair<llvm::Value *, llvm::Value *>( - {VoidPtr, CGF.getTypeSize(VD->getType())})); - LValue Base = CGF.MakeAddrLValue(VoidPtr, VD->getType(), + for (const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) { + const auto *VD = cast<VarDecl>(ValueD); + std::pair<llvm::Value *, llvm::Value *> AddrSizePair = + getKmpcAllocShared(CGF, VD); + I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair); + LValue Base = CGF.MakeAddrLValue(AddrSizePair.first, VD->getType(), CGM.getContext().getDeclAlign(VD), AlignmentSource::Decl); - I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD), - Base.getAddress(CGF)); + I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress(CGF)); } I->getSecond().MappedParams->apply(CGF); } +bool CGOpenMPRuntimeGPU::isDelayedVariableLengthDecl(CodeGenFunction &CGF, + const VarDecl *VD) const { + const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); + if (I == FunctionGlobalizedDecls.end()) + return false; + + // Check variable declaration is delayed: + return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD); +} + +std::pair<llvm::Value *, llvm::Value *> +CGOpenMPRuntimeGPU::getKmpcAllocShared(CodeGenFunction &CGF, + const VarDecl *VD) { + CGBuilderTy &Bld = CGF.Builder; + + // Compute size and alignment. + llvm::Value *Size = CGF.getTypeSize(VD->getType()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + Size = Bld.CreateNUWAdd( + Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1)); + llvm::Value *AlignVal = + llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity()); + Size = Bld.CreateUDiv(Size, AlignVal); + Size = Bld.CreateNUWMul(Size, AlignVal); + + // Allocate space for this VLA object to be globalized. + llvm::Value *AllocArgs[] = {Size}; + llvm::CallBase *VoidPtr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_shared), + AllocArgs, VD->getName()); + VoidPtr->addRetAttr(llvm::Attribute::get( + CGM.getLLVMContext(), llvm::Attribute::Alignment, Align.getQuantity())); + + return std::make_pair(VoidPtr, Size); +} + +void CGOpenMPRuntimeGPU::getKmpcFreeShared( + CodeGenFunction &CGF, + const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) { + // Deallocate the memory for each globalized VLA object + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free_shared), + {AddrSizePair.first, AddrSizePair.second}); +} + void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck) { if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic && @@ -1126,8 +1166,9 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); if (I != FunctionGlobalizedDecls.end()) { - // Deallocate the memory for each globalized VLA object - for (auto AddrSizePair : + // Deallocate the memory for each globalized VLA object that was + // globalized in the prolog (i.e. emitGenericVarsProlog). 
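The new getKmpcAllocShared above rounds the VLA's byte size up to the declaration's alignment before calling __kmpc_alloc_shared; the CreateNUWAdd / CreateUDiv / CreateNUWMul sequence is the classic round-up idiom. The same arithmetic in plain C++:

    #include <cstdint>
    #include <cstdio>

    // Round Size up to the next multiple of Align (Align > 0), mirroring
    // Size = ((Size + Align - 1) / Align) * Align in the emitted IR.
    static uint64_t roundUpToAlignment(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) / Align * Align;
    }

    int main() {
      // A 13-byte VLA with 8-byte alignment occupies 16 shared bytes.
      std::printf("%llu\n",
                  static_cast<unsigned long long>(roundUpToAlignment(13, 8)));
    }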
+ for (const auto &AddrSizePair : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_free_shared), @@ -1555,10 +1596,9 @@ static void emitReductionListCopy( case RemoteLaneToThread: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = - CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( - SrcElementPtrAddr, PrivateLlvmPtrType), - PrivatePtrType->castAs<PointerType>()); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr.withElementType(PrivateLlvmPtrType), + PrivatePtrType->castAs<PointerType>()); // Step 1.2: Create a temporary to store the element in the destination // Reduce list. @@ -1572,27 +1612,24 @@ static void emitReductionListCopy( case ThreadCopy: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = - CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( - SrcElementPtrAddr, PrivateLlvmPtrType), - PrivatePtrType->castAs<PointerType>()); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr.withElementType(PrivateLlvmPtrType), + PrivatePtrType->castAs<PointerType>()); // Step 1.2: Get the address for dest element. The destination // element has already been created on the thread's stack. DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); - DestElementAddr = - CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( - DestElementPtrAddr, PrivateLlvmPtrType), - PrivatePtrType->castAs<PointerType>()); + DestElementAddr = CGF.EmitLoadOfPointer( + DestElementPtrAddr.withElementType(PrivateLlvmPtrType), + PrivatePtrType->castAs<PointerType>()); break; } case ThreadToScratchpad: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); - SrcElementAddr = - CGF.EmitLoadOfPointer(CGF.Builder.CreateElementBitCast( - SrcElementPtrAddr, PrivateLlvmPtrType), - PrivatePtrType->castAs<PointerType>()); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr.withElementType(PrivateLlvmPtrType), + PrivatePtrType->castAs<PointerType>()); // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. @@ -1634,10 +1671,10 @@ static void emitReductionListCopy( // Regardless of src and dest of copy, we emit the load of src // element as this is required in all directions - SrcElementAddr = Bld.CreateElementBitCast( - SrcElementAddr, CGF.ConvertTypeForMem(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast(DestElementAddr, - SrcElementAddr.getElementType()); + SrcElementAddr = SrcElementAddr.withElementType( + CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = + DestElementAddr.withElementType(SrcElementAddr.getElementType()); // Now that all active lanes have read the element in the // Reduce list, shuffle over the value from the remote lane. 
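The copy directions above (RemoteLaneToThread and friends) feed the warp-level reduction: each active lane loads its element, then repeatedly combines it with the value shuffled over from a lane at a halving offset, which is a binary-tree reduction across the warp. A host-side simulation of that offset-halving pattern; the warp size and data are illustrative:

    #include <cstdio>

    int main() {
      const unsigned WarpSize = 8; // illustrative; real warps are 32 or 64
      int Lanes[WarpSize] = {1, 2, 3, 4, 5, 6, 7, 8};

      // Each round, lane I adds the value a shuffle-down would deliver from
      // lane I + Offset; after log2(WarpSize) rounds lane 0 holds the sum.
      for (unsigned Offset = WarpSize / 2; Offset > 0; Offset /= 2)
        for (unsigned I = 0; I < Offset; ++I)
          Lanes[I] += Lanes[I + Offset];

      std::printf("warp sum = %d\n", Lanes[0]); // 36
    }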
@@ -1866,8 +1903,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); // elemptr = ((CopyType*)(elemptrptr)) + I - Address ElemPtr(ElemPtrPtr, CGF.Int8Ty, Align); - ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType); + Address ElemPtr(ElemPtrPtr, CopyType, Align); if (NumIters > 1) ElemPtr = Bld.CreateGEP(ElemPtr, Cnt); @@ -1941,8 +1977,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); - Address TargetElemPtr(TargetElemPtrVal, CGF.Int8Ty, Align); - TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType); + Address TargetElemPtr(TargetElemPtrVal, CopyType, Align); if (NumIters > 1) TargetElemPtr = Bld.CreateGEP(TargetElemPtr, Cnt); @@ -2373,8 +2408,7 @@ static llvm::Value *emitListToGlobalReduceFunction( Address GlobAddr = GlobLVal.getAddress(CGF); llvm::Value *BufferPtr = Bld.CreateInBoundsGEP( GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs); - llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); - CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); + CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. ++Idx; @@ -2390,8 +2424,7 @@ static llvm::Value *emitListToGlobalReduceFunction( } // Call reduce_function(GlobalReduceList, ReduceList) - llvm::Value *GlobalReduceList = - CGF.EmitCastToVoidPtr(ReductionList.getPointer()); + llvm::Value *GlobalReduceList = ReductionList.getPointer(); Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc); @@ -2583,8 +2616,7 @@ static llvm::Value *emitGlobalToListReduceFunction( Address GlobAddr = GlobLVal.getAddress(CGF); llvm::Value *BufferPtr = Bld.CreateInBoundsGEP( GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs); - llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); - CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); + CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
++Idx; @@ -2600,8 +2632,7 @@ static llvm::Value *emitGlobalToListReduceFunction( } // Call reduce_function(ReduceList, GlobalReduceList) - llvm::Value *GlobalReduceList = - CGF.EmitCastToVoidPtr(ReductionList.getPointer()); + llvm::Value *GlobalReduceList = ReductionList.getPointer(); Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc); @@ -2923,9 +2954,9 @@ void CGOpenMPRuntimeGPU::emitReduction( llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( ReductionList.getPointer(), CGF.VoidPtrTy); - llvm::Function *ReductionFn = - emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy), - Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Function *ReductionFn = emitReductionFunction( + CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), + Privates, LHSExprs, RHSExprs, ReductionOps); llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction( CGM, Privates, ReductionArrayTy, ReductionFn, Loc); @@ -3085,17 +3116,16 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF, unsigned NativePointeeAddrSpace = CGF.getTypes().getTargetAddressSpace(NativePointeeTy); QualType TargetTy = TargetParam->getType(); - llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( - LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation()); + llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, /*Volatile=*/false, + TargetTy, SourceLocation()); // First cast to generic. TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TargetAddr, llvm::PointerType::getWithSamePointeeType( - cast<llvm::PointerType>(TargetAddr->getType()), /*AddrSpace=*/0)); + TargetAddr, + llvm::PointerType::get(CGF.getLLVMContext(), /*AddrSpace=*/0)); // Cast from generic to native address space. 
TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TargetAddr, llvm::PointerType::getWithSamePointeeType( - cast<llvm::PointerType>(TargetAddr->getType()), - NativePointeeAddrSpace)); + TargetAddr, + llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace)); Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType); CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false, NativeParamType); @@ -3120,8 +3150,8 @@ void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall( continue; } llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - NativeArg, llvm::PointerType::getWithSamePointeeType( - cast<llvm::PointerType>(NativeArg->getType()), /*AddrSpace*/ 0)); + NativeArg, + llvm::PointerType::get(CGF.getLLVMContext(), /*AddrSpace*/ 0)); TargetArgs.emplace_back( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); } @@ -3292,7 +3322,10 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, TeamAndReductions.second.clear(); ArrayRef<const ValueDecl *> EscapedVariableLengthDecls = VarChecker.getEscapedVariableLengthDecls(); - if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty()) + ArrayRef<const ValueDecl *> DelayedVariableLengthDecls = + VarChecker.getDelayedVariableLengthDecls(); + if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() && + DelayedVariableLengthDecls.empty()) return; auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().MappedParams = @@ -3302,6 +3335,8 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, VarChecker.getEscapedParameters().end()); I->getSecond().EscapedVariableLengthDecls.append( EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end()); + I->getSecond().DelayedVariableLengthDecls.append( + DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end()); DeclToAddrMapTy &Data = I->getSecond().LocalVarData; for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { assert(VD->isCanonicalDecl() && "Expected canonical declaration"); @@ -3352,7 +3387,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF, llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); auto *GV = new llvm::GlobalVariable( CGM.getModule(), VarTy, /*isConstant=*/false, - llvm::GlobalValue::InternalLinkage, llvm::Constant::getNullValue(VarTy), + llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy), VD->getName(), /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(AS)); @@ -3579,6 +3614,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::GFX90a: case CudaArch::GFX90c: case CudaArch::GFX940: + case CudaArch::GFX941: + case CudaArch::GFX942: case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: @@ -3594,6 +3631,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::GFX1101: case CudaArch::GFX1102: case CudaArch::GFX1103: + case CudaArch::GFX1150: + case CudaArch::GFX1151: case CudaArch::Generic: case CudaArch::UNUSED: case CudaArch::UNKNOWN: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 75d140205773..dddfe5a94dcc 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -110,45 +110,7 @@ private: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override; - /// Emits code for parallel or serial call of the \a OutlinedFn with - /// variables captured in a record which address is stored in \a - /// 
CapturedStruct. - /// This call is for the Non-SPMD Execution Mode. - /// \param OutlinedFn Outlined function to be run in parallel threads. Type of - /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). - /// \param CapturedVars A pointer to the record with the references to - /// variables used in \a OutlinedFn function. - /// \param IfCond Condition in the associated 'if' clause, if it was - /// specified, nullptr otherwise. - void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, - ArrayRef<llvm::Value *> CapturedVars, - const Expr *IfCond); - - /// Emits code for parallel or serial call of the \a OutlinedFn with - /// variables captured in a record which address is stored in \a - /// CapturedStruct. - /// This call is for a parallel directive within an SPMD target directive. - /// \param OutlinedFn Outlined function to be run in parallel threads. Type of - /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). - /// \param CapturedVars A pointer to the record with the references to - /// variables used in \a OutlinedFn function. - /// \param IfCond Condition in the associated 'if' clause, if it was - /// specified, nullptr otherwise. - /// - void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef<llvm::Value *> CapturedVars, - const Expr *IfCond); - protected: - /// Get the function name of an outlined region. - // The name can be customized depending on the target. - // - StringRef getOutlinedHelperName() const override { - return "__omp_outlined__"; - } - /// Check if the default location must be constant. /// Constant for NVPTX for better optimization. bool isDefaultLocationConstant() const override { return true; } @@ -157,12 +119,25 @@ public: explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); void clear() override; - bool isTargetCodegen() const override { return true; }; + bool isGPU() const override { return true; }; /// Declare generalized virtual functions which need to be defined /// by all specializations of OpenMPGPURuntime Targets like AMDGCN /// and NVPTX. + /// Check if the variable length declaration is delayed: + bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, + const VarDecl *VD) const override; + + /// Get call to __kmpc_alloc_shared + std::pair<llvm::Value *, llvm::Value *> + getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override; + + /// Get call to __kmpc_free_shared + void getKmpcFreeShared( + CodeGenFunction &CGF, + const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) override; + /// Get the GPU warp size. llvm::Value *getGPUWarpSize(CodeGenFunction &CGF); @@ -197,31 +172,31 @@ public: // directive. /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). + /// \param CGF Reference to current CodeGenFunction. /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. 
- llvm::Function * - emitParallelOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitParallelOutlinedFunction( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; /// Emits inlined function for the specified OpenMP teams // directive. /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). + /// \param CGF Reference to current CodeGenFunction. /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Function * - emitTeamsOutlinedFunction(const OMPExecutableDirective &D, - const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) override; + llvm::Function *emitTeamsOutlinedFunction( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record which address is stored in \a @@ -294,12 +269,6 @@ public: ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) override; - /// Returns specified OpenMP runtime function for the current OpenMP - /// implementation. Specialized for the NVPTX device. - /// \param Function OpenMP runtime function. - /// \return Specified function. - llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function); - /// Translates the native parameter of outlined function if this is required /// for target. /// \param FD Field decl from captured record for the parameter. @@ -403,6 +372,7 @@ private: DeclToAddrMapTy LocalVarData; EscapedParamsTy EscapedParameters; llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; + llvm::SmallVector<const ValueDecl *, 4> DelayedVariableLengthDecls; llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> EscapedVariableLengthDeclsAddrs; std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 596f0bd33204..888b7ddcccd3 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -182,7 +182,7 @@ struct CGRecordLowering { llvm::Type *StorageType); /// Lowers an ASTRecordLayout to a llvm type. void lower(bool NonVirtualBaseType); - void lowerUnion(); + void lowerUnion(bool isNoUniqueAddress); void accumulateFields(); void accumulateBitFields(RecordDecl::field_iterator Field, RecordDecl::field_iterator FieldEnd); @@ -280,7 +280,7 @@ void CGRecordLowering::lower(bool NVBaseType) { // CodeGenTypes::ComputeRecordLayout. CharUnits Size = NVBaseType ? 
Layout.getNonVirtualSize() : Layout.getSize(); if (D->isUnion()) { - lowerUnion(); + lowerUnion(NVBaseType); computeVolatileBitfields(); return; } @@ -308,8 +308,9 @@ void CGRecordLowering::lower(bool NVBaseType) { computeVolatileBitfields(); } -void CGRecordLowering::lowerUnion() { - CharUnits LayoutSize = Layout.getSize(); +void CGRecordLowering::lowerUnion(bool isNoUniqueAddress) { + CharUnits LayoutSize = + isNoUniqueAddress ? Layout.getDataSize() : Layout.getSize(); llvm::Type *StorageType = nullptr; bool SeenNamedMember = false; // Iterate through the fields setting bitFieldInfo and the Fields array. Also @@ -365,7 +366,12 @@ void CGRecordLowering::lowerUnion() { FieldTypes.push_back(StorageType); appendPaddingBytes(LayoutSize - getSize(StorageType)); // Set packed if we need it. - if (LayoutSize % getAlignment(StorageType)) + const auto StorageAlignment = getAlignment(StorageType); + assert((Layout.getSize() % StorageAlignment == 0 || + Layout.getDataSize() % StorageAlignment) && + "Union's standard layout and no_unique_address layout must agree on " + "packedness"); + if (Layout.getDataSize() % StorageAlignment) Packed = true; } @@ -379,9 +385,14 @@ void CGRecordLowering::accumulateFields() { for (++Field; Field != FieldEnd && Field->isBitField(); ++Field); accumulateBitFields(Start, Field); } else if (!Field->isZeroSize(Context)) { + // Use base subobject layout for the potentially-overlapping field, + // as it is done in RecordLayoutBuilder Members.push_back(MemberInfo( bitsToCharUnits(getFieldBitOffset(*Field)), MemberInfo::Field, - getStorageType(*Field), *Field)); + Field->isPotentiallyOverlapping() + ? getStorageType(Field->getType()->getAsCXXRecordDecl()) + : getStorageType(*Field), + *Field)); ++Field; } else { ++Field; @@ -882,7 +893,7 @@ CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { // If we're in C++, compute the base subobject type. 
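The lowerUnion(isNoUniqueAddress) and accumulateFields changes above size a potentially-overlapping field (for example, one declared [[no_unique_address]]) by its base-subobject layout, so the enclosing record may reuse its tail padding. A small probe of the user-visible effect (the layout numbers are typical for the Itanium ABI, not guaranteed):

#include <cstdio>

struct Probe {
  int i;
  char c;
}; // commonly size 8, data size 5

struct Holder {
  [[no_unique_address]] Probe p; // potentially-overlapping field
  char x;                        // may be placed inside p's tail padding
};

int main() {
  // With clang on an Itanium-ABI target, sizeof(Holder) is typically 8
  // rather than 12, because x lands in Probe's padding.
  std::printf("sizeof(Probe)=%zu sizeof(Holder)=%zu\n", sizeof(Probe),
              sizeof(Holder));
  return 0;
}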
llvm::StructType *BaseTy = nullptr; - if (isa<CXXRecordDecl>(D) && !D->isUnion() && !D->hasAttr<FinalAttr>()) { + if (isa<CXXRecordDecl>(D)) { BaseTy = Ty; if (Builder.Layout.getNonVirtualSize() != Builder.Layout.getSize()) { CGRecordLowering BaseBuilder(*this, D, /*Packed=*/Builder.Packed); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 248ffb544014..2184b8600d76 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -24,6 +24,8 @@ #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Assumptions.h" @@ -414,19 +416,22 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S)); break; case Stmt::OMPTeamsGenericLoopDirectiveClass: - llvm_unreachable("teams loop directive not supported yet."); + EmitOMPTeamsGenericLoopDirective(cast<OMPTeamsGenericLoopDirective>(*S)); break; case Stmt::OMPTargetTeamsGenericLoopDirectiveClass: - llvm_unreachable("target teams loop directive not supported yet."); + EmitOMPTargetTeamsGenericLoopDirective( + cast<OMPTargetTeamsGenericLoopDirective>(*S)); break; case Stmt::OMPParallelGenericLoopDirectiveClass: - llvm_unreachable("parallel loop directive not supported yet."); + EmitOMPParallelGenericLoopDirective( + cast<OMPParallelGenericLoopDirective>(*S)); break; case Stmt::OMPTargetParallelGenericLoopDirectiveClass: - llvm_unreachable("target parallel loop directive not supported yet."); + EmitOMPTargetParallelGenericLoopDirective( + cast<OMPTargetParallelGenericLoopDirective>(*S)); break; case Stmt::OMPParallelMaskedDirectiveClass: - llvm_unreachable("parallel masked directive not supported yet."); + EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S)); break; } } @@ -2191,9 +2196,9 @@ std::pair<llvm::Value*, llvm::Type *> CodeGenFunction::EmitAsmInputLValue( getTargetHooks().isScalarizableAsmOperand(*this, Ty)) { Ty = llvm::IntegerType::get(getLLVMContext(), Size); - return {Builder.CreateLoad(Builder.CreateElementBitCast( - InputValue.getAddress(*this), Ty)), - nullptr}; + return { + Builder.CreateLoad(InputValue.getAddress(*this).withElementType(Ty)), + nullptr}; } } @@ -2327,6 +2332,92 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, } } +static void +EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, + const llvm::ArrayRef<llvm::Value *> RegResults, + const llvm::ArrayRef<llvm::Type *> ResultRegTypes, + const llvm::ArrayRef<llvm::Type *> ResultTruncRegTypes, + const llvm::ArrayRef<LValue> ResultRegDests, + const llvm::ArrayRef<QualType> ResultRegQualTys, + const llvm::BitVector &ResultTypeRequiresCast, + const llvm::BitVector &ResultRegIsFlagReg) { + CGBuilderTy &Builder = CGF.Builder; + CodeGenModule &CGM = CGF.CGM; + llvm::LLVMContext &CTX = CGF.getLLVMContext(); + + assert(RegResults.size() == ResultRegTypes.size()); + assert(RegResults.size() == ResultTruncRegTypes.size()); + assert(RegResults.size() == ResultRegDests.size()); + // ResultRegDests can be also populated by addReturnRegisterOutputs() above, + // in which case its size may grow. 
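The EmitStmt switch above replaces the llvm_unreachable stubs for the generic 'loop' combinations with real codegen entry points. User code of the following shape now lowers through those paths (a sketch; build with -fopenmp, OpenMP 5.1 support assumed for the combined forms):

#include <cstdio>

int main() {
  float x[1024], y[1024];
  for (int i = 0; i < 1024; ++i) {
    x[i] = 1.0f;
    y[i] = 2.0f;
  }

  // Previously rejected with "not supported yet", now emitted as the
  // constituent constructs (see EmitOMPTargetTeamsGenericLoopDirective
  // and friends further down in CGStmtOpenMP.cpp).
#pragma omp target teams loop
  for (int i = 0; i < 1024; ++i)
    y[i] += 2.0f * x[i];

#pragma omp parallel loop
  for (int i = 0; i < 1024; ++i)
    y[i] *= 0.5f;

  std::printf("y[0] = %f\n", y[0]);
  return 0;
}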
+ assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); + assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); + + for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { + llvm::Value *Tmp = RegResults[i]; + llvm::Type *TruncTy = ResultTruncRegTypes[i]; + + if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { + // Target must guarantee the Value `Tmp` here is lowered to a boolean + // value. + llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2); + llvm::Value *IsBooleanValue = + Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two); + llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); + Builder.CreateCall(FnAssume, IsBooleanValue); + } + + // If the result type of the LLVM IR asm doesn't match the result type of + // the expression, do the conversion. + if (ResultRegTypes[i] != TruncTy) { + + // Truncate the integer result to the right size, note that TruncTy can be + // a pointer. + if (TruncTy->isFloatingPointTy()) + Tmp = Builder.CreateFPTrunc(Tmp, TruncTy); + else if (TruncTy->isPointerTy() && Tmp->getType()->isIntegerTy()) { + uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy); + Tmp = Builder.CreateTrunc( + Tmp, llvm::IntegerType::get(CTX, (unsigned)ResSize)); + Tmp = Builder.CreateIntToPtr(Tmp, TruncTy); + } else if (Tmp->getType()->isPointerTy() && TruncTy->isIntegerTy()) { + uint64_t TmpSize = + CGM.getDataLayout().getTypeSizeInBits(Tmp->getType()); + Tmp = Builder.CreatePtrToInt( + Tmp, llvm::IntegerType::get(CTX, (unsigned)TmpSize)); + Tmp = Builder.CreateTrunc(Tmp, TruncTy); + } else if (TruncTy->isIntegerTy()) { + Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); + } else if (TruncTy->isVectorTy()) { + Tmp = Builder.CreateBitCast(Tmp, TruncTy); + } + } + + LValue Dest = ResultRegDests[i]; + // ResultTypeRequiresCast elements correspond to the first + // ResultTypeRequiresCast.size() elements of RegResults. + if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { + unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); + Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]); + if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) { + Builder.CreateStore(Tmp, A); + continue; + } + + QualType Ty = + CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false); + if (Ty.isNull()) { + const Expr *OutExpr = S.getOutputExpr(i); + CGM.getDiags().Report(OutExpr->getExprLoc(), + diag::err_store_value_to_reg); + return; + } + Dest = CGF.MakeAddrLValue(A, Ty); + } + CGF.EmitStoreThroughLValue(RValue::get(Tmp), Dest); + } +} + void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Pop all cleanup blocks at the end of the asm statement. CodeGenFunction::RunCleanupsScope Cleanups(*this); @@ -2487,8 +2578,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Otherwise there will be a mis-match if the matrix is also an // input-argument which is represented as vector. 
if (isa<MatrixType>(OutExpr->getType().getCanonicalType())) - DestAddr = Builder.CreateElementBitCast( - DestAddr, ConvertType(OutExpr->getType())); + DestAddr = DestAddr.withElementType(ConvertType(OutExpr->getType())); ArgTypes.push_back(DestAddr.getType()); ArgElemTypes.push_back(DestAddr.getElementType()); @@ -2627,7 +2717,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { SmallVector<llvm::BasicBlock *, 16> Transfer; llvm::BasicBlock *Fallthrough = nullptr; bool IsGCCAsmGoto = false; - if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) { + if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) { IsGCCAsmGoto = GS->isAsmGoto(); if (IsGCCAsmGoto) { for (const auto *E : GS->labels()) { @@ -2690,7 +2780,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { "unwind clobber can't be used with asm goto"); // Add machine specific clobbers - std::string MachineClobbers = getTarget().getClobbers(); + std::string_view MachineClobbers = getTarget().getClobbers(); if (!MachineClobbers.empty()) { if (!Constraints.empty()) Constraints += ','; @@ -2721,13 +2811,40 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect, HasUnwindClobber); std::vector<llvm::Value*> RegResults; + llvm::CallBrInst *CBR; + llvm::DenseMap<llvm::BasicBlock *, SmallVector<llvm::Value *, 4>> + CBRRegResults; if (IsGCCAsmGoto) { - llvm::CallBrInst *Result = - Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); + CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); EmitBlock(Fallthrough); - UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, false, - ReadOnly, ReadNone, InNoMergeAttributedStmt, S, - ResultRegTypes, ArgElemTypes, *this, RegResults); + UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone, + InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, + *this, RegResults); + // Because we are emitting code top to bottom, we don't have enough + // information at this point to know precisely whether we have a critical + // edge. If we have outputs, split all indirect destinations. 
+ if (!RegResults.empty()) { + unsigned i = 0; + for (llvm::BasicBlock *Dest : CBR->getIndirectDests()) { + llvm::Twine SynthName = Dest->getName() + ".split"; + llvm::BasicBlock *SynthBB = createBasicBlock(SynthName); + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(SynthBB); + + if (ResultRegTypes.size() == 1) { + CBRRegResults[SynthBB].push_back(CBR); + } else { + for (unsigned j = 0, e = ResultRegTypes.size(); j != e; ++j) { + llvm::Value *Tmp = Builder.CreateExtractValue(CBR, j, "asmresult"); + CBRRegResults[SynthBB].push_back(Tmp); + } + } + + EmitBranch(Dest); + EmitBlock(SynthBB); + CBR->setIndirectDest(i++, SynthBB); + } + } } else if (HasUnwindClobber) { llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, ""); UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone, @@ -2736,79 +2853,26 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { } else { llvm::CallInst *Result = Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); - UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, false, - ReadOnly, ReadNone, InNoMergeAttributedStmt, S, - ResultRegTypes, ArgElemTypes, *this, RegResults); + UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone, + InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes, + *this, RegResults); } - assert(RegResults.size() == ResultRegTypes.size()); - assert(RegResults.size() == ResultTruncRegTypes.size()); - assert(RegResults.size() == ResultRegDests.size()); - // ResultRegDests can be also populated by addReturnRegisterOutputs() above, - // in which case its size may grow. - assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); - assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); - for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { - llvm::Value *Tmp = RegResults[i]; - llvm::Type *TruncTy = ResultTruncRegTypes[i]; - - if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { - // Target must guarantee the Value `Tmp` here is lowered to a boolean - // value. - llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2); - llvm::Value *IsBooleanValue = - Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two); - llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume); - Builder.CreateCall(FnAssume, IsBooleanValue); - } - - // If the result type of the LLVM IR asm doesn't match the result type of - // the expression, do the conversion. - if (ResultRegTypes[i] != ResultTruncRegTypes[i]) { - - // Truncate the integer result to the right size, note that TruncTy can be - // a pointer. 
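The synthetic '.split' blocks above exist so that an 'asm goto' with outputs can store its results on every indirect edge, not only on the fallthrough; EmitAsmStores is re-run once per indirect destination further down. A minimal user-level illustration (x86-64 GCC-style inline asm assumed; reading the output on the 'taken' path is precisely what this change makes well-defined):

#include <cstdio>

int main() {
  int out;
  // %0 is the output operand; %l1 is the first label operand.
  asm goto("movl $1, %0\n\t"
           "jmp %l1"
           : "=r"(out)
           :
           :
           : taken);
  std::printf("fallthrough: out = %d\n", out);
  return 0;
taken:
  std::printf("taken: out = %d\n", out);
  return 0;
}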
- if (TruncTy->isFloatingPointTy()) - Tmp = Builder.CreateFPTrunc(Tmp, TruncTy); - else if (TruncTy->isPointerTy() && Tmp->getType()->isIntegerTy()) { - uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy); - Tmp = Builder.CreateTrunc(Tmp, - llvm::IntegerType::get(getLLVMContext(), (unsigned)ResSize)); - Tmp = Builder.CreateIntToPtr(Tmp, TruncTy); - } else if (Tmp->getType()->isPointerTy() && TruncTy->isIntegerTy()) { - uint64_t TmpSize =CGM.getDataLayout().getTypeSizeInBits(Tmp->getType()); - Tmp = Builder.CreatePtrToInt(Tmp, - llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize)); - Tmp = Builder.CreateTrunc(Tmp, TruncTy); - } else if (TruncTy->isIntegerTy()) { - Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); - } else if (TruncTy->isVectorTy()) { - Tmp = Builder.CreateBitCast(Tmp, TruncTy); - } - } + EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, + ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast, + ResultRegIsFlagReg); - LValue Dest = ResultRegDests[i]; - // ResultTypeRequiresCast elements correspond to the first - // ResultTypeRequiresCast.size() elements of RegResults. - if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { - unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]); - Address A = Builder.CreateElementBitCast(Dest.getAddress(*this), - ResultRegTypes[i]); - if (getTargetHooks().isScalarizableAsmOperand(*this, TruncTy)) { - Builder.CreateStore(Tmp, A); - continue; - } - - QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false); - if (Ty.isNull()) { - const Expr *OutExpr = S.getOutputExpr(i); - CGM.getDiags().Report(OutExpr->getExprLoc(), - diag::err_store_value_to_reg); - return; - } - Dest = MakeAddrLValue(A, Ty); + // If this is an asm goto with outputs, repeat EmitAsmStores, but with a + // different insertion point; one for each indirect destination and with + // CBRRegResults rather than RegResults. + if (IsGCCAsmGoto && !CBRRegResults.empty()) { + for (llvm::BasicBlock *Succ : CBR->getIndirectDests()) { + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(Succ, --(Succ->end())); + EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes, + ResultTruncRegTypes, ResultRegDests, ResultRegQualTys, + ResultTypeRequiresCast, ResultRegIsFlagReg); } - EmitStoreThroughLValue(RValue::get(Tmp), Dest); } } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 6bc30ad0302e..4910ff6865e4 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -704,7 +704,7 @@ void CodeGenFunction::EmitOMPAggregateAssign( // Drill down to the base element type on both arrays. 
const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); - SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + SrcAddr = SrcAddr.withElementType(DestAddr.getElementType()); llvm::Value *SrcBegin = SrcAddr.getPointer(); llvm::Value *DestBegin = DestAddr.getPointer(); @@ -802,7 +802,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, if (!HaveInsertPoint()) return false; bool DeviceConstTarget = - getLangOpts().OpenMPIsDevice && + getLangOpts().OpenMPIsTargetDevice && isOpenMPTargetExecutionDirective(D.getDirectiveKind()); bool FirstprivateIsLastprivate = false; llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; @@ -1266,10 +1266,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit( // implicit variable. PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress(*this)); - PrivateScope.addPrivate(RHSVD, Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), - "rhs.begin")); + PrivateScope.addPrivate(RHSVD, + GetAddrOfLocalVar(PrivateVD).withElementType( + ConvertTypeForMem(RHSVD->getType()))); } else { QualType Type = PrivateVD->getType(); bool IsArray = getContext().getAsArrayType(Type) != nullptr; @@ -1277,14 +1276,13 @@ void CodeGenFunction::EmitOMPReductionClauseInit( // Store the address of the original variable associated with the LHS // implicit variable. if (IsArray) { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); + OriginalAddr = + OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType())); } PrivateScope.addPrivate(LHSVD, OriginalAddr); PrivateScope.addPrivate( - RHSVD, IsArray ? Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType( + ConvertTypeForMem(RHSVD->getType())) : GetAddrOfLocalVar(PrivateVD)); } ++ILHS; @@ -1547,7 +1545,8 @@ static void emitCommonOMPParallelDirective( llvm::Value *NumThreads = nullptr; llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, + CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), @@ -1821,6 +1820,11 @@ class OMPTransformDirectiveScopeRAII { CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; + OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = + delete; + OMPTransformDirectiveScopeRAII & + operator=(const OMPTransformDirectiveScopeRAII &) = delete; + public: OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { @@ -4489,6 +4493,33 @@ void CodeGenFunction::EmitOMPParallelMasterDirective( checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPParallelMaskedDirective( + const OMPParallelMaskedDirective &S) { + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'masked' directive. 
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + emitOMPCopyinClause(CGF, S); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + emitMasked(CGF, S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); +} + void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit @@ -4826,6 +4857,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // a pointer to this memory. for (auto &Pair : UntiedLocalVars) { QualType VDType = Pair.first->getType().getNonReferenceType(); + if (Pair.first->getType()->isLValueReferenceType()) + VDType = CGF.getContext().getPointerType(VDType); if (isAllocatableDecl(Pair.first)) { llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); Address Replacement( @@ -5811,37 +5844,46 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, return Fn; } +template <typename T> +static void emitRestoreIP(CodeGenFunction &CGF, const T *C, + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, + llvm::OpenMPIRBuilder &OMPBuilder) { + + unsigned NumLoops = C->getNumLoops(); + QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/64, /*Signed=*/1); + llvm::SmallVector<llvm::Value *> StoreValues; + for (unsigned I = 0; I < NumLoops; I++) { + const Expr *CounterVal = C->getLoopData(I); + assert(CounterVal); + llvm::Value *StoreValue = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + StoreValues.emplace_back(StoreValue); + } + OMPDoacrossKind<T> ODK; + bool IsDependSource = ODK.isSource(C); + CGF.Builder.restoreIP( + OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops, + StoreValues, ".cnt.addr", IsDependSource)); +} + void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - if (S.hasClausesOfKind<OMPDependClause>()) { + if (S.hasClausesOfKind<OMPDependClause>() || + S.hasClausesOfKind<OMPDoacrossClause>()) { // The ordered directive with depend clause. 
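EmitOMPParallelMaskedDirective above composes the new combined construct from 'parallel' plus 'masked'. In user code (OpenMP 5.1; build with -fopenmp -fopenmp-version=51):

#include <cstdio>
#include <omp.h>

int main() {
  // One parallel team; only the masked thread (by default the primary
  // thread) executes the region body.
#pragma omp parallel masked
  {
    std::printf("masked body run by thread %d\n", omp_get_thread_num());
  }
  return 0;
}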
- assert(!S.hasAssociatedStmt() && - "No associated statement must be in ordered depend construct."); + assert(!S.hasAssociatedStmt() && "No associated statement must be in " + "ordered depend|doacross construct."); InsertPointTy AllocaIP(AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) { - unsigned NumLoops = DC->getNumLoops(); - QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth( - /*DestWidth=*/64, /*Signed=*/1); - llvm::SmallVector<llvm::Value *> StoreValues; - for (unsigned I = 0; I < NumLoops; I++) { - const Expr *CounterVal = DC->getLoopData(I); - assert(CounterVal); - llvm::Value *StoreValue = EmitScalarConversion( - EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, - CounterVal->getExprLoc()); - StoreValues.emplace_back(StoreValue); - } - bool IsDependSource = false; - if (DC->getDependencyKind() == OMPC_DEPEND_source) - IsDependSource = true; - Builder.restoreIP(OMPBuilder.createOrderedDepend( - Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr", - IsDependSource)); - } + for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) + emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); + for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) + emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); } else { // The ordered directive with threads or simd clause, or without clause. // Without clause, it behaves as if the threads clause is specified. @@ -5888,6 +5930,13 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); return; } + if (S.hasClausesOfKind<OMPDoacrossClause>()) { + assert(!S.hasAssociatedStmt() && + "No associated statement must be in ordered doacross construct."); + for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) + CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); + return; + } const auto *C = S.getSingleClause<OMPSIMDClause>(); auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -6533,7 +6582,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, CodeGenModule &CGM = CGF.CGM; // On device emit this construct as inlined code. 
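The hunk above extends ordered-dependence emission to the OpenMP 5.2 'doacross' clause, funnelling both spellings through the shared emitRestoreIP helper. A sketch of source that exercises the new clause (OpenMP 5.2 spelling; the older depend(source)/depend(sink:) form takes the same path):

#include <cstdio>

int main() {
  int a[16] = {0};
  a[0] = 1;
#pragma omp parallel for ordered(1)
  for (int i = 1; i < 16; ++i) {
    // Wait for the previous iteration, then publish this one.
#pragma omp ordered doacross(sink : i - 1)
    a[i] = a[i - 1] + 1;
#pragma omp ordered doacross(source:)
  }
  std::printf("a[15] = %d\n", a[15]);
  return 0;
}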
- if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { OMPLexicalScope Scope(CGF, S, OMPD_target); CGM.getOpenMPRuntime().emitInlinedDirective( CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -6656,7 +6705,8 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, + CodeGen); const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); @@ -6923,27 +6973,27 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); llvm::Value *Device = nullptr; + llvm::Value *NumDependences = nullptr; + llvm::Value *DependenceList = nullptr; + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) Device = EmitScalarExpr(C->getDevice()); - llvm::Value *NumDependences = nullptr; - llvm::Value *DependenceAddress = nullptr; - if (const auto *DC = S.getSingleClause<OMPDependClause>()) { - OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), - DC->getModifier()); - Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); - std::pair<llvm::Value *, Address> DependencePair = - CGM.getOpenMPRuntime().emitDependClause(*this, Dependencies, - DC->getBeginLoc()); - NumDependences = DependencePair.first; - DependenceAddress = Builder.CreatePointerCast( - DependencePair.second.getPointer(), CGM.Int8PtrTy); + // Build list and emit dependences + OMPTaskDataTy Data; + buildDependences(S, Data); + if (!Data.Dependences.empty()) { + Address DependenciesArray = Address::invalid(); + std::tie(NumDependences, DependenciesArray) = + CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences, + S.getBeginLoc()); + DependenceList = DependenciesArray.getPointer(); } + Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); - assert(!(S.hasClausesOfKind<OMPNowaitClause>() && - !(S.getSingleClause<OMPInitClause>() || - S.getSingleClause<OMPDestroyClause>() || - S.getSingleClause<OMPUseClause>())) && + assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() || + S.getSingleClause<OMPDestroyClause>() || + S.getSingleClause<OMPUseClause>())) && "OMPNowaitClause clause is used separately in OMPInteropDirective."); if (const auto *C = S.getSingleClause<OMPInitClause>()) { @@ -6957,20 +7007,20 @@ void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { InteropType = llvm::omp::OMPInteropType::TargetSync; } OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device, - NumDependences, DependenceAddress, - S.hasClausesOfKind<OMPNowaitClause>()); + NumDependences, DependenceList, + Data.HasNowaitClause); } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) { llvm::Value *InteropvarPtr = EmitLValue(C->getInteropVar()).getPointer(*this); OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, - NumDependences, DependenceAddress, - S.hasClausesOfKind<OMPNowaitClause>()); + NumDependences, DependenceList, + Data.HasNowaitClause); } else if (const auto *C = S.getSingleClause<OMPUseClause>()) { llvm::Value *InteropvarPtr = EmitLValue(C->getInteropVar()).getPointer(*this); 
OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, - NumDependences, DependenceAddress, - S.hasClausesOfKind<OMPNowaitClause>()); + NumDependences, DependenceList, + Data.HasNowaitClause); } } @@ -7129,14 +7179,13 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { void CodeGenFunction::EmitOMPUseDevicePtrClause( const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, - const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { - auto OrigVarIt = C.varlist_begin(); - auto InitIt = C.inits().begin(); - for (const Expr *PvtVarIt : C.private_copies()) { - const auto *OrigVD = - cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); - const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); - const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); + const llvm::DenseMap<const ValueDecl *, llvm::Value *> + CaptureDeviceAddrMap) { + llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; + for (const Expr *OrigVarIt : C.varlists()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl()); + if (!Processed.insert(OrigVD).second) + continue; // In order to identify the right initializer we need to match the // declaration used by the mapping logic. In some cases we may get @@ -7146,7 +7195,7 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( // OMPCapturedExprDecl are used to privative fields of the current // structure. const auto *ME = cast<MemberExpr>(OED->getInit()); - assert(isa<CXXThisExpr>(ME->getBase()) && + assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) && "Base should be the current struct!"); MatchingVD = ME->getMemberDecl(); } @@ -7157,32 +7206,16 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( if (InitAddrIt == CaptureDeviceAddrMap.end()) continue; - // Initialize the temporary initialization variable with the address - // we get from the runtime library. We have to cast the source address - // because it is always a void *. References are materialized in the - // privatization scope, so the initialization here disregards the fact - // the original variable is a reference. llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType()); - Address InitAddr = Builder.CreateElementBitCast(InitAddrIt->second, Ty); - setAddrOfLocalVar(InitVD, InitAddr); - - // Emit private declaration, it will be initialized by the value we - // declaration we just added to the local declarations map. - EmitDecl(*PvtVD); - - // The initialization variables reached its purpose in the emission - // of the previous declaration, so we don't need it anymore. - LocalDeclMap.erase(InitVD); // Return the address of the private variable. - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(PvtVD)); + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, + Address(InitAddrIt->second, Ty, + getContext().getTypeAlignInChars(getContext().VoidPtrTy))); assert(IsRegistered && "firstprivate var already registered as private"); // Silence the warning about unused variable. 
(void)IsRegistered; - - ++OrigVarIt; - ++InitIt; } } @@ -7197,7 +7230,8 @@ static const VarDecl *getBaseDecl(const Expr *Ref) { void CodeGenFunction::EmitOMPUseDeviceAddrClause( const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, - const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { + const llvm::DenseMap<const ValueDecl *, llvm::Value *> + CaptureDeviceAddrMap) { llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; for (const Expr *Ref : C.varlists()) { const VarDecl *OrigVD = getBaseDecl(Ref); @@ -7222,16 +7256,20 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause( if (InitAddrIt == CaptureDeviceAddrMap.end()) continue; - Address PrivAddr = InitAddrIt->getSecond(); + llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType()); + + Address PrivAddr = + Address(InitAddrIt->second, Ty, + getContext().getTypeAlignInChars(getContext().VoidPtrTy)); // For declrefs and variable length array need to load the pointer for // correct mapping, since the pointer to the data was passed to the runtime. if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || MatchingVD->getType()->isArrayType()) { QualType PtrTy = getContext().getPointerType( OrigVD->getType().getNonReferenceType()); - PrivAddr = EmitLoadOfPointer( - Builder.CreateElementBitCast(PrivAddr, ConvertTypeForMem(PtrTy)), - PtrTy->castAs<PointerType>()); + PrivAddr = + EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)), + PtrTy->castAs<PointerType>()); } (void)PrivateScope.addPrivate(OrigVD, PrivAddr); @@ -7260,16 +7298,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective( }; DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); - auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; // Codegen that selects whether to generate the privatization code or not. - auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, - &InnermostCodeGen](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { RegionCodeGenTy RCG(InnermostCodeGen); PrivatizeDevicePointers = false; @@ -7289,7 +7324,28 @@ void CodeGenFunction::EmitOMPTargetDataDirective( (void)PrivateScope.Privatize(); RCG(CGF); } else { - OMPLexicalScope Scope(CGF, S, OMPD_unknown); + // If we don't have target devices, don't bother emitting the data + // mapping code. + std::optional<OpenMPDirectiveKind> CaptureRegion; + if (CGM.getLangOpts().OMPTargetTriples.empty()) { + // Emit helper decls of the use_device_ptr/use_device_addr clauses. 
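With the reworked EmitOMPUseDevicePtrClause/EmitOMPUseDeviceAddrClause above, the privatized variable is bound directly to the llvm::Value the mapping runtime hands back, instead of round-tripping through a temporary declaration. A user-level sketch of the clause being lowered (requires an offloading-enabled build; the array size is arbitrary):

#include <cstdio>

int main() {
  int *p = new int[100];
#pragma omp target enter data map(alloc : p[0 : 100])

  // Inside the region below, p is replaced by its device counterpart.
#pragma omp target data use_device_ptr(p)
  std::printf("device pointer: %p\n", (void *)p);

#pragma omp target exit data map(delete : p[0 : 100])
  delete[] p;
  return 0;
}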
+        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
+          for (const Expr *E : C->varlists()) {
+            const Decl *D = cast<DeclRefExpr>(E)->getDecl();
+            if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+              CGF.EmitVarDecl(*OED);
+          }
+        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
+          for (const Expr *E : C->varlists()) {
+            const Decl *D = getBaseDecl(E);
+            if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+              CGF.EmitVarDecl(*OED);
+          }
+      } else {
+        CaptureRegion = OMPD_unknown;
+      }
+
+      OMPLexicalScope Scope(CGF, S, CaptureRegion);
       RCG(CGF);
     }
   };
@@ -7789,6 +7845,148 @@ void CodeGenFunction::EmitOMPGenericLoopDirective(
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
 }
 
+void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
+    const OMPLoopDirective &S) {
+  // Emit combined directive as if its constituent constructs are 'parallel'
+  // and 'for'.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    emitOMPCopyinClause(CGF, S);
+    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
+  };
+  {
+    auto LPCRegion =
+        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+                                   emitEmptyBoundParameters);
+  }
+  // Check for outer lastprivate conditional update.
+  checkForLastprivateConditionalUpdate(*this, S);
+}
+
+void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
+    const OMPTeamsGenericLoopDirective &S) {
+  // To be consistent with current behavior of 'target teams loop', emit
+  // 'teams loop' as if its constituent constructs are 'distribute',
+  // 'parallel', and 'for'.
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+
+  // Emit teams region as a standalone region.
+  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
+                                            PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    OMPPrivateScope PrivateScope(CGF);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
+                                                    CodeGenDistribute);
+    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
+  };
+  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
+  emitPostUpdateForReductionClause(*this, S,
+                                   [](CodeGenFunction &) { return nullptr; });
+}
+
+static void
+emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
+                                 const OMPTargetTeamsGenericLoopDirective &S,
+                                 PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  // Emit 'teams loop' as if its constituent constructs are 'distribute',
+  // 'parallel', and 'for'.
+  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+                              S.getDistInc());
+  };
+
+  // Emit teams region as a standalone region.
+ auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +/// Emit combined directive 'target teams loop' as if its constituent +/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'. +void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( + const OMPTargetTeamsGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsGenericLoopDirective &S) { + // Emit SPMD target parallel loop region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && + "Target device function emission failed for 'target teams loop'."); +} + +static void emitTargetParallelGenericLoopRegion( + CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit as 'parallel for'. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S) { + // Emit target parallel loop region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +/// Emit combined directive 'target parallel loop' as if its constituent +/// constructs are 'target', 'parallel', and 'for'. 
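As the comments above note, 'teams loop' (and its 'target' form) is currently emitted as if it were 'distribute parallel for', so clauses such as reduction behave as they do on that combined construct. For example (OpenMP 5.1 assumed; build with -fopenmp):

#include <cstdio>

int main() {
  double sum = 0.0;
  // Lowered via EmitOMPTeamsGenericLoopDirective as
  // 'distribute parallel for' across the league of teams.
#pragma omp teams loop reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += i;
  std::printf("sum = %f\n", sum);
  return 0;
}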
+void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { diff --git a/clang/lib/CodeGen/CGVTT.cpp b/clang/lib/CodeGen/CGVTT.cpp index d0c8e351626b..22790147c6f5 100644 --- a/clang/lib/CodeGen/CGVTT.cpp +++ b/clang/lib/CodeGen/CGVTT.cpp @@ -42,8 +42,8 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, llvm::GlobalVariable::LinkageTypes Linkage, const CXXRecordDecl *RD) { VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/true); - llvm::ArrayType *ArrayType = - llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size()); + llvm::ArrayType *ArrayType = llvm::ArrayType::get( + CGM.GlobalsInt8PtrTy, Builder.getVTTComponents().size()); SmallVector<llvm::GlobalVariable *, 8> VTables; SmallVector<VTableAddressPointsMapTy, 8> VTableAddressPoints; @@ -81,9 +81,6 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, VTable->getValueType(), VTable, Idxs, /*InBounds=*/true, /*InRangeIndex=*/1); - Init = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Init, - CGM.Int8PtrTy); - VTTComponents.push_back(Init); } @@ -112,9 +109,9 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/false); - llvm::ArrayType *ArrayType = - llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size()); - llvm::Align Align = CGM.getDataLayout().getABITypeAlign(CGM.Int8PtrTy); + llvm::ArrayType *ArrayType = llvm::ArrayType::get( + CGM.GlobalsInt8PtrTy, Builder.getVTTComponents().size()); + llvm::Align Align = CGM.getDataLayout().getABITypeAlign(CGM.GlobalsInt8PtrTy); llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable( Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align); diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index a0b5d9e4b096..91dd7a8e046b 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -690,7 +690,7 @@ bool CodeGenVTables::useRelativeLayout() const { llvm::Type *CodeGenModule::getVTableComponentType() const { if (UseRelativeLayout(*this)) return Int32Ty; - return Int8PtrTy; + return GlobalsInt8PtrTy; } llvm::Type *CodeGenVTables::getVTableComponentType() const { @@ -702,7 +702,7 @@ static void AddPointerLayoutOffset(const CodeGenModule &CGM, CharUnits offset) { builder.add(llvm::ConstantExpr::getIntToPtr( llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()), - CGM.Int8PtrTy)); + CGM.GlobalsInt8PtrTy)); } static void AddRelativeLayoutOffset(const CodeGenModule &CGM, @@ -739,7 +739,7 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, vtableHasLocalLinkage, /*isCompleteDtor=*/false); else - return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy)); + return builder.add(rtti); case VTableComponent::CK_FunctionPointer: case VTableComponent::CK_CompleteDtorPointer: @@ -758,7 +758,8 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, ? 
MD->hasAttr<CUDADeviceAttr>() : (MD->hasAttr<CUDAHostAttr>() || !MD->hasAttr<CUDADeviceAttr>()); if (!CanEmitMethod) - return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int8PtrTy)); + return builder.add( + llvm::ConstantExpr::getNullValue(CGM.GlobalsInt8PtrTy)); // Method is acceptable, continue processing as usual. } @@ -771,20 +772,20 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, // with the local symbol. As a temporary solution, fill these components // with zero. We shouldn't be calling these in the first place anyway. if (useRelativeLayout()) - return llvm::ConstantPointerNull::get(CGM.Int8PtrTy); + return llvm::ConstantPointerNull::get(CGM.GlobalsInt8PtrTy); // For NVPTX devices in OpenMP emit special functions as null pointers, // otherwise linking ends up with unresolved references. - if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsDevice && + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isNVPTX()) - return llvm::ConstantPointerNull::get(CGM.Int8PtrTy); + return llvm::ConstantPointerNull::get(CGM.GlobalsInt8PtrTy); llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); llvm::Constant *fn = cast<llvm::Constant>( CGM.CreateRuntimeFunction(fnTy, name).getCallee()); if (auto f = dyn_cast<llvm::Function>(fn)) f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy); + return fn; }; llvm::Constant *fnPtr; @@ -822,15 +823,26 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, return addRelativeComponent( builder, fnPtr, vtableAddressPoint, vtableHasLocalLinkage, component.getKind() == VTableComponent::CK_CompleteDtorPointer); - } else - return builder.add(llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy)); + } else { + // TODO: this is icky and only exists due to functions being in the generic + // address space, rather than the global one, even though they are + // globals; fixing said issue might be intrusive, and will be done + // later.
+ unsigned FnAS = fnPtr->getType()->getPointerAddressSpace(); + unsigned GVAS = CGM.GlobalsInt8PtrTy->getPointerAddressSpace(); + + if (FnAS != GVAS) + fnPtr = + llvm::ConstantExpr::getAddrSpaceCast(fnPtr, CGM.GlobalsInt8PtrTy); + return builder.add(fnPtr); + } } case VTableComponent::CK_UnusedFunctionPointer: if (useRelativeLayout()) return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int32Ty)); else - return builder.addNullPointer(CGM.Int8PtrTy); + return builder.addNullPointer(CGM.GlobalsInt8PtrTy); } llvm_unreachable("Unexpected vtable component kind"); @@ -1045,19 +1057,20 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { switch (keyFunction->getTemplateSpecializationKind()) { case TSK_Undeclared: case TSK_ExplicitSpecialization: - assert((def || CodeGenOpts.OptimizationLevel > 0 || - CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo) && - "Shouldn't query vtable linkage without key function, " - "optimizations, or debug info"); - if (!def && CodeGenOpts.OptimizationLevel > 0) - return llvm::GlobalVariable::AvailableExternallyLinkage; + assert( + (def || CodeGenOpts.OptimizationLevel > 0 || + CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo) && + "Shouldn't query vtable linkage without key function, " + "optimizations, or debug info"); + if (!def && CodeGenOpts.OptimizationLevel > 0) + return llvm::GlobalVariable::AvailableExternallyLinkage; - if (keyFunction->isInlined()) - return !Context.getLangOpts().AppleKext ? - llvm::GlobalVariable::LinkOnceODRLinkage : - llvm::Function::InternalLinkage; + if (keyFunction->isInlined()) + return !Context.getLangOpts().AppleKext + ? llvm::GlobalVariable::LinkOnceODRLinkage + : llvm::Function::InternalLinkage; - return llvm::GlobalVariable::ExternalLinkage; + return llvm::GlobalVariable::ExternalLinkage; case TSK_ImplicitInstantiation: return !Context.getLangOpts().AppleKext ? @@ -1171,9 +1184,16 @@ bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) { if (!keyFunction) return false; + const FunctionDecl *Def; // Otherwise, if we don't have a definition of the key function, the // vtable must be defined somewhere else. - return !keyFunction->hasBody(); + if (!keyFunction->hasBody(Def)) + return true; + + assert(Def && "The body of the key function is not assigned to Def?"); + // If the non-inline key function comes from another module unit, the vtable + // must be defined there. 
+ return Def->isInAnotherModuleUnit() && !Def->isInlineSpecified(); } /// Given that we're currently at the end of the translation unit, and @@ -1211,7 +1231,8 @@ void CodeGenModule::EmitDeferredVTables() { } bool CodeGenModule::AlwaysHasLTOVisibilityPublic(const CXXRecordDecl *RD) { - if (RD->hasAttr<LTOVisibilityPublicAttr>() || RD->hasAttr<UuidAttr>()) + if (RD->hasAttr<LTOVisibilityPublicAttr>() || RD->hasAttr<UuidAttr>() || + RD->hasAttr<DLLExportAttr>() || RD->hasAttr<DLLImportAttr>()) return true; if (!getCodeGenOpts().LTOVisibilityPublicStd) @@ -1238,13 +1259,9 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { if (!isExternallyVisible(LV.getLinkage())) return true; - if (getTriple().isOSBinFormatCOFF()) { - if (RD->hasAttr<DLLExportAttr>() || RD->hasAttr<DLLImportAttr>()) - return false; - } else { - if (LV.getVisibility() != HiddenVisibility) - return false; - } + if (!getTriple().isOSBinFormatCOFF() && + LV.getVisibility() != HiddenVisibility) + return false; return !AlwaysHasLTOVisibilityPublic(RD); } @@ -1268,13 +1285,13 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( else TypeVis = llvm::GlobalObject::VCallVisibilityPublic; - for (auto B : RD->bases()) + for (const auto &B : RD->bases()) if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) TypeVis = std::min( TypeVis, GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl(), Visited)); - for (auto B : RD->vbases()) + for (const auto &B : RD->vbases()) if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) TypeVis = std::min( TypeVis, diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h index e7b59d94f257..9d4223547050 100644 --- a/clang/lib/CodeGen/CGVTables.h +++ b/clang/lib/CodeGen/CGVTables.h @@ -75,16 +75,6 @@ class CodeGenVTables { bool vtableHasLocalLinkage, bool isCompleteDtor) const; - /// Create a dso_local stub that will be used for a relative reference in the - /// relative vtable layout. This stub will just be a tail call to the original - /// function and propagate any function attributes from the original. If the - /// original function is already dso_local, the original is returned instead - /// and a stub is not created. - llvm::Function * - getOrCreateRelativeStub(llvm::Function *func, - llvm::GlobalValue::LinkageTypes stubLinkage, - bool isCompleteDtor) const; - bool useRelativeLayout() const; llvm::Type *getVTableComponentType() const; diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h index f01eece042f8..1e6f67250583 100644 --- a/clang/lib/CodeGen/CGValue.h +++ b/clang/lib/CodeGen/CGValue.h @@ -225,6 +225,9 @@ class LValue { // this lvalue. bool Nontemporal : 1; + // The pointer is known not to be null. 
+ bool IsKnownNonNull : 1; + LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; @@ -238,9 +241,7 @@ private: if (isGlobalReg()) assert(ElementType == nullptr && "Global reg does not store elem type"); else - assert(llvm::cast<llvm::PointerType>(V->getType()) - ->isOpaqueOrPointeeTypeMatches(ElementType) && - "Pointer element type mismatch"); + assert(ElementType != nullptr && "Must have elem type"); this->Type = Type; this->Quals = Quals; @@ -333,24 +334,35 @@ public: LValueBaseInfo getBaseInfo() const { return BaseInfo; } void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; } + KnownNonNull_t isKnownNonNull() const { + return (KnownNonNull_t)IsKnownNonNull; + } + LValue setKnownNonNull() { + IsKnownNonNull = true; + return *this; + } + // simple lvalue llvm::Value *getPointer(CodeGenFunction &CGF) const { assert(isSimple()); return V; } Address getAddress(CodeGenFunction &CGF) const { - return Address(getPointer(CGF), ElementType, getAlignment()); + return Address(getPointer(CGF), ElementType, getAlignment(), + isKnownNonNull()); } void setAddress(Address address) { assert(isSimple()); V = address.getPointer(); ElementType = address.getElementType(); Alignment = address.getAlignment().getQuantity(); + IsKnownNonNull = address.isKnownNonNull(); } // vector elt lvalue Address getVectorAddress() const { - return Address(getVectorPointer(), ElementType, getAlignment()); + return Address(getVectorPointer(), ElementType, getAlignment(), + (KnownNonNull_t)isKnownNonNull()); } llvm::Value *getVectorPointer() const { assert(isVectorElt()); @@ -362,7 +374,8 @@ public: } Address getMatrixAddress() const { - return Address(getMatrixPointer(), ElementType, getAlignment()); + return Address(getMatrixPointer(), ElementType, getAlignment(), + (KnownNonNull_t)isKnownNonNull()); } llvm::Value *getMatrixPointer() const { assert(isMatrixElt()); @@ -375,7 +388,8 @@ public: // extended vector elements. 
Address getExtVectorAddress() const { - return Address(getExtVectorPointer(), ElementType, getAlignment()); + return Address(getExtVectorPointer(), ElementType, getAlignment(), + (KnownNonNull_t)isKnownNonNull()); } llvm::Value *getExtVectorPointer() const { assert(isExtVectorElt()); @@ -388,7 +402,8 @@ public: // bitfield lvalue Address getBitFieldAddress() const { - return Address(getBitFieldPointer(), ElementType, getAlignment()); + return Address(getBitFieldPointer(), ElementType, getAlignment(), + (KnownNonNull_t)isKnownNonNull()); } llvm::Value *getBitFieldPointer() const { assert(isBitField()); return V; } const CGBitFieldInfo &getBitFieldInfo() const { @@ -409,6 +424,7 @@ public: assert(address.getPointer()->getType()->isPointerTy()); R.V = address.getPointer(); R.ElementType = address.getElementType(); + R.IsKnownNonNull = address.isKnownNonNull(); R.Initialize(type, qs, address.getAlignment(), BaseInfo, TBAAInfo); return R; } @@ -421,6 +437,7 @@ public: R.V = vecAddress.getPointer(); R.ElementType = vecAddress.getElementType(); R.VectorIdx = Idx; + R.IsKnownNonNull = vecAddress.isKnownNonNull(); R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), BaseInfo, TBAAInfo); return R; @@ -434,6 +451,7 @@ public: R.V = vecAddress.getPointer(); R.ElementType = vecAddress.getElementType(); R.VectorElts = Elts; + R.IsKnownNonNull = vecAddress.isKnownNonNull(); R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), BaseInfo, TBAAInfo); return R; @@ -453,6 +471,7 @@ public: R.V = Addr.getPointer(); R.ElementType = Addr.getElementType(); R.BitFieldInfo = &Info; + R.IsKnownNonNull = Addr.isKnownNonNull(); R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo, TBAAInfo); return R; @@ -464,6 +483,7 @@ public: R.LVType = GlobalReg; R.V = V; R.ElementType = nullptr; + R.IsKnownNonNull = true; R.Initialize(type, type.getQualifiers(), alignment, LValueBaseInfo(AlignmentSource::Decl), TBAAAccessInfo()); return R; @@ -477,6 +497,7 @@ public: R.V = matAddress.getPointer(); R.ElementType = matAddress.getElementType(); R.VectorIdx = Idx; + R.IsKnownNonNull = matAddress.isKnownNonNull(); R.Initialize(type, type.getQualifiers(), matAddress.getAlignment(), BaseInfo, TBAAInfo); return R; @@ -579,6 +600,8 @@ public: Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed, IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) { + if (addr.isValid()) + addr.setKnownNonNull(); return AggValueSlot(addr, quals, isDestructed, needsGC, isZeroed, isAliased, mayOverlap, isChecked); } diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 2b219267869e..a3b72381d73f 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenAction.h" +#include "CGCall.h" #include "CodeGenModule.h" #include "CoverageMappingGen.h" #include "MacroPPCallbacks.h" @@ -86,7 +87,7 @@ namespace clang { }; static void reportOptRecordError(Error E, DiagnosticsEngine &Diags, - const CodeGenOptions CodeGenOpts) { + const CodeGenOptions &CodeGenOpts) { handleAllErrors( std::move(E), [&](const LLVMRemarkSetupFileError &E) { @@ -115,6 +116,7 @@ namespace clang { const LangOptions &LangOpts; std::unique_ptr<raw_pwrite_stream> AsmOutStream; ASTContext *Context; + IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; Timer LLVMIRGeneration; unsigned LLVMIRGenerationRefCount; @@ -147,7 +149,7 @@ namespace clang { public: 
BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, @@ -158,10 +160,10 @@ namespace clang { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - AsmOutStream(std::move(OS)), Context(nullptr), + AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, InFile, std::move(FS), HeaderSearchOpts, + Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { TimerIsEnabled = CodeGenOpts.TimePasses; @@ -173,7 +175,7 @@ namespace clang { // to use the clang diagnostic handler for IR input files. It avoids // initializing the OS field. BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, + IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, @@ -183,10 +185,10 @@ namespace clang { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - Context(nullptr), + Context(nullptr), FS(VFS), LLVMIRGeneration("irgen", "LLVM IR Generation Time"), LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, "", std::move(FS), HeaderSearchOpts, + Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)), CurLinkModule(Module) { TimerIsEnabled = CodeGenOpts.TimePasses; @@ -261,15 +263,17 @@ namespace clang { } // Links each entry in LinkModules into our module. Returns true on error. - bool LinkInModules() { + bool LinkInModules(llvm::Module *M) { for (auto &LM : LinkModules) { + assert(LM.Module && "LinkModule does not actually have a module"); if (LM.PropagateAttrs) for (Function &F : *LM.Module) { // Skip intrinsics. Keep consistent with how intrinsics are created // in LLVM IR. if (F.isIntrinsic()) continue; - Gen->CGM().addDefaultFunctionDefinitionAttributes(F); + CodeGen::mergeDefaultFunctionDefinitionAttributes( + F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize); } CurLinkModule = LM.Module.get(); @@ -277,20 +281,20 @@ namespace clang { bool Err; if (LM.Internalize) { Err = Linker::linkModules( - *getModule(), std::move(LM.Module), LM.LinkFlags, + *M, std::move(LM.Module), LM.LinkFlags, [](llvm::Module &M, const llvm::StringSet<> &GVS) { internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { return !GV.hasName() || (GVS.count(GV.getName()) == 0); }); }); } else { - Err = Linker::linkModules(*getModule(), std::move(LM.Module), - LM.LinkFlags); + Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags); } if (Err) return true; } + LinkModules.clear(); return false; // success } @@ -353,7 +357,7 @@ namespace clang { } // Link each LinkModule into our module. 
- if (LinkInModules()) + if (LinkInModules(getModule())) return; for (auto &F : getModule()->functions()) { @@ -381,7 +385,7 @@ namespace clang { EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, C.getTargetInfo().getDataLayoutString(), - getModule(), Action, std::move(AsmOutStream)); + getModule(), Action, FS, std::move(AsmOutStream)); Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); @@ -631,9 +635,8 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { return false; Diags.Report(*Loc, diag::warn_fe_frame_larger_than) - << D.getStackSize() - << D.getStackLimit() - << llvm::demangle(D.getFunction().getName().str()); + << D.getStackSize() << D.getStackLimit() + << llvm::demangle(D.getFunction().getName()); return true; } @@ -647,7 +650,7 @@ bool BackendConsumer::ResourceLimitDiagHandler( Diags.Report(*Loc, DiagID) << D.getResourceName() << D.getResourceSize() << D.getResourceLimit() - << llvm::demangle(D.getFunction().getName().str()); + << llvm::demangle(D.getFunction().getName()); return true; } @@ -852,7 +855,7 @@ void BackendConsumer::DontCallDiagHandler(const DiagnosticInfoDontCall &D) { Diags.Report(LocCookie, D.getSeverity() == DiagnosticSeverity::DS_Error ? diag::err_fe_backend_error_attr : diag::warn_fe_backend_warning_attr) - << llvm::demangle(D.getFunctionName().str()) << D.getNote(); + << llvm::demangle(D.getFunctionName()) << D.getNote(); } void BackendConsumer::MisExpectDiagHandler( @@ -990,6 +993,36 @@ CodeGenAction::~CodeGenAction() { delete VMContext; } +bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { + if (!LinkModules.empty()) + return false; + + for (const CodeGenOptions::BitcodeFileToLink &F : + CI.getCodeGenOpts().LinkBitcodeFiles) { + auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); + if (!BCBuf) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << BCBuf.getError().message(); + LinkModules.clear(); + return true; + } + + Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = + getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); + if (!ModuleOrErr) { + handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); + }); + LinkModules.clear(); + return true; + } + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + } + return false; +} + bool CodeGenAction::hasIRSupport() const { return true; } void CodeGenAction::EndSourceFileAction() { @@ -1044,33 +1077,9 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (BA != Backend_EmitNothing && !OS) return nullptr; - VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers); - // Load bitcode modules to link with, if we need to. 
- if (LinkModules.empty()) - for (const CodeGenOptions::BitcodeFileToLink &F : - CI.getCodeGenOpts().LinkBitcodeFiles) { - auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); - if (!BCBuf) { - CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); - LinkModules.clear(); - return nullptr; - } - - Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = - getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); - if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { - CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << EIB.message(); - }); - LinkModules.clear(); - return nullptr; - } - LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, - F.Internalize, F.LinkFlags}); - } + if (loadLinkModules(CI)) + return nullptr; CoverageSourceInfo *CoverageInfo = nullptr; // Add the preprocessor callback only when the coverage mapping is generated. @@ -1103,7 +1112,14 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); - VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers); + auto DiagErrors = [&](Error E) -> std::unique_ptr<llvm::Module> { + unsigned DiagID = + CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(DiagID) << EIB.message(); + }); + return {}; + }; // For ThinLTO backend invocations, ensure that the context // merges types based on ODR identifiers. We also need to read @@ -1111,15 +1127,6 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) { VMContext->enableDebugTypeODRUniquing(); - auto DiagErrors = [&](Error E) -> std::unique_ptr<llvm::Module> { - unsigned DiagID = - CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0"); - handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { - CI.getDiagnostics().Report(DiagID) << EIB.message(); - }); - return {}; - }; - Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef); if (!BMsOrErr) return DiagErrors(BMsOrErr.takeError()); @@ -1140,10 +1147,39 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { return std::move(*MOrErr); } + // Load bitcode modules to link with, if we need to. + if (loadLinkModules(CI)) + return nullptr; + + // Handle textual IR and bitcode file with one single module. llvm::SMDiagnostic Err; if (std::unique_ptr<llvm::Module> M = parseIR(MBRef, Err, *VMContext)) return M; + // If MBRef is a bitcode with multiple modules (e.g., -fsplit-lto-unit + // output), place the extra modules (actually only one, a regular LTO module) + // into LinkModules as if we are using -mlink-bitcode-file. + Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef); + if (BMsOrErr && BMsOrErr->size()) { + std::unique_ptr<llvm::Module> FirstM; + for (auto &BM : *BMsOrErr) { + Expected<std::unique_ptr<llvm::Module>> MOrErr = + BM.parseModule(*VMContext); + if (!MOrErr) + return DiagErrors(MOrErr.takeError()); + if (FirstM) + LinkModules.push_back({std::move(*MOrErr), /*PropagateAttrs=*/false, + /*Internalize=*/false, /*LinkFlags=*/{}}); + else + FirstM = std::move(*MOrErr); + } + if (FirstM) + return FirstM; + } + // If BMsOrErr fails, consume the error and use the error message from + // parseIR. 
+ consumeError(BMsOrErr.takeError()); + // Translate from the diagnostic info to the SourceManager location if // available. // TODO: Unify this with ConvertBackendLocation() @@ -1219,6 +1255,11 @@ void CodeGenAction::ExecuteAction() { CI.getCodeGenOpts(), CI.getTargetOpts(), CI.getLangOpts(), TheModule.get(), std::move(LinkModules), *VMContext, nullptr); + + // Link in each pending link module. + if (Result.LinkInModules(&*TheModule)) + return; + // PR44896: Force DiscardValueNames as false. DiscardValueNames cannot be // true here because the valued names are needed for reading textual IR. Ctx.setDiscardValueNames(false); @@ -1238,10 +1279,10 @@ void CodeGenAction::ExecuteAction() { std::unique_ptr<llvm::ToolOutputFile> OptRecordFile = std::move(*OptRecordFileOrErr); - EmitBackendOutput(Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, - TargetOpts, CI.getLangOpts(), - CI.getTarget().getDataLayoutString(), TheModule.get(), BA, - std::move(OS)); + EmitBackendOutput( + Diagnostics, CI.getHeaderSearchOpts(), CodeGenOpts, TargetOpts, + CI.getLangOpts(), CI.getTarget().getDataLayoutString(), TheModule.get(), + BA, CI.getFileManager().getVirtualFileSystemPtr(), std::move(OS)); if (OptRecordFile) OptRecordFile->keep(); } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 8cbe2a540744..fab70b66d1d9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/xxhash.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <optional> @@ -567,18 +568,17 @@ bool CodeGenFunction::AlwaysEmitXRayTypedEvents() const { XRayInstrKind::Typed); } -llvm::Value * -CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F, - llvm::Value *EncodedAddr) { - // Reconstruct the address of the global. - auto *PCRelAsInt = Builder.CreateSExt(EncodedAddr, IntPtrTy); - auto *FuncAsInt = Builder.CreatePtrToInt(F, IntPtrTy, "func_addr.int"); - auto *GOTAsInt = Builder.CreateAdd(PCRelAsInt, FuncAsInt, "global_addr.int"); - auto *GOTAddr = Builder.CreateIntToPtr(GOTAsInt, Int8PtrPtrTy, "global_addr"); - - // Load the original pointer through the global. - return Builder.CreateLoad(Address(GOTAddr, Int8PtrTy, getPointerAlign()), - "decoded_addr"); +llvm::ConstantInt * +CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { + // Remove any (C++17) exception specifications, to allow calling e.g. a + // noexcept function through a non-noexcept pointer. + if (!isa<FunctionNoProtoType>(Ty)) + Ty = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None); + std::string Mangled; + llvm::raw_string_ostream Out(Mangled); + CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out, false); + return llvm::ConstantInt::get( + CGM.Int32Ty, static_cast<uint32_t>(llvm::xxh3_64bits(Mangled))); } void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, @@ -730,31 +730,38 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (D) { const bool SanitizeBounds = SanOpts.hasOneOf(SanitizerKind::Bounds); + SanitizerMask no_sanitize_mask; bool NoSanitizeCoverage = false; for (auto *Attr : D->specific_attrs<NoSanitizeAttr>()) { - // Apply the no_sanitize* attributes to SanOpts. 
- SanitizerMask mask = Attr->getMask(); - SanOpts.Mask &= ~mask; - if (mask & SanitizerKind::Address) - SanOpts.set(SanitizerKind::KernelAddress, false); - if (mask & SanitizerKind::KernelAddress) - SanOpts.set(SanitizerKind::Address, false); - if (mask & SanitizerKind::HWAddress) - SanOpts.set(SanitizerKind::KernelHWAddress, false); - if (mask & SanitizerKind::KernelHWAddress) - SanOpts.set(SanitizerKind::HWAddress, false); - + no_sanitize_mask |= Attr->getMask(); // SanitizeCoverage is not handled by SanOpts. if (Attr->hasCoverage()) NoSanitizeCoverage = true; } + // Apply the no_sanitize* attributes to SanOpts. + SanOpts.Mask &= ~no_sanitize_mask; + if (no_sanitize_mask & SanitizerKind::Address) + SanOpts.set(SanitizerKind::KernelAddress, false); + if (no_sanitize_mask & SanitizerKind::KernelAddress) + SanOpts.set(SanitizerKind::Address, false); + if (no_sanitize_mask & SanitizerKind::HWAddress) + SanOpts.set(SanitizerKind::KernelHWAddress, false); + if (no_sanitize_mask & SanitizerKind::KernelHWAddress) + SanOpts.set(SanitizerKind::HWAddress, false); + if (SanitizeBounds && !SanOpts.hasOneOf(SanitizerKind::Bounds)) Fn->addFnAttr(llvm::Attribute::NoSanitizeBounds); if (NoSanitizeCoverage && CGM.getCodeGenOpts().hasSanitizeCoverage()) Fn->addFnAttr(llvm::Attribute::NoSanitizeCoverage); + + // Some passes need the non-negated no_sanitize attribute. Pass them on. + if (CGM.getCodeGenOpts().hasSanitizeBinaryMetadata()) { + if (no_sanitize_mask & SanitizerKind::Thread) + Fn->addFnAttr("no_sanitize_thread"); + } } if (ShouldSkipSanitizerInstrumentation()) { @@ -939,21 +946,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // If we are checking function types, emit a function type signature as // prologue data. - if (FD && getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) { + if (FD && SanOpts.has(SanitizerKind::Function)) { if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) { - // Remove any (C++17) exception specifications, to allow calling e.g. a - // noexcept function through a non-noexcept pointer. 
- auto ProtoTy = getContext().getFunctionTypeWithExceptionSpec( - FD->getType(), EST_None); - llvm::Constant *FTRTTIConst = - CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true); - llvm::GlobalVariable *FTRTTIProxy = - CGM.GetOrCreateRTTIProxyGlobalVariable(FTRTTIConst); llvm::LLVMContext &Ctx = Fn->getContext(); llvm::MDBuilder MDB(Ctx); - Fn->setMetadata(llvm::LLVMContext::MD_func_sanitize, - MDB.createRTTIPointerPrologue(PrologueSig, FTRTTIProxy)); - CGM.addCompilerUsedGlobal(FTRTTIProxy); + Fn->setMetadata( + llvm::LLVMContext::MD_func_sanitize, + MDB.createRTTIPointerPrologue( + PrologueSig, getUBSanFunctionTypeHash(FD->getType()))); } } @@ -1104,8 +1104,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, auto AI = CurFn->arg_begin(); if (CurFnInfo->getReturnInfo().isSRetAfterThis()) ++AI; - ReturnValue = Address(&*AI, ConvertType(RetTy), - CurFnInfo->getReturnInfo().getIndirectAlign()); + ReturnValue = + Address(&*AI, ConvertType(RetTy), + CurFnInfo->getReturnInfo().getIndirectAlign(), KnownNonNull); if (!CurFnInfo->getReturnInfo().getIndirectByVal()) { ReturnValuePointer = CreateDefaultAlignTempAlloca(Int8PtrTy, "result.ptr"); @@ -1125,8 +1126,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, cast<llvm::GetElementPtrInst>(Addr)->getResultElementType(); ReturnValuePointer = Address(Addr, Ty, getPointerAlign()); Addr = Builder.CreateAlignedLoad(Ty, Addr, getPointerAlign(), "agg.result"); - ReturnValue = - Address(Addr, ConvertType(RetTy), CGM.getNaturalTypeAlignment(RetTy)); + ReturnValue = Address(Addr, ConvertType(RetTy), + CGM.getNaturalTypeAlignment(RetTy), KnownNonNull); } else { ReturnValue = CreateIRTemp(RetTy, "retval"); @@ -1934,8 +1935,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, llvm::Value *baseSizeInChars = llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity()); - Address begin = - Builder.CreateElementBitCast(dest, CGF.Int8Ty, "vla.begin"); + Address begin = dest.withElementType(CGF.Int8Ty); llvm::Value *end = Builder.CreateInBoundsGEP( begin.getElementType(), begin.getPointer(), sizeInChars, "vla.end"); @@ -1979,9 +1979,8 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { } } - // Cast the dest ptr to the appropriate i8 pointer type. if (DestPtr.getElementType() != Int8Ty) - DestPtr = Builder.CreateElementBitCast(DestPtr, Int8Ty); + DestPtr = DestPtr.withElementType(Int8Ty); // Get size and alignment info for this aggregate. CharUnits size = getContext().getTypeSizeInChars(Ty); @@ -2141,7 +2140,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, } llvm::Type *baseType = ConvertType(eltType); - addr = Builder.CreateElementBitCast(addr, baseType, "array.begin"); + addr = addr.withElementType(baseType); } else { // Create the actual GEP. addr = Address(Builder.CreateInBoundsGEP( @@ -2498,7 +2497,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, auto *PTy = dyn_cast<llvm::PointerType>(VTy); unsigned AS = PTy ? 
PTy->getAddressSpace() : 0; llvm::PointerType *IntrinTy = - llvm::PointerType::getWithSamePointeeType(CGM.Int8PtrTy, AS); + llvm::PointerType::get(CGM.getLLVMContext(), AS); llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, {IntrinTy, CGM.ConstGlobalsPtrTy}); @@ -2533,7 +2532,7 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, llvm::BasicBlock::iterator InsertPt) const { LoopStack.InsertHelper(I); if (IsSanitizerScope) - CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); + I->setNoSanitizeMetadata(); } void CGBuilderInserter::InsertHelper( @@ -2611,6 +2610,16 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, })) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; + } else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) { + llvm::StringMap<bool> CalleeFeatureMap; + CGM.getContext().getFunctionFeatureMap(CalleeFeatureMap, TargetDecl); + + for (const auto &F : CalleeFeatureMap) { + if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) || + !CallerFeatureMap.find(F.getKey())->getValue())) + CGM.getDiags().Report(Loc, diag::err_function_needs_feature) + << FD->getDeclName() << TargetDecl->getDeclName() << F.getKey(); + } } } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a535aa7c0410..409f48a04906 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -117,7 +117,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(DivremOverflow, divrem_overflow, 0) \ SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0) \ SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0) \ - SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1) \ + SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0) \ SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \ SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \ SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0) \ @@ -318,10 +318,10 @@ public: /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. - const Decl *CurFuncDecl; + const Decl *CurFuncDecl = nullptr; /// CurCodeDecl - This is the inner-most code context, which includes blocks. - const Decl *CurCodeDecl; - const CGFunctionInfo *CurFnInfo; + const Decl *CurCodeDecl = nullptr; + const CGFunctionInfo *CurFnInfo = nullptr; QualType FnRetTy; llvm::Function *CurFn = nullptr; @@ -333,6 +333,7 @@ public: // in this header. struct CGCoroInfo { std::unique_ptr<CGCoroData> Data; + bool InSuspendBlock = false; CGCoroInfo(); ~CGCoroInfo(); }; @@ -342,6 +343,10 @@ public: return CurCoro.Data != nullptr; } + bool inSuspendBlock() const { + return isCoroutine() && CurCoro.InSuspendBlock; + } + /// CurGD - The GlobalDecl for the current function being compiled. GlobalDecl CurGD; @@ -743,11 +748,11 @@ public: /// An i1 variable indicating whether or not the @finally is /// running for an exception. - llvm::AllocaInst *ForEHVar; + llvm::AllocaInst *ForEHVar = nullptr; /// An i8* variable into which the exception pointer to rethrow /// has been saved. 
- llvm::AllocaInst *SavedExnVar; + llvm::AllocaInst *SavedExnVar = nullptr; public: void enter(CodeGenFunction &CGF, const Stmt *Finally, @@ -2060,6 +2065,8 @@ public: llvm::Value *CompletePtr, QualType ElementType); void pushStackRestore(CleanupKind kind, Address SPMem); + void pushKmpcAllocFree(CleanupKind Kind, + std::pair<llvm::Value *, llvm::Value *> AddrSizePair); void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray); llvm::Function *generateDestroyHelper(Address addr, QualType type, @@ -2363,10 +2370,9 @@ public: /// XRay typed event handling calls. bool AlwaysEmitXRayTypedEvents() const; - /// Decode an address used in a function prologue, encoded by \c - /// EncodeAddrForUseInPrologue. - llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F, - llvm::Value *EncodedAddr); + /// Return a type hash constant for a function instrumented by + /// -fsanitize=function. + llvm::ConstantInt *getUBSanFunctionTypeHash(QualType T) const; /// EmitFunctionProlog - Emit the target specific LLVM code to load the /// arguments for the given function. This is also responsible for naming the @@ -2621,9 +2627,6 @@ public: AggValueSlot::DoesNotOverlap); } - /// Emit a cast to void* in the appropriate address space. - llvm::Value *EmitCastToVoidPtr(llvm::Value *value); - /// EvaluateExprAsBool - Perform the usual unary conversions on the specified /// expression and compare the result against zero, returning an Int1Ty value. llvm::Value *EvaluateExprAsBool(const Expr *E); @@ -3164,7 +3167,8 @@ public: Address getIndirectAddress() const { assert(isIndirect()); - return Address(Value, ElementType, CharUnits::fromQuantity(Alignment)); + return Address(Value, ElementType, CharUnits::fromQuantity(Alignment), + KnownNonNull); } }; @@ -3399,10 +3403,12 @@ public: OMPPrivateScope &PrivateScope); void EmitOMPUseDevicePtrClause( const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, - const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap); + const llvm::DenseMap<const ValueDecl *, llvm::Value *> + CaptureDeviceAddrMap); void EmitOMPUseDeviceAddrClause( const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, - const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap); + const llvm::DenseMap<const ValueDecl *, llvm::Value *> + CaptureDeviceAddrMap); /// Emit code for copyin clause in \a D directive. The next code is /// generated at the start of outlined functions for directives: /// \code @@ -3578,7 +3584,14 @@ public: void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); + void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S); + void EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S); + void EmitOMPTargetTeamsGenericLoopDirective( + const OMPTargetTeamsGenericLoopDirective &S); + void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S); void EmitOMPInteropDirective(const OMPInteropDirective &S); + void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S); /// Emit device code for the target directive. static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, @@ -3617,6 +3630,16 @@ public: CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S); + /// Emit device code for the target teams loop directive. 
+ static void EmitOMPTargetTeamsGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsGenericLoopDirective &S); + + /// Emit device code for the target parallel loop directive. + static void EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S); + static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S); @@ -3771,8 +3794,13 @@ public: /// an LLVM type of the same size of the lvalue's type. If the lvalue has a /// variable length type, this is not possible. /// - LValue EmitLValue(const Expr *E); + LValue EmitLValue(const Expr *E, + KnownNonNull_t IsKnownNonNull = NotKnownNonNull); +private: + LValue EmitLValueHelper(const Expr *E, KnownNonNull_t IsKnownNonNull); + +public: /// Same as EmitLValue but additionally we generate checking code to /// guard against undefined behavior. This is only suitable when we know /// that the address will be used to access the object. @@ -4234,6 +4262,7 @@ public: llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); + llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); @@ -4248,6 +4277,20 @@ public: unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitSMEZero(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch); llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E); @@ -4674,6 +4717,14 @@ public: /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); + /// Set the minimum required accuracy of the given sqrt operation + /// based on CodeGenOpts. + void SetSqrtFPAccuracy(llvm::Value *Val); + + /// Set the minimum required accuracy of the given sqrt operation based on + /// CodeGenOpts. + void SetDivFPAccuracy(llvm::Value *Val); + /// Set the codegen fast-math flags. void SetFastMathFlags(FPOptions FPFeatures); @@ -4783,9 +4834,10 @@ public: /// into the address of a local variable. In such a case, it's quite /// reasonable to just ignore the returned alignment when it isn't from an /// explicit source. - Address EmitPointerWithAlignment(const Expr *Addr, - LValueBaseInfo *BaseInfo = nullptr, - TBAAAccessInfo *TBAAInfo = nullptr); + Address + EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr, + KnownNonNull_t IsKnownNonNull = NotKnownNonNull); /// If \p E references a parameter with pass_object_size info or a constant /// array size modifier, emit the object size divided by the size of \p EltTy. 
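For context on the -fsanitize=function rework visible in the two files above (the RTTI proxy loaded via DecodeAddrUsedInPrologue is replaced by a 32-bit hash of the mangled function type, and the check no longer requires C++), here is a minimal, purely editorial example of the mismatch the instrumentation is designed to catch; it is not part of the imported diff:

// Build with: clang++ -fsanitize=function mismatch.cpp
// With this import, clang attaches a 32-bit truncation of
// llvm::xxh3_64bits(mangled function type) to each instrumented function;
// an indirect call compares hashes before jumping, rather than loading
// RTTI through a proxy global as the old scheme did.
#include <cstdio>

static void callee(int x) { std::printf("%d\n", x); }

int main() {
  // Deliberately mismatch the signature: the type hashes differ, so the
  // function_type_mismatch handler reports the call.
  auto *bad = reinterpret_cast<void (*)(long)>(&callee);
  bad(42L);
  return 0;
}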
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 12d602fed693..07a9dec12f6f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -50,9 +50,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/AttributeMask.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -67,8 +67,9 @@ #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" -#include "llvm/Support/X86TargetParser.h" #include "llvm/Support/xxhash.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/TargetParser/X86TargetParser.h" #include <optional> using namespace clang; @@ -100,6 +101,228 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { llvm_unreachable("invalid C++ ABI kind"); } +static std::unique_ptr<TargetCodeGenInfo> +createTargetCodeGenInfo(CodeGenModule &CGM) { + const TargetInfo &Target = CGM.getTarget(); + const llvm::Triple &Triple = Target.getTriple(); + const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); + + switch (Triple.getArch()) { + default: + return createDefaultTargetCodeGenInfo(CGM); + + case llvm::Triple::le32: + return createPNaClTargetCodeGenInfo(CGM); + case llvm::Triple::m68k: + return createM68kTargetCodeGenInfo(CGM); + case llvm::Triple::mips: + case llvm::Triple::mipsel: + if (Triple.getOS() == llvm::Triple::NaCl) + return createPNaClTargetCodeGenInfo(CGM); + return createMIPSTargetCodeGenInfo(CGM, /*IsOS32=*/true); + + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return createMIPSTargetCodeGenInfo(CGM, /*IsOS32=*/false); + + case llvm::Triple::avr: { + // For passing parameters, R8~R25 are used on avr, and R18~R25 are used + // on avrtiny. For passing return value, R18~R25 are used on avr, and + // R22~R25 are used on avrtiny. + unsigned NPR = Target.getABI() == "avrtiny" ? 6 : 18; + unsigned NRR = Target.getABI() == "avrtiny" ? 
4 : 8; + return createAVRTargetCodeGenInfo(CGM, NPR, NRR); + } + + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: + case llvm::Triple::aarch64_be: { + AArch64ABIKind Kind = AArch64ABIKind::AAPCS; + if (Target.getABI() == "darwinpcs") + Kind = AArch64ABIKind::DarwinPCS; + else if (Triple.isOSWindows()) + return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64); + + return createAArch64TargetCodeGenInfo(CGM, Kind); + } + + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: { + WebAssemblyABIKind Kind = WebAssemblyABIKind::MVP; + if (Target.getABI() == "experimental-mv") + Kind = WebAssemblyABIKind::ExperimentalMV; + return createWebAssemblyTargetCodeGenInfo(CGM, Kind); + } + + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: { + if (Triple.getOS() == llvm::Triple::Win32) + return createWindowsARMTargetCodeGenInfo(CGM, ARMABIKind::AAPCS_VFP); + + ARMABIKind Kind = ARMABIKind::AAPCS; + StringRef ABIStr = Target.getABI(); + if (ABIStr == "apcs-gnu") + Kind = ARMABIKind::APCS; + else if (ABIStr == "aapcs16") + Kind = ARMABIKind::AAPCS16_VFP; + else if (CodeGenOpts.FloatABI == "hard" || + (CodeGenOpts.FloatABI != "soft" && + (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || + Triple.getEnvironment() == llvm::Triple::MuslEABIHF || + Triple.getEnvironment() == llvm::Triple::EABIHF))) + Kind = ARMABIKind::AAPCS_VFP; + + return createARMTargetCodeGenInfo(CGM, Kind); + } + + case llvm::Triple::ppc: { + if (Triple.isOSAIX()) + return createAIXTargetCodeGenInfo(CGM, /*Is64Bit=*/false); + + bool IsSoftFloat = + CodeGenOpts.FloatABI == "soft" || Target.hasFeature("spe"); + return createPPC32TargetCodeGenInfo(CGM, IsSoftFloat); + } + case llvm::Triple::ppcle: { + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; + return createPPC32TargetCodeGenInfo(CGM, IsSoftFloat); + } + case llvm::Triple::ppc64: + if (Triple.isOSAIX()) + return createAIXTargetCodeGenInfo(CGM, /*Is64Bit=*/true); + + if (Triple.isOSBinFormatELF()) { + PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv1; + if (Target.getABI() == "elfv2") + Kind = PPC64_SVR4_ABIKind::ELFv2; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; + + return createPPC64_SVR4_TargetCodeGenInfo(CGM, Kind, IsSoftFloat); + } + return createPPC64TargetCodeGenInfo(CGM); + case llvm::Triple::ppc64le: { + assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); + PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv2; + if (Target.getABI() == "elfv1") + Kind = PPC64_SVR4_ABIKind::ELFv1; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; + + return createPPC64_SVR4_TargetCodeGenInfo(CGM, Kind, IsSoftFloat); + } + + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + return createNVPTXTargetCodeGenInfo(CGM); + + case llvm::Triple::msp430: + return createMSP430TargetCodeGenInfo(CGM); + + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: { + StringRef ABIStr = Target.getABI(); + unsigned XLen = Target.getPointerWidth(LangAS::Default); + unsigned ABIFLen = 0; + if (ABIStr.endswith("f")) + ABIFLen = 32; + else if (ABIStr.endswith("d")) + ABIFLen = 64; + return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen); + } + + case llvm::Triple::systemz: { + bool SoftFloat = CodeGenOpts.FloatABI == "soft"; + bool HasVector = !SoftFloat && Target.getABI() == "vector"; + return createSystemZTargetCodeGenInfo(CGM, HasVector, SoftFloat); + } + + case llvm::Triple::tce: + case llvm::Triple::tcele: + return createTCETargetCodeGenInfo(CGM); + + case 
llvm::Triple::x86: { + bool IsDarwinVectorABI = Triple.isOSDarwin(); + bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); + + if (Triple.getOS() == llvm::Triple::Win32) { + return createWinX86_32TargetCodeGenInfo( + CGM, IsDarwinVectorABI, IsWin32FloatStructABI, + CodeGenOpts.NumRegisterParameters); + } + return createX86_32TargetCodeGenInfo( + CGM, IsDarwinVectorABI, IsWin32FloatStructABI, + CodeGenOpts.NumRegisterParameters, CodeGenOpts.FloatABI == "soft"); + } + + case llvm::Triple::x86_64: { + StringRef ABI = Target.getABI(); + X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 + : ABI == "avx" ? X86AVXABILevel::AVX + : X86AVXABILevel::None); + + switch (Triple.getOS()) { + case llvm::Triple::Win32: + return createWinX86_64TargetCodeGenInfo(CGM, AVXLevel); + default: + return createX86_64TargetCodeGenInfo(CGM, AVXLevel); + } + } + case llvm::Triple::hexagon: + return createHexagonTargetCodeGenInfo(CGM); + case llvm::Triple::lanai: + return createLanaiTargetCodeGenInfo(CGM); + case llvm::Triple::r600: + return createAMDGPUTargetCodeGenInfo(CGM); + case llvm::Triple::amdgcn: + return createAMDGPUTargetCodeGenInfo(CGM); + case llvm::Triple::sparc: + return createSparcV8TargetCodeGenInfo(CGM); + case llvm::Triple::sparcv9: + return createSparcV9TargetCodeGenInfo(CGM); + case llvm::Triple::xcore: + return createXCoreTargetCodeGenInfo(CGM); + case llvm::Triple::arc: + return createARCTargetCodeGenInfo(CGM); + case llvm::Triple::spir: + case llvm::Triple::spir64: + return createCommonSPIRTargetCodeGenInfo(CGM); + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + return createSPIRVTargetCodeGenInfo(CGM); + case llvm::Triple::ve: + return createVETargetCodeGenInfo(CGM); + case llvm::Triple::csky: { + bool IsSoftFloat = !Target.hasFeature("hard-float-abi"); + bool hasFP64 = + Target.hasFeature("fpuv2_df") || Target.hasFeature("fpuv3_df"); + return createCSKYTargetCodeGenInfo(CGM, IsSoftFloat ? 0 + : hasFP64 ? 
64 + : 32); + } + case llvm::Triple::bpfeb: + case llvm::Triple::bpfel: + return createBPFTargetCodeGenInfo(CGM); + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: { + StringRef ABIStr = Target.getABI(); + unsigned ABIFRLen = 0; + if (ABIStr.endswith("f")) + ABIFRLen = 32; + else if (ABIStr.endswith("d")) + ABIFRLen = 64; + return createLoongArchTargetCodeGenInfo( + CGM, Target.getPointerWidth(LangAS::Default), ABIFRLen); + } + } +} + +const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + if (!TheTargetCodeGenInfo) + TheTargetCodeGenInfo = createTargetCodeGenInfo(*this); + return *TheTargetCodeGenInfo; +} + CodeGenModule::CodeGenModule(ASTContext &C, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, const HeaderSearchOptions &HSO, @@ -107,11 +330,11 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) - : Context(C), LangOpts(C.getLangOpts()), FS(std::move(FS)), - HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), - TheModule(M), Diags(diags), Target(C.getTargetInfo()), - ABI(createCXXABI(*this)), VMContext(M.getContext()), Types(*this), - VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { + : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), + PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), + Target(C.getTargetInfo()), ABI(createCXXABI(*this)), + VMContext(M.getContext()), Types(*this), VTables(*this), + SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); @@ -174,8 +397,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, // If debug info or coverage generation is enabled, create the CGDebugInfo // object. - if (CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo || - CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes) + if (CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo || + CodeGenOpts.CoverageNotesFile.size() || + CodeGenOpts.CoverageDataFile.size()) DebugInfo.reset(new CGDebugInfo(*this)); Block.GlobalUniqueCount = 0; @@ -185,7 +409,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( - CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); + CodeGenOpts.ProfileInstrumentUsePath, *FS, + CodeGenOpts.ProfileRemappingFile); // We're checking for profile read errors in CompilerInvocation, so if // there was an error it should've already been caught. If it hasn't been // somehow, trip an assertion. 
@@ -245,7 +470,7 @@ void CodeGenModule::createOpenMPRuntime() { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: case llvm::Triple::amdgcn: - assert(getLangOpts().OpenMPIsDevice && + assert(getLangOpts().OpenMPIsTargetDevice && "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this)); break; @@ -272,7 +497,7 @@ void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) { void CodeGenModule::applyReplacements() { for (auto &I : Replacements) { - StringRef MangledName = I.first(); + StringRef MangledName = I.first; llvm::Constant *Replacement = I.second; llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry) @@ -337,10 +562,11 @@ static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) { return FinalGV; } -static bool checkAliasedGlobal(DiagnosticsEngine &Diags, - SourceLocation Location, bool IsIFunc, - const llvm::GlobalValue *Alias, - const llvm::GlobalValue *&GV) { +static bool checkAliasedGlobal( + DiagnosticsEngine &Diags, SourceLocation Location, bool IsIFunc, + const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV, + const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames, + SourceRange AliasRange) { GV = getAliasedGlobal(Alias); if (!GV) { Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; @@ -349,6 +575,22 @@ static bool checkAliasedGlobal(DiagnosticsEngine &Diags, if (GV->isDeclaration()) { Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; + Diags.Report(Location, diag::note_alias_requires_mangled_name) + << IsIFunc << IsIFunc; + // Provide a note if the given function is not found and exists as a + // mangled name. + for (const auto &[Decl, Name] : MangledDeclNames) { + if (const auto *ND = dyn_cast<NamedDecl>(Decl.getDecl())) { + if (ND->getName() == GV->getName()) { + Diags.Report(Location, diag::note_alias_mangled_name_alternative) + << Name + << FixItHint::CreateReplacement( + AliasRange, + (Twine(IsIFunc ? "ifunc" : "alias") + "(\"" + Name + "\")") + .str()); + } + } + } return false; } @@ -380,16 +622,19 @@ void CodeGenModule::checkAliases() { for (const GlobalDecl &GD : Aliases) { const auto *D = cast<ValueDecl>(GD.getDecl()); SourceLocation Location; + SourceRange Range; bool IsIFunc = D->hasAttr<IFuncAttr>(); - if (const Attr *A = D->getDefiningAttr()) + if (const Attr *A = D->getDefiningAttr()) { Location = A->getLocation(); - else + Range = A->getRange(); + } else llvm_unreachable("Not an alias or ifunc?"); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Alias = GetGlobalValue(MangledName); const llvm::GlobalValue *GV = nullptr; - if (!checkAliasedGlobal(Diags, Location, IsIFunc, Alias, GV)) { + if (!checkAliasedGlobal(Diags, Location, IsIFunc, Alias, GV, + MangledDeclNames, Range)) { Error = true; continue; } @@ -508,7 +753,7 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, } void CodeGenModule::Release() { - Module *Primary = getContext().getModuleForCodeGen(); + Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); EmitDeferred(); @@ -527,6 +772,8 @@ void CodeGenModule::Release() { GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; } + // Module implementations are initialized the same way as a regular TU that + // imports one or more modules. 
if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else @@ -579,20 +826,6 @@ void CodeGenModule::Release() { EmitMainVoidAlias(); if (getTriple().isAMDGPU()) { - // Emit reference of __amdgpu_device_library_preserve_asan_functions to - // preserve ASAN functions in bitcode libraries. - if (LangOpts.Sanitize.has(SanitizerKind::Address)) { - auto *FT = llvm::FunctionType::get(VoidTy, {}); - auto *F = llvm::Function::Create( - FT, llvm::GlobalValue::ExternalLinkage, - "__amdgpu_device_library_preserve_asan_functions", &getModule()); - auto *Var = new llvm::GlobalVariable( - getModule(), FT->getPointerTo(), - /*isConstant=*/true, llvm::GlobalValue::WeakAnyLinkage, F, - "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr, - llvm::GlobalVariable::NotThreadLocal); - addCompilerUsedGlobal(Var); - } // Emit amdgpu_code_object_version module flag, which is code object version // times 100. if (getTarget().getTargetOpts().CodeObjectVersion != @@ -601,6 +834,17 @@ void CodeGenModule::Release() { "amdgpu_code_object_version", getTarget().getTargetOpts().CodeObjectVersion); } + + // Currently, "-mprintf-kind" option is only supported for HIP + if (LangOpts.HIP) { + auto *MDStr = llvm::MDString::get( + getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal == + TargetOptions::AMDGPUPrintfKind::Hostcall) + ? "hostcall" + : "buffered"); + getModule().addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind", + MDStr); + } } // Emit a global array containing all external kernels or device variables @@ -845,7 +1089,7 @@ void CodeGenModule::Release() { // Indicate whether this Module was compiled with -fopenmp if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) getModule().addModuleFlag(llvm::Module::Max, "openmp", LangOpts.OpenMP); - if (getLangOpts().OpenMPIsDevice) + if (getLangOpts().OpenMPIsTargetDevice) getModule().addModuleFlag(llvm::Module::Max, "openmp-device", LangOpts.OpenMP); @@ -898,6 +1142,12 @@ void CodeGenModule::Release() { if (CodeGenOpts.NoPLT) getModule().setRtLibUseGOT(); + if (getTriple().isOSBinFormatELF() && + CodeGenOpts.DirectAccessExternalData != + getModule().getDirectAccessExternalData()) { + getModule().setDirectAccessExternalData( + CodeGenOpts.DirectAccessExternalData); + } if (CodeGenOpts.UnwindTables) getModule().setUwtable(llvm::UWTableKind(CodeGenOpts.UnwindTables)); @@ -918,7 +1168,8 @@ void CodeGenModule::Release() { if (getCodeGenOpts().EmitDeclMetadata) EmitDeclMetadata(); - if (getCodeGenOpts().EmitGcovArcs || getCodeGenOpts().EmitGcovNotes) + if (getCodeGenOpts().CoverageNotesFile.size() || + getCodeGenOpts().CoverageDataFile.size()) EmitCoverageFile(); if (CGDebugInfo *DI = getModuleDebugInfo()) @@ -946,6 +1197,10 @@ void CodeGenModule::Release() { if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); + if (getContext().getTargetInfo().getMaxTLSAlign()) + getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign", + getContext().getTargetInfo().getMaxTLSAlign()); + getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); @@ -977,9 +1232,9 @@ void CodeGenModule::EmitOpenCLMetadata() { } void CodeGenModule::EmitBackendOptionsMetadata( - const CodeGenOptions CodeGenOpts) { + const CodeGenOptions &CodeGenOpts) { if (getTriple().isRISCV()) { - getModule().addModuleFlag(llvm::Module::Error, "SmallDataLimit", + getModule().addModuleFlag(llvm::Module::Min, "SmallDataLimit", 
CodeGenOpts.SmallDataLimit); } } @@ -1347,8 +1602,13 @@ static void AppendTargetVersionMangling(const CodeGenModule &CGM, if (Attr->isDefaultVersion()) return; Out << "._"; + const TargetInfo &TI = CGM.getTarget(); llvm::SmallVector<StringRef, 8> Feats; Attr->getFeatures(Feats); + llvm::stable_sort(Feats, [&TI](const StringRef FeatL, const StringRef FeatR) { + return TI.multiVersionSortPriority(FeatL) < + TI.multiVersionSortPriority(FeatR); + }); for (const auto &Feat : Feats) { Out << 'M'; Out << Feat; @@ -1400,13 +1660,19 @@ static void AppendTargetClonesMangling(const CodeGenModule &CGM, const TargetClonesAttr *Attr, unsigned VersionIndex, raw_ostream &Out) { - if (CGM.getTarget().getTriple().isAArch64()) { + const TargetInfo &TI = CGM.getTarget(); + if (TI.getTriple().isAArch64()) { StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); if (FeatureStr == "default") return; Out << "._"; SmallVector<StringRef, 8> Features; FeatureStr.split(Features, "+"); + llvm::stable_sort(Features, + [&TI](const StringRef FeatL, const StringRef FeatR) { + return TI.multiVersionSortPriority(FeatL) < + TI.multiVersionSortPriority(FeatR); + }); for (auto &Feat : Features) { Out << 'M'; Out << Feat; @@ -1726,7 +1992,11 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) { std::string OutName; llvm::raw_string_ostream Out(OutName); - getCXXABI().getMangleContext().mangleTypeName(T, Out); + getCXXABI().getMangleContext().mangleTypeName( + T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); + + if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) + Out << ".normalized"; return llvm::ConstantInt::get(Int32Ty, static_cast<uint32_t>(llvm::xxHash64(OutName))); @@ -1981,22 +2251,6 @@ CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { return MostBases.takeVector(); } -llvm::GlobalVariable * -CodeGenModule::GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr) { - auto It = RTTIProxyMap.find(Addr); - if (It != RTTIProxyMap.end()) - return It->second; - - auto *FTRTTIProxy = new llvm::GlobalVariable( - TheModule, Addr->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Addr, - "__llvm_rtti_proxy"); - FTRTTIProxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - RTTIProxyMap[Addr] = FTRTTIProxy; - return FTRTTIProxy; -} - void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F) { llvm::AttrBuilder B(F->getContext()); @@ -2132,8 +2386,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // functions. If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. 
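The two stable_sort calls above make the multiversion feature suffix order-independent; a sketch of the user-visible effect on AArch64 (feature names and the exact suffix are illustrative):

    // Before this change, "sve2+fp16" and "fp16+sve2" could mangle to
    // different ._M... suffixes; sorting by multiVersionSortPriority makes
    // both spellings produce the same variant symbol.
    __attribute__((target_clones("default", "sve2+fp16"))) int work(void);
    __attribute__((target_version("fp16+sve2"))) int len(void);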
if (getTarget().getCXXABI().areMemberFunctionsAligned()) { - if (F->getAlignment() < 2 && isa<CXXMethodDecl>(D)) - F->setAlignment(llvm::Align(2)); + if (F->getPointerAlignment(getDataLayout()) < 2 && isa<CXXMethodDecl>(D)) + F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne())); } // In the cross-dso CFI mode with canonical jump tables, we want !type @@ -2162,15 +2416,6 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, } } -void CodeGenModule::setLLVMFunctionFEnvAttributes(const FunctionDecl *D, - llvm::Function *F) { - if (D->hasAttr<StrictFPAttr>()) { - llvm::AttrBuilder FuncAttrs(F->getContext()); - FuncAttrs.addAttribute("strictfp"); - F->addFnAttrs(FuncAttrs); - } -} - void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { const Decl *D = GD.getDecl(); if (isa_and_nonnull<NamedDecl>(D)) @@ -2181,16 +2426,19 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { if (D && D->hasAttr<UsedAttr>()) addUsedOrCompilerUsedGlobal(GV); - if (CodeGenOpts.KeepStaticConsts && D && isa<VarDecl>(D)) { - const auto *VD = cast<VarDecl>(D); - if (VD->getType().isConstQualified() && - VD->getStorageDuration() == SD_Static) - addUsedOrCompilerUsedGlobal(GV); - } + if (const auto *VD = dyn_cast_if_present<VarDecl>(D); + VD && + ((CodeGenOpts.KeepPersistentStorageVariables && + (VD->getStorageDuration() == SD_Static || + VD->getStorageDuration() == SD_Thread)) || + (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && + VD->getType().isConstQualified()))) + addUsedOrCompilerUsedGlobal(GV); } bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, - llvm::AttrBuilder &Attrs) { + llvm::AttrBuilder &Attrs, + bool SetTargetFeatures) { // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. @@ -2233,8 +2481,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, if (SD) { // Apply the given CPU name as the 'tune-cpu' so that the optimizer can // favor this processor. 
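The widened retention condition in SetCommonAttributes above can be read off a short sketch (the option spellings -fkeep-persistent-storage-variables and -fkeep-static-consts are assumed from the corresponding CodeGenOptions names):

    // With -fkeep-persistent-storage-variables, both of these are now kept
    // alive (via llvm.used/llvm.compiler.used) even when unreferenced:
    static int hit_count;               // static storage duration
    thread_local int per_thread_cache;  // thread storage duration

    // -fkeep-static-consts continues to cover only the const static case:
    static const int table_size = 128;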
- TuneCPU = getTarget().getCPUSpecificTuneName( - SD->getCPUName(GD.getMultiVersionIndex())->getName()); + TuneCPU = SD->getCPUName(GD.getMultiVersionIndex())->getName(); } } else { // Otherwise just add the existing target cpu and target features to the @@ -2250,7 +2497,10 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, Attrs.addAttribute("tune-cpu", TuneCPU); AddedAttr = true; } - if (!Features.empty()) { + if (!Features.empty() && SetTargetFeatures) { + llvm::erase_if(Features, [&](const std::string& F) { + return getTarget().isReadOnlyFeature(F.substr(1)); + }); llvm::sort(Features); Attrs.addAttribute("target-features", llvm::join(Features, ",")); AddedAttr = true; @@ -2353,9 +2603,6 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, } void CodeGenModule::setKCFIType(const FunctionDecl *FD, llvm::Function *F) { - if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic()) - return; - llvm::LLVMContext &Ctx = F->getContext(); llvm::MDBuilder MDB(Ctx); F->setMetadata(llvm::LLVMContext::MD_kcfi_type, @@ -3067,12 +3314,14 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { if (LangOpts.EmitAllDecls) return true; - if (CodeGenOpts.KeepStaticConsts) { - const auto *VD = dyn_cast<VarDecl>(Global); - if (VD && VD->getType().isConstQualified() && - VD->getStorageDuration() == SD_Static) - return true; - } + const auto *VD = dyn_cast<VarDecl>(Global); + if (VD && + ((CodeGenOpts.KeepPersistentStorageVariables && + (VD->getStorageDuration() == SD_Static || + VD->getStorageDuration() == SD_Thread)) || + (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && + VD->getType().isConstQualified()))) + return true; return getContext().DeclMustBeEmitted(Global); } @@ -3115,7 +3364,7 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) && - !isTypeConstant(Global->getType(), false) && + !isTypeConstant(Global->getType(), false, false) && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) return false; @@ -3223,9 +3472,13 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject( return ConstantAddress::invalid(); } - auto *GV = new llvm::GlobalVariable( - getModule(), Init->getType(), - /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); + llvm::GlobalValue::LinkageTypes Linkage = + isExternallyVisible(TPO->getLinkageAndVisibility().getLinkage()) + ? llvm::GlobalValue::LinkOnceODRLinkage + : llvm::GlobalValue::InternalLinkage; + auto *GV = new llvm::GlobalVariable(getModule(), Init->getType(), + /*isConstant=*/true, Linkage, Init, Name); + setGVProperties(GV, TPO); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); Emitter.finalize(GV); @@ -3318,7 +3571,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (MustBeEmitted(Global)) EmitOMPDeclareReduction(DRD); return; - } else if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Global)) { + } + if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareMapper(DMD); return; @@ -4007,7 +4261,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // the iFunc instead. Name Mangling will handle the rest of the changes. if (const FunctionDecl *FD = cast_or_null<FunctionDecl>(D)) { // For the device mark the function as one that should be emitted. 
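For reference, the tune-cpu change above affects x86 cpu_specific multiversioning; a minimal sketch of the attribute syntax involved:

    // Each variant's "tune-cpu" now takes the CPU name straight from the
    // attribute instead of remapping it via getCPUSpecificTuneName().
    __attribute__((cpu_specific(ivybridge))) void work(void) {}
    __attribute__((cpu_specific(atom)))      void work(void) {}
    __attribute__((cpu_dispatch(ivybridge, atom))) void work(void);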
- if (getLangOpts().OpenMPIsDevice && OpenMPRuntime && + if (getLangOpts().OpenMPIsTargetDevice && OpenMPRuntime && !OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() && !DontDefer && !IsForDefinition) { if (const FunctionDecl *FDDef = FD->getDefinition()) { @@ -4184,13 +4438,10 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( /// GetAddrOfFunction - Return the address of the given function. If Ty is /// non-null, then this function will use the specified type if it has to /// create it (this occurs when we see a definition of the function). -llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, - llvm::Type *Ty, - bool ForVTable, - bool DontDefer, - ForDefinition_t IsForDefinition) { - assert(!cast<FunctionDecl>(GD.getDecl())->isConsteval() && - "consteval function should never be emitted"); +llvm::Constant * +CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, + bool DontDefer, + ForDefinition_t IsForDefinition) { // If there was no specific requested type, just convert it now. if (!Ty) { const auto *FD = cast<FunctionDecl>(GD.getDecl()); @@ -4315,8 +4566,9 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, /// /// If ExcludeCtor is true, the duration when the object's constructor runs /// will not be considered. The caller will need to verify that the object is -/// not written to during its construction. -bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { +/// not written to during its construction. ExcludeDtor works similarly. +bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor, + bool ExcludeDtor) { if (!Ty.isConstant(Context) && !Ty->isReferenceType()) return false; @@ -4324,7 +4576,7 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { if (const CXXRecordDecl *Record = Context.getBaseElementType(Ty)->getAsCXXRecordDecl()) return ExcludeCtor && !Record->hasMutableFields() && - Record->hasTrivialDestructor(); + (Record->hasTrivialDestructor() || ExcludeDtor); } return true; @@ -4437,7 +4689,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, // FIXME: This code is overly simple and should be merged with other global // handling. - GV->setConstant(isTypeConstant(D->getType(), false)); + GV->setConstant(isTypeConstant(D->getType(), false, false)); GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); @@ -4514,7 +4766,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, } } - if (GV->isDeclaration()) { + if (D && + D->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly) { getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); // External HIP managed variables needed to be recorded for transformation // in both device and host compilations. 
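The new ExcludeDtor parameter mirrors ExcludeCtor; a sketch of what the two flags mean for a caller (the type is illustrative):

    struct Handle {
      int fd;
      Handle();   // startup writes ignored when ExcludeCtor is true
      ~Handle();  // non-trivial dtor ignored when ExcludeDtor is true
    };
    const Handle H;
    // isTypeConstant(<type of H>, /*ExcludeCtor=*/true, /*ExcludeDtor=*/true)
    // may return true; callers such as EmitGlobalVarDefinition still gate
    // setConstant() on !NeedsGlobalCtor && !NeedsGlobalDtor themselves.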
@@ -4687,16 +4940,17 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return LangAS::sycl_global; if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { - if (D && D->hasAttr<CUDAConstantAttr>()) - return LangAS::cuda_constant; - else if (D && D->hasAttr<CUDASharedAttr>()) - return LangAS::cuda_shared; - else if (D && D->hasAttr<CUDADeviceAttr>()) - return LangAS::cuda_device; - else if (D && D->getType().isConstQualified()) - return LangAS::cuda_constant; - else - return LangAS::cuda_device; + if (D) { + if (D->hasAttr<CUDAConstantAttr>()) + return LangAS::cuda_constant; + if (D->hasAttr<CUDASharedAttr>()) + return LangAS::cuda_shared; + if (D->hasAttr<CUDADeviceAttr>()) + return LangAS::cuda_device; + if (D->getType().isConstQualified()) + return LangAS::cuda_constant; + } + return LangAS::cuda_device; } if (LangOpts.OpenMP) { @@ -4807,6 +5061,10 @@ static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) { llvm_unreachable("No such linkage"); } +bool CodeGenModule::supportsCOMDAT() const { + return getTriple().supportsCOMDAT(); +} + void CodeGenModule::maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO) { if (!shouldBeInCOMDAT(*this, D)) @@ -4825,7 +5083,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // If this is OpenMP device, check if it is legal to emit this global // normally. - if (LangOpts.OpenMPIsDevice && OpenMPRuntime && + if (LangOpts.OpenMPIsTargetDevice && OpenMPRuntime && OpenMPRuntime->emitTargetGlobalVariable(D)) return; @@ -4973,7 +5231,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Is accessible from all the threads within the grid and from the host // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." - if (GV && LangOpts.CUDA) { + if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { if (Linkage != llvm::GlobalValue::InternalLinkage && (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || @@ -4992,7 +5250,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // If it is safe to mark the global 'constant', do so now. GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && - isTypeConstant(D->getType(), true)); + isTypeConstant(D->getType(), true, true)); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *SA = D->getAttr<SectionAttr>()) { @@ -5396,9 +5654,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, maybeSetTrivialComdat(*D, *Fn); - // Set CodeGen attributes that represent floating point environment. - setLLVMFunctionFEnvAttributes(D, Fn); - CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); @@ -5845,6 +6100,7 @@ CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { // Resize the string to the right size, which is indicated by its type. 
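The CUDA restructuring above is behavior-preserving; the mapping it implements, sketched for device-side compilation:

    __constant__ float lut[256];   // -> LangAS::cuda_constant
    __device__   float state;      // -> LangAS::cuda_device
    const float  scale = 2.0f;     // const-qualified -> LangAS::cuda_constant

    __global__ void kernel() {
      __shared__ float tile[32];   // -> LangAS::cuda_shared
    }
    // no decl or no matching attribute -> LangAS::cuda_device (fallthrough)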
const ConstantArrayType *CAT = Context.getAsConstantArrayType(E->getType()); + assert(CAT && "String literal not of constant array type!"); Str.resize(CAT->getSize().getZExtValue()); return llvm::ConstantDataArray::getString(VMContext, Str, false); } @@ -6066,7 +6322,8 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( emitter.emplace(*this); InitialValue = emitter->emitForInitializer(*Value, AddrSpace, MaterializedType); - Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/Value); + Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/ Value, + /*ExcludeDtor*/ false); Type = InitialValue->getType(); } else { // No initializer, the initialization will be provided when we @@ -6228,6 +6485,10 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { } void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) { + // Device code should not be at top level. + if (LangOpts.CUDA && LangOpts.CUDAIsDevice) + return; + std::unique_ptr<CodeGenFunction> &CurCGF = GlobalTopLevelStmtBlockInFlight.first; @@ -6283,9 +6544,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { return; // Consteval function shouldn't be emitted. - if (auto *FD = dyn_cast<FunctionDecl>(D)) - if (FD->isConsteval()) - return; + if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isImmediateFunction()) + return; switch (D->getKind()) { case Decl::CXXConversion: @@ -6459,7 +6719,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { if (LangOpts.CUDA && LangOpts.CUDAIsDevice) break; // File-scope asm is ignored during device-side OpenMP compilation. - if (LangOpts.OpenMPIsDevice) + if (LangOpts.OpenMPIsTargetDevice) break; // File-scope asm is ignored during device-side SYCL compilation. if (LangOpts.SYCLIsDevice) @@ -6511,16 +6771,14 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitTopLevelDecl(D); // Visit the submodules of this module. - for (clang::Module::submodule_iterator Sub = Mod->submodule_begin(), - SubEnd = Mod->submodule_end(); - Sub != SubEnd; ++Sub) { + for (auto *Submodule : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to emit // the initializers. - if ((*Sub)->IsExplicit) + if (Submodule->IsExplicit) continue; - if (Visited.insert(*Sub).second) - Stack.push_back(*Sub); + if (Visited.insert(Submodule).second) + Stack.push_back(Submodule); } } break; @@ -6869,10 +7127,6 @@ void CodeGenModule::EmitCommandLineMetadata() { } void CodeGenModule::EmitCoverageFile() { - if (getCodeGenOpts().CoverageDataFile.empty() && - getCodeGenOpts().CoverageNotesFile.empty()) - return; - llvm::NamedMDNode *CUNode = TheModule.getNamedMetadata("llvm.dbg.cu"); if (!CUNode) return; @@ -6895,10 +7149,8 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, // Return a bogus pointer if RTTI is disabled, unless it's for EH. // FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? 
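The switch from isConsteval() to isImmediateFunction() above also covers C++23 immediate-escalated functions; a small illustrative sketch:

    consteval int square(int x) { return x * x; }   // skipped: consteval

    // This instantiation becomes an immediate function itself in C++23
    // (it forwards its parameter to a consteval function), so it is now
    // skipped too, where a plain isConsteval() check would miss it.
    template <class T> constexpr int twice(T x) { return square(x) + square(x); }

    int caller() { return twice(3); }               // folds to 18 at compile time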
- if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice || - (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && - getTriple().isNVPTX())) - return llvm::Constant::getNullValue(Int8PtrTy); + if (!shouldEmitRTTI(ForEH)) + return llvm::Constant::getNullValue(GlobalsInt8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && LangOpts.ObjCRuntime.isGNUFamily()) @@ -6942,7 +7194,12 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, if (isExternallyVisible(T->getLinkage())) { std::string OutName; llvm::raw_string_ostream Out(OutName); - getCXXABI().getMangleContext().mangleTypeName(T, Out); + getCXXABI().getMangleContext().mangleTypeName( + T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); + + if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) + Out << ".normalized"; + Out << Suffix; InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); @@ -7202,7 +7459,6 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { "Newly created module should not have manglings"); NewBuilder->Manglings = std::move(Manglings); - assert(WeakRefReferences.empty() && "Not all WeakRefRefs have been applied"); NewBuilder->WeakRefReferences = std::move(WeakRefReferences); NewBuilder->TBAA = std::move(TBAA); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index b3354657b237..05cb217e2bee 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -30,6 +30,7 @@ #include "clang/Basic/XRayLists.h" #include "clang/Lex/PreprocessorOptions.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" @@ -378,8 +379,7 @@ private: /// multiversion function resolvers and ifuncs are defined and emitted. std::vector<GlobalDecl> MultiVersionFuncs; - typedef llvm::StringMap<llvm::TrackingVH<llvm::Constant> > ReplacementsTy; - ReplacementsTy Replacements; + llvm::MapVector<StringRef, llvm::TrackingVH<llvm::Constant>> Replacements; /// List of global values to be replaced with something else. Used when we /// want to replace a GlobalValue but can't identify it by its mangled name @@ -590,8 +590,6 @@ private: MetadataTypeMap VirtualMetadataIdMap; MetadataTypeMap GeneralizedMetadataIdMap; - llvm::DenseMap<const llvm::Constant *, llvm::GlobalVariable *> RTTIProxyMap; - // Helps squashing blocks of TopLevelStmtDecl into a single llvm::Function // when used with -fincremental-extensions. std::pair<std::unique_ptr<CodeGenFunction>, const TopLevelStmtDecl *> @@ -816,7 +814,7 @@ public: return getTBAAAccessInfo(AccessType); } - bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor); + bool isTypeConstant(QualType QTy, bool ExcludeCtor, bool ExcludeDtor); bool isPaddedAtomicType(QualType type); bool isPaddedAtomicType(const AtomicType *type); @@ -928,6 +926,13 @@ public: // Return the function body address of the given function. llvm::Constant *GetFunctionStart(const ValueDecl *Decl); + // Return whether RTTI information should be emitted for this target. + bool shouldEmitRTTI(bool ForEH = false) { + return (ForEH || getLangOpts().RTTI) && !getLangOpts().CUDAIsDevice && + !(getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice && + getTriple().isNVPTX()); + } + /// Get the address of the RTTI descriptor for the given type. 
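For context on the ".normalized" suffix appended here and in CreateKCFITypeId above: with integer normalization enabled, integer types are generalized to fixed-width forms before mangling so that different frontends (e.g. C and Rust) compute the same type id. A sketch; the flag spelling is an assumption based on the CFI options of this era:

    // Built with: -flto -fsanitize=cfi-icall \
    //             -fsanitize-cfi-icall-experimental-normalize-integers
    void sink(unsigned long);          // id derived from a normalized form
    void (*fp)(unsigned long) = sink;  // checked indirect call; the id also
                                       // carries ".normalized" so normalized
                                       // and unnormalized schemes never mix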
llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false); @@ -1272,6 +1277,8 @@ public: /// function which relies on particular fast-math attributes for correctness. /// It's up to you to ensure that this is safe. void addDefaultFunctionDefinitionAttributes(llvm::Function &F); + void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, + bool WillInternalize); /// Like the overload taking a `Function &`, but intended specifically /// for frontends that want to build on Clang's target-configuration logic. @@ -1501,9 +1508,6 @@ public: std::vector<const CXXRecordDecl *> getMostBaseClasses(const CXXRecordDecl *RD); - llvm::GlobalVariable * - GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr); - /// Get the declaration of std::terminate for the platform. llvm::FunctionCallee getTerminateFn(); @@ -1581,7 +1585,8 @@ private: ForDefinition_t IsForDefinition = NotForDefinition); bool GetCPUAndFeaturesAttributes(GlobalDecl GD, - llvm::AttrBuilder &AttrBuilder); + llvm::AttrBuilder &AttrBuilder, + bool SetTargetFeatures = true); void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO); /// Set function attributes for a function declaration. @@ -1711,7 +1716,7 @@ private: /// Emit the module flag metadata used to pass options controlling the /// the backend to LLVM. - void EmitBackendOptionsMetadata(const CodeGenOptions CodeGenOpts); + void EmitBackendOptionsMetadata(const CodeGenOptions &CodeGenOpts); /// Emits OpenCL specific Metadata e.g. OpenCL version. void EmitOpenCLMetadata(); @@ -1734,6 +1739,12 @@ private: /// function. void SimplifyPersonality(); + /// Helper function for getDefaultFunctionAttributes. Builds a set of function + /// attributes which can be simply added to a function. + void getTrivialDefaultFunctionAttributes(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs); + /// Helper function for ConstructAttributeList and /// addDefaultFunctionDefinitionAttributes. Builds a set of function /// attributes to add to a function with the given properties. diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 15a3d74666ca..b80317529b72 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1036,7 +1036,7 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, llvm::Expected<llvm::InstrProfRecord> RecordExpected = PGOReader->getInstrProfRecord(FuncName, FunctionHash); if (auto E = RecordExpected.takeError()) { - auto IPE = llvm::InstrProfError::take(std::move(E)); + auto IPE = std::get<0>(llvm::InstrProfError::take(std::move(E))); if (IPE == llvm::instrprof_error::unknown_function) CGM.getPGOStats().addMissing(IsInMainFile); else if (IPE == llvm::instrprof_error::hash_mismatch) diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h index 66c93cba4bb0..392ec5a144fe 100644 --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -114,7 +114,12 @@ public: return 0; if (!haveRegionCounts()) return 0; - return RegionCounts[(*RegionCounterMap)[S]]; + // With profiles from a differing version of clang we can have mismatched + // decl counts. Don't crash in such a case. 
+ auto Index = (*RegionCounterMap)[S]; + if (Index >= RegionCounts.size()) + return 0; + return RegionCounts[Index]; } }; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index abbf71daf1d5..30021794a0bb 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -125,93 +125,9 @@ bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const { return I != RecordDeclTypes.end() && !I->second->isOpaque(); } -static bool -isSafeToConvert(QualType T, CodeGenTypes &CGT, - llvm::SmallPtrSet<const RecordDecl*, 16> &AlreadyChecked); - - -/// isSafeToConvert - Return true if it is safe to convert the specified record -/// decl to IR and lay it out, false if doing so would cause us to get into a -/// recursive compilation mess. -static bool -isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT, - llvm::SmallPtrSet<const RecordDecl*, 16> &AlreadyChecked) { - // If we have already checked this type (maybe the same type is used by-value - // multiple times in multiple structure fields, don't check again. - if (!AlreadyChecked.insert(RD).second) - return true; - - const Type *Key = CGT.getContext().getTagDeclType(RD).getTypePtr(); - - // If this type is already laid out, converting it is a noop. - if (CGT.isRecordLayoutComplete(Key)) return true; - - // If this type is currently being laid out, we can't recursively compile it. - if (CGT.isRecordBeingLaidOut(Key)) - return false; - - // If this type would require laying out bases that are currently being laid - // out, don't do it. This includes virtual base classes which get laid out - // when a class is translated, even though they aren't embedded by-value into - // the class. - if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { - for (const auto &I : CRD->bases()) - if (!isSafeToConvert(I.getType()->castAs<RecordType>()->getDecl(), CGT, - AlreadyChecked)) - return false; - } - - // If this type would require laying out members that are currently being laid - // out, don't do it. - for (const auto *I : RD->fields()) - if (!isSafeToConvert(I->getType(), CGT, AlreadyChecked)) - return false; - - // If there are no problems, lets do it. - return true; -} - -/// isSafeToConvert - Return true if it is safe to convert this field type, -/// which requires the structure elements contained by-value to all be -/// recursively safe to convert. -static bool -isSafeToConvert(QualType T, CodeGenTypes &CGT, - llvm::SmallPtrSet<const RecordDecl*, 16> &AlreadyChecked) { - // Strip off atomic type sugar. - if (const auto *AT = T->getAs<AtomicType>()) - T = AT->getValueType(); - - // If this is a record, check it. - if (const auto *RT = T->getAs<RecordType>()) - return isSafeToConvert(RT->getDecl(), CGT, AlreadyChecked); - - // If this is an array, check the elements, which are embedded inline. - if (const auto *AT = CGT.getContext().getAsArrayType(T)) - return isSafeToConvert(AT->getElementType(), CGT, AlreadyChecked); - - // Otherwise, there is no concern about transforming this. We only care about - // things that are contained by-value in a structure that can have another - // structure as a member. - return true; -} - - -/// isSafeToConvert - Return true if it is safe to convert the specified record -/// decl to IR and lay it out, false if doing so would cause us to get into a -/// recursive compilation mess. -static bool isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT) { - // If no structs are being laid out, we can certainly do this one. 
- if (CGT.noRecordsBeingLaidOut()) return true; - - llvm::SmallPtrSet<const RecordDecl*, 16> AlreadyChecked; - return isSafeToConvert(RD, CGT, AlreadyChecked); -} - /// isFuncParamTypeConvertible - Return true if the specified type in a /// function parameter or result position can be converted to an IR type at this -/// point. This boils down to being whether it is complete, as well as whether -/// we've temporarily deferred expanding the type because we're in a recursive -/// context. +/// point. This boils down to being whether it is complete. bool CodeGenTypes::isFuncParamTypeConvertible(QualType Ty) { // Some ABIs cannot have their member pointers represented in IR unless // certain circumstances have been reached. @@ -223,21 +139,7 @@ bool CodeGenTypes::isFuncParamTypeConvertible(QualType Ty) { if (!TT) return true; // Incomplete types cannot be converted. - if (TT->isIncompleteType()) - return false; - - // If this is an enum, then it is always safe to convert. - const RecordType *RT = dyn_cast<RecordType>(TT); - if (!RT) return true; - - // Otherwise, we have to be careful. If it is a struct that we're in the - // process of expanding, then we can't convert the function type. That's ok - // though because we must be in a pointer context under the struct, so we can - // just convert it to a dummy type. - // - // We decide this by checking whether ConvertRecordDeclType returns us an - // opaque type for a struct that we know is defined. - return isSafeToConvert(RT->getDecl(), *this); + return !TT->isIncompleteType(); } @@ -333,7 +235,6 @@ static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, llvm::Type *CodeGenTypes::ConvertFunctionTypeInternal(QualType QFT) { assert(QFT.isCanonical()); - const Type *Ty = QFT.getTypePtr(); const FunctionType *FT = cast<FunctionType>(QFT.getTypePtr()); // First, check whether we can build the full function type. If the // function type depends on an incomplete type (e.g. a struct or enum), we @@ -356,14 +257,6 @@ llvm::Type *CodeGenTypes::ConvertFunctionTypeInternal(QualType QFT) { return llvm::StructType::get(getLLVMContext()); } - // While we're converting the parameter types for a function, we don't want - // to recursively convert any pointed-to structs. Converting directly-used - // structs is ok though. - if (!RecordsBeingLaidOut.insert(Ty).second) { - SkippedLayout = true; - return llvm::StructType::get(getLLVMContext()); - } - // The function type can be built; call the appropriate routines to // build it. const CGFunctionInfo *FI; @@ -389,11 +282,6 @@ llvm::Type *CodeGenTypes::ConvertFunctionTypeInternal(QualType QFT) { ResultType = GetFunctionType(*FI); } - RecordsBeingLaidOut.erase(Ty); - - if (RecordsBeingLaidOut.empty()) - while (!DeferredRecords.empty()) - ConvertRecordDeclType(DeferredRecords.pop_back_val()); return ResultType; } @@ -421,27 +309,16 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { if (const RecordType *RT = dyn_cast<RecordType>(Ty)) return ConvertRecordDeclType(RT->getDecl()); - // The LLVM type we return for a given Clang type may not always be the same, - // most notably when dealing with recursive structs. We mark these potential - // cases with ShouldUseCache below. Builtin types cannot be recursive. - // TODO: when clang uses LLVM opaque pointers we won't be able to represent - // recursive types with LLVM types, making this logic much simpler. 
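The machinery being deleted here existed to break conversion cycles like the one below; with opaque pointers a pointer member no longer forces conversion of its pointee, so the guards and the deferral list can go (a sketch):

    struct Node {
      int value;
      struct Node *next;  // previously triggered isSafeToConvert/DeferredRecords
    };
    // Now lowers directly to: %struct.Node = type { i32, ptr }
    // No recursion into Node is needed to form the `ptr` member.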
llvm::Type *CachedType = nullptr; - bool ShouldUseCache = - Ty->isBuiltinType() || - (noRecordsBeingLaidOut() && FunctionsBeingProcessed.empty()); - if (ShouldUseCache) { - llvm::DenseMap<const Type *, llvm::Type *>::iterator TCI = - TypeCache.find(Ty); - if (TCI != TypeCache.end()) - CachedType = TCI->second; - // With expensive checks, check that the type we compute matches the - // cached type. + auto TCI = TypeCache.find(Ty); + if (TCI != TypeCache.end()) + CachedType = TCI->second; + // With expensive checks, check that the type we compute matches the + // cached type. #ifndef EXPENSIVE_CHECKS - if (CachedType) - return CachedType; + if (CachedType) + return CachedType; #endif - } // If we don't have it in the cache, convert it now. llvm::Type *ResultType = nullptr; @@ -596,6 +473,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::SveInt64x4: case BuiltinType::SveUint64x4: case BuiltinType::SveBool: + case BuiltinType::SveBoolx2: + case BuiltinType::SveBoolx4: case BuiltinType::SveFloat16: case BuiltinType::SveFloat16x2: case BuiltinType::SveFloat16x3: @@ -618,6 +497,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { Info.EC.getKnownMinValue() * Info.NumVectors); } + case BuiltinType::SveCount: + return llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ ResultType = \ #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" - { - ASTContext::BuiltinVectorTypeInfo Info = - Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty)); - return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), - Info.EC.getKnownMinValue() * - Info.NumVectors); - } - case BuiltinType::Dependent: + { + ASTContext::BuiltinVectorTypeInfo Info = + Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty)); + // Tuple types are expressed as aggregate types of the same scalable + // vector type (e.g. vint32m1x2_t is two vint32m1_t, which is {<vscale x + // 2 x i32>, <vscale x 2 x i32>}).
+ if (Info.NumVectors != 1) { + llvm::Type *EltTy = llvm::ScalableVectorType::get( + ConvertType(Info.ElementType), Info.EC.getKnownMinValue()); + llvm::SmallVector<llvm::Type *, 4> EltTys(Info.NumVectors, EltTy); + return llvm::StructType::get(getLLVMContext(), EltTys); + } + return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), + Info.EC.getKnownMinValue() * + Info.NumVectors); + } +#define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS) \ + case BuiltinType::Id: { \ + if (BuiltinType::Id == BuiltinType::WasmExternRef) \ + ResultType = CGM.getTargetCodeGenInfo().getWasmExternrefReferenceType(); \ + else \ + llvm_unreachable("Unexpected wasm reference builtin type!"); \ + } break; +#include "clang/Basic/WebAssemblyReferenceTypes.def" + case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ case BuiltinType::Id: @@ -654,19 +552,15 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case Type::RValueReference: { const ReferenceType *RTy = cast<ReferenceType>(Ty); QualType ETy = RTy->getPointeeType(); - llvm::Type *PointeeType = ConvertTypeForMem(ETy); unsigned AS = getTargetAddressSpace(ETy); - ResultType = llvm::PointerType::get(PointeeType, AS); + ResultType = llvm::PointerType::get(getLLVMContext(), AS); break; } case Type::Pointer: { const PointerType *PTy = cast<PointerType>(Ty); QualType ETy = PTy->getPointeeType(); - llvm::Type *PointeeType = ConvertTypeForMem(ETy); - if (PointeeType->isVoidTy()) - PointeeType = llvm::Type::getInt8Ty(getLLVMContext()); unsigned AS = getTargetAddressSpace(ETy); - ResultType = llvm::PointerType::get(PointeeType, AS); + ResultType = llvm::PointerType::get(getLLVMContext(), AS); break; } @@ -743,15 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } - case Type::ObjCObjectPointer: { - // Protocol qualifications do not influence the LLVM type, we just return a - // pointer to the underlying interface type. We don't need to worry about - // recursive conversion. - llvm::Type *T = - ConvertTypeForMem(cast<ObjCObjectPointerType>(Ty)->getPointeeType()); - ResultType = T->getPointerTo(); + case Type::ObjCObjectPointer: + ResultType = llvm::PointerType::getUnqual(getLLVMContext()); break; - } case Type::Enum: { const EnumDecl *ED = cast<EnumType>(Ty)->getDecl(); @@ -765,18 +653,15 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::BlockPointer: { - const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); - llvm::Type *PointeeType = CGM.getLangOpts().OpenCL - ? CGM.getGenericBlockLiteralType() - : ConvertTypeForMem(FTy); // Block pointers lower to function type. For function type, // getTargetAddressSpace() returns default address space for // function pointer i.e. program address space. Therefore, for block // pointers, it is important to pass the pointee AST address space when // calling getTargetAddressSpace(), to ensure that we get the LLVM IR // address space for data pointers and not function pointers. 
+ const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); unsigned AS = Context.getTargetAddressSpace(FTy.getAddressSpace()); - ResultType = llvm::PointerType::get(PointeeType, AS); + ResultType = llvm::PointerType::get(getLLVMContext(), AS); break; } @@ -827,8 +712,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { assert((!CachedType || CachedType == ResultType) && "Cached type doesn't match computed type"); - if (ShouldUseCache) - TypeCache[Ty] = ResultType; + TypeCache[Ty] = ResultType; return ResultType; } @@ -861,17 +745,6 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { if (!RD || !RD->isCompleteDefinition() || !Ty->isOpaque()) return Ty; - // If converting this type would cause us to infinitely loop, don't do it! - if (!isSafeToConvert(RD, *this)) { - DeferredRecords.push_back(RD); - return Ty; - } - - // Okay, this is a definition of a type. Compile the implementation now. - bool InsertResult = RecordsBeingLaidOut.insert(Key).second; - (void)InsertResult; - assert(InsertResult && "Recursively compiling a struct?"); - // Force conversion of non-virtual base classes recursively. if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { for (const auto &I : CRD->bases()) { @@ -884,22 +757,12 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { std::unique_ptr<CGRecordLayout> Layout = ComputeRecordLayout(RD, Ty); CGRecordLayouts[Key] = std::move(Layout); - // We're done laying out this struct. - bool EraseResult = RecordsBeingLaidOut.erase(Key); (void)EraseResult; - assert(EraseResult && "struct not in RecordsBeingLaidOut set?"); - // If this struct blocked a FunctionType conversion, then recompute whatever // was derived from that. // FIXME: This is hugely overconservative. if (SkippedLayout) TypeCache.clear(); - // If we're done converting the outer-most record, then convert any deferred - // structs as well. - if (RecordsBeingLaidOut.empty()) - while (!DeferredRecords.empty()) - ConvertRecordDeclType(DeferredRecords.pop_back_val()); - return Ty; } diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index e76fda95513f..9088f77b95c3 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -78,20 +78,12 @@ class CodeGenTypes { /// Hold memoized CGFunctionInfo results. llvm::FoldingSet<CGFunctionInfo> FunctionInfos{FunctionInfosLog2InitSize}; - /// This set keeps track of records that we're currently converting - /// to an IR type. For example, when converting: - /// struct A { struct B { int x; } } when processing 'x', the 'A' and 'B' - /// types will be in this set. - llvm::SmallPtrSet<const Type*, 4> RecordsBeingLaidOut; - llvm::SmallPtrSet<const CGFunctionInfo*, 4> FunctionsBeingProcessed; /// True if we didn't layout a function due to a being inside /// a recursive struct conversion, set this to true. bool SkippedLayout; - SmallVector<const RecordDecl *, 8> DeferredRecords; - /// This map keeps cache of llvm::Types and maps clang::Type to /// corresponding llvm::Type. llvm::DenseMap<const Type *, llvm::Type *> TypeCache; @@ -300,12 +292,6 @@ public: // These are internal details of CGT that shouldn't be used externally. 
bool isZeroInitializable(const RecordDecl *RD); bool isRecordLayoutComplete(const Type *Ty) const; - bool noRecordsBeingLaidOut() const { - return RecordsBeingLaidOut.empty(); - } - bool isRecordBeingLaidOut(const Type *Ty) const { - return RecordsBeingLaidOut.count(Ty); - } unsigned getTargetAddressSpace(QualType T) const; }; diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h index 1a7a181ca7f0..a55da0dcad79 100644 --- a/clang/lib/CodeGen/ConstantEmitter.h +++ b/clang/lib/CodeGen/ConstantEmitter.h @@ -42,7 +42,7 @@ private: /// The AST address space where this (non-abstract) initializer is going. /// Used for generating appropriate placeholders. - LangAS DestAddressSpace; + LangAS DestAddressSpace = LangAS::Default; llvm::SmallVector<std::pair<llvm::Constant *, llvm::GlobalVariable*>, 4> PlaceholderAddresses; diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 101cd6a67b49..bb4c6f5e0cde 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -37,6 +37,11 @@ static llvm::cl::opt<bool> EmptyLineCommentCoverage( "disable it on test)"), llvm::cl::init(true), llvm::cl::Hidden); +static llvm::cl::opt<bool> SystemHeadersCoverage( + "system-headers-coverage", + llvm::cl::desc("Enable collecting coverage from system headers"), + llvm::cl::init(false), llvm::cl::Hidden); + using namespace clang; using namespace CodeGen; using namespace llvm::coverage; @@ -301,8 +306,9 @@ public: if (!Visited.insert(File).second) continue; - // Do not map FileID's associated with system headers. - if (SM.isInSystemHeader(SM.getSpellingLoc(Loc))) + // Do not map FileID's associated with system headers unless collecting + // coverage from system headers is explicitly enabled. + if (!SystemHeadersCoverage && SM.isInSystemHeader(SM.getSpellingLoc(Loc))) continue; unsigned Depth = 0; @@ -416,8 +422,10 @@ public: SourceLocation LocStart = Region.getBeginLoc(); assert(SM.getFileID(LocStart).isValid() && "region in invalid file"); - // Ignore regions from system headers. - if (SM.isInSystemHeader(SM.getSpellingLoc(LocStart))) + // Ignore regions from system headers unless collecting coverage from + // system headers is explicitly enabled. + if (!SystemHeadersCoverage && + SM.isInSystemHeader(SM.getSpellingLoc(LocStart))) continue; auto CovFileID = getCoverageFileID(LocStart); @@ -594,6 +602,19 @@ struct CounterCoverageMappingBuilder MostRecentLocation = *StartLoc; } + // If either of these locations is invalid, something elsewhere in the + // compiler has broken. + assert((!StartLoc || StartLoc->isValid()) && "Start location is not valid"); + assert((!EndLoc || EndLoc->isValid()) && "End location is not valid"); + + // However, we can still recover without crashing. + // If either location is invalid, set it to std::nullopt to avoid + // letting users of RegionStack think that region has a valid start/end + // location. 
+  if (StartLoc && StartLoc->isInvalid()) + StartLoc = std::nullopt; + if (EndLoc && EndLoc->isInvalid()) + EndLoc = std::nullopt; RegionStack.emplace_back(Count, FalseCount, StartLoc, EndLoc); return RegionStack.size() - 1; @@ -616,7 +637,8 @@ assert(RegionStack.size() >= ParentIndex && "parent not in stack"); while (RegionStack.size() > ParentIndex) { SourceMappingRegion &Region = RegionStack.back(); - if (Region.hasStartLoc()) { + if (Region.hasStartLoc() && + (Region.hasEndLoc() || RegionStack[ParentIndex].hasEndLoc())) { SourceLocation StartLoc = Region.getBeginLoc(); SourceLocation EndLoc = Region.hasEndLoc() ? Region.getEndLoc() @@ -683,7 +705,7 @@ assert(SM.isWrittenInSameFile(Region.getBeginLoc(), EndLoc)); assert(SpellingRegion(SM, Region).isInSourceOrder()); SourceRegions.push_back(Region); - } + } RegionStack.pop_back(); } } @@ -1000,8 +1022,10 @@ void VisitDecl(const Decl *D) { Stmt *Body = D->getBody(); - // Do not propagate region counts into system headers. - if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) + // Do not propagate region counts into system headers unless collecting + // coverage from system headers is explicitly enabled. + if (!SystemHeadersCoverage && Body && + SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) return; // Do not visit the artificial children nodes of defaulted methods. The @@ -1456,6 +1480,7 @@ Counter TrueCount = getRegionCounter(E); propagateCounts(ParentCount, E->getCond()); + Counter OutCount; if (!isa<BinaryConditionalOperator>(E)) { // The 'then' count applies to the area immediately after the condition. @@ -1465,12 +1490,18 @@ fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), TrueCount); extendRegion(E->getTrueExpr()); - propagateCounts(TrueCount, E->getTrueExpr()); + OutCount = propagateCounts(TrueCount, E->getTrueExpr()); } extendRegion(E->getFalseExpr()); - propagateCounts(subtractCounters(ParentCount, TrueCount), - E->getFalseExpr()); + OutCount = addCounters( + OutCount, propagateCounts(subtractCounters(ParentCount, TrueCount), + E->getFalseExpr())); + + if (OutCount != ParentCount) { + pushRegion(OutCount); + GapRegionCounter = OutCount; + } // Create Branch Region around condition. createBranchRegion(E->getCond(), TrueCount, @@ -1504,9 +1535,19 @@ subtractCounters(RHSExecCnt, RHSTrueCnt)); } + // Determine whether the right side of an OR operation needs to be visited. + bool shouldVisitRHS(const Expr *LHS) { + bool LHSIsTrue = false; + bool LHSIsConst = false; + if (!LHS->isValueDependent()) + LHSIsConst = LHS->EvaluateAsBooleanCondition( + LHSIsTrue, CVM.getCodeGenModule().getContext()); + return !LHSIsConst || (LHSIsConst && !LHSIsTrue); + } + void VisitBinLOr(const BinaryOperator *E) { extendRegion(E->getLHS()); - propagateCounts(getRegion().getCounter(), E->getLHS()); + Counter OutCount = propagateCounts(getRegion().getCounter(), E->getLHS()); handleFileExit(getEnd(E->getLHS())); // Counter tracks the right hand side of a logical or operator. @@ -1519,6 +1560,10 @@ // Extract the RHS's "False" Instance Counter. Counter RHSFalseCnt = getRegionCounter(E->getRHS()); + if (!shouldVisitRHS(E->getLHS())) { + GapRegionCounter = OutCount; + } + // Extract the Parent Region Counter.
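shouldVisitRHS above encodes short-circuiting over a constant left-hand side; sketched:

    int expensive_check(void);

    void f(int cond) {
      if (0 || expensive_check()) {}    // constant-false LHS: RHS still visited
      if (1 || expensive_check()) {}    // constant-true LHS: RHS never runs, so
                                        // the gap region keeps the LHS out-count
      if (cond || expensive_check()) {} // non-constant LHS: handling unchanged
    }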
Counter ParentCnt = getRegion().getCounter(); @@ -1535,6 +1580,15 @@ // Lambdas are treated as their own functions for now, so we shouldn't // propagate counts into them. } + + void VisitPseudoObjectExpr(const PseudoObjectExpr *POE) { + // Just visit the syntactic expression as this is what users actually write. + VisitStmt(POE->getSyntacticForm()); + } + + void VisitOpaqueValueExpr(const OpaqueValueExpr* OVE) { + Visit(OVE->getSourceExpr()); + } }; } // end anonymous namespace @@ -1580,9 +1634,7 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, CoverageMappingModuleGen::CoverageMappingModuleGen( CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) - : CGM(CGM), SourceInfo(SourceInfo) { - CoveragePrefixMap = CGM.getCodeGenOpts().CoveragePrefixMap; -} + : CGM(CGM), SourceInfo(SourceInfo) {} std::string CoverageMappingModuleGen::getCurrentDirname() { if (!CGM.getCodeGenOpts().CoverageCompilationDir.empty()) @@ -1596,8 +1648,13 @@ std::string CoverageMappingModuleGen::normalizeFilename(StringRef Filename) { llvm::SmallString<256> Path(Filename); llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); - for (const auto &Entry : CoveragePrefixMap) { - if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second)) + + /// Traverse coverage prefix map in reverse order because prefix replacements + /// are applied in reverse order starting from the last one when multiple + /// prefix replacement options are provided. + for (const auto &[From, To] : + llvm::reverse(CGM.getCodeGenOpts().CoveragePrefixMap)) { + if (llvm::sys::path::replace_path_prefix(Path, From, To)) break; } return Path.str().str(); diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h index f5282601b640..eca68d9abd79 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -107,7 +107,6 @@ class CoverageMappingModuleGen { llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries; std::vector<llvm::Constant *> FunctionNames; std::vector<FunctionInfo> FunctionRecords; - std::map<std::string, std::string> CoveragePrefixMap; std::string getCurrentDirname(); std::string normalizeFilename(StringRef Filename); diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h index cd649cb11f9b..3c8a51590d1b 100644 --- a/clang/lib/CodeGen/EHScopeStack.h +++ b/clang/lib/CodeGen/EHScopeStack.h @@ -148,6 +148,12 @@ public: public: Cleanup(const Cleanup &) = default; Cleanup(Cleanup &&) {} + + // The copy and move assignment operators are defined as deleted pending + // further motivation. + Cleanup &operator=(const Cleanup &) = delete; + Cleanup &operator=(Cleanup &&) = delete; + Cleanup() = default; virtual bool isRedundantBeforeReturn() { return false; } @@ -272,6 +278,9 @@ public: CGF(nullptr) {} ~EHScopeStack() { delete[] StartOfBuffer; } + EHScopeStack(const EHScopeStack &) = delete; + EHScopeStack &operator=(const EHScopeStack &) = delete; + /// Push a lazily-created cleanup on the stack. template <class T, class... As> void pushCleanup(CleanupKind Kind, As...
A) { static_assert(alignof(T) <= ScopeStackAlignment, diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 18403036e700..79a926cb9edd 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -36,6 +36,8 @@ #include "llvm/IR/Value.h" #include "llvm/Support/ScopedPrinter.h" +#include <optional> + using namespace clang; using namespace CodeGen; @@ -185,14 +187,58 @@ public: bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, QualType SrcRecordTy) override; - llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, Address Value, + /// Determine whether we know that all instances of type RecordTy will have + /// the same vtable pointer value, one that is distinct from all other vtable + /// pointers. While this is required by the Itanium ABI, it doesn't happen in + /// practice in some cases due to language extensions. + bool hasUniqueVTablePointer(QualType RecordTy) { + const CXXRecordDecl *RD = RecordTy->getAsCXXRecordDecl(); + + // Under -fapple-kext, multiple definitions of the same vtable may be + // emitted. + if (!CGM.getCodeGenOpts().AssumeUniqueVTables || + getContext().getLangOpts().AppleKext) + return false; + + // If the type_info* would be null, the vtable might be merged with that of + // another type. + if (!CGM.shouldEmitRTTI()) + return false; + + // If there's only one definition of the vtable in the program, it has a + // unique address. + if (!llvm::GlobalValue::isWeakForLinker(CGM.getVTableLinkage(RD))) + return true; + + // Even if there are multiple definitions of the vtable, they are required + // by the ABI to use the same symbol name, so should be merged at load + // time. However, if the class has hidden visibility, there can be + // different versions of the class in different modules, and the ABI + // library might treat them as being the same.
+ if (CGM.GetLLVMVisibility(RD->getVisibility()) != + llvm::GlobalValue::DefaultVisibility) + return false; + + return true; + } + + bool shouldEmitExactDynamicCast(QualType DestRecordTy) override { + return hasUniqueVTablePointer(DestRecordTy); + } + + llvm::Value *emitDynamicCastCall(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) override; - llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, - QualType SrcRecordTy, - QualType DestTy) override; + llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address ThisAddr, + QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy, + llvm::BasicBlock *CastSuccess, + llvm::BasicBlock *CastFail) override; + + llvm::Value *emitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, + QualType SrcRecordTy) override; bool EmitBadCastCall(CodeGenFunction &CGF) override; @@ -580,13 +626,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGBuilderTy &Builder = CGF.Builder; const FunctionProtoType *FPT = - MPT->getPointeeType()->getAs<FunctionProtoType>(); + MPT->getPointeeType()->castAs<FunctionProtoType>(); auto *RD = cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( - CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); - llvm::Constant *ptrdiff_1 = llvm::ConstantInt::get(CGM.PtrDiffTy, 1); llvm::BasicBlock *FnVirtual = CGF.createBasicBlock("memptr.virtual"); @@ -628,7 +671,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGF.EmitBlock(FnVirtual); // Cast the adjusted this to a pointer to vtable pointer and load. - llvm::Type *VTableTy = Builder.getInt8PtrTy(); + llvm::Type *VTableTy = CGF.CGM.GlobalsInt8PtrTy; CharUnits VTablePtrAlign = CGF.CGM.getDynamicOffsetAlignment(ThisAddr.getAlignment(), RD, CGF.getPointerAlign()); @@ -687,8 +730,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId}); CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0); - VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(), - "memptr.virtualfn"); } else { // When not doing VFE, emit a normal load, as it allows more // optimisations than type.checked.load. @@ -709,15 +750,12 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGM.getIntrinsic(llvm::Intrinsic::load_relative, {VTableOffset->getType()}), {VTable, VTableOffset}); - VirtualFn = CGF.Builder.CreateBitCast(VirtualFn, FTy->getPointerTo()); } else { llvm::Value *VFPAddr = CGF.Builder.CreateGEP(CGF.Int8Ty, VTable, VTableOffset); - VFPAddr = CGF.Builder.CreateBitCast( - VFPAddr, FTy->getPointerTo()->getPointerTo()); VirtualFn = CGF.Builder.CreateAlignedLoad( - FTy->getPointerTo(), VFPAddr, CGF.getPointerAlign(), - "memptr.virtualfn"); + llvm::PointerType::getUnqual(CGF.getLLVMContext()), VFPAddr, + CGF.getPointerAlign(), "memptr.virtualfn"); } } assert(VirtualFn && "Virtual fuction pointer not created!"); @@ -757,8 +795,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // In the non-virtual path, the function pointer is actually a // function pointer. 
CGF.EmitBlock(FnNonVirtual); - llvm::Value *NonVirtualFn = - Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn"); + llvm::Value *NonVirtualFn = Builder.CreateIntToPtr( + FnAsInt, llvm::PointerType::getUnqual(CGF.getLLVMContext()), + "memptr.nonvirtualfn"); // Check the function pointer if CFI on member function pointers is enabled. if (ShouldEmitCFICheck) { @@ -799,7 +838,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( // We're done. CGF.EmitBlock(FnEnd); - llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2); + llvm::PHINode *CalleePtr = + Builder.CreatePHI(llvm::PointerType::getUnqual(CGF.getLLVMContext()), 2); CalleePtr->addIncoming(VirtualFn, FnVirtual); CalleePtr->addIncoming(NonVirtualFn, FnNonVirtual); @@ -816,18 +856,9 @@ llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress( CGBuilderTy &Builder = CGF.Builder; - // Cast to char*. - Base = Builder.CreateElementBitCast(Base, CGF.Int8Ty); - // Apply the offset, which we assume is non-null. - llvm::Value *Addr = Builder.CreateInBoundsGEP( - Base.getElementType(), Base.getPointer(), MemPtr, "memptr.offset"); - - // Cast the address to the appropriate pointer type, adopting the - // address space of the base pointer. - llvm::Type *PType = CGF.ConvertTypeForMem(MPT->getPointeeType()) - ->getPointerTo(Base.getAddressSpace()); - return Builder.CreateBitCast(Addr, PType); + return Builder.CreateInBoundsGEP(CGF.Int8Ty, Base.getPointer(), MemPtr, + "memptr.offset"); } /// Perform a bitcast, derived-to-base, or base-to-derived member pointer @@ -1212,13 +1243,14 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, // Grab the vtable pointer as an intptr_t*. auto *ClassDecl = cast<CXXRecordDecl>(ElementType->castAs<RecordType>()->getDecl()); - llvm::Value *VTable = - CGF.GetVTablePtr(Ptr, CGF.IntPtrTy->getPointerTo(), ClassDecl); + llvm::Value *VTable = CGF.GetVTablePtr( + Ptr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), ClassDecl); // Track back to entry -2 and pull out the offset there. llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64( CGF.IntPtrTy, VTable, -2, "complete-offset.ptr"); - llvm::Value *Offset = CGF.Builder.CreateAlignedLoad(CGF.IntPtrTy, OffsetPtr, CGF.getPointerAlign()); + llvm::Value *Offset = CGF.Builder.CreateAlignedLoad(CGF.IntPtrTy, OffsetPtr, + CGF.getPointerAlign()); // Apply the offset. llvm::Value *CompletePtr = @@ -1268,7 +1300,7 @@ static llvm::FunctionCallee getThrowFn(CodeGenModule &CGM) { // void __cxa_throw(void *thrown_exception, std::type_info *tinfo, // void (*dest) (void *)); - llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy }; + llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.GlobalsInt8PtrTy, CGM.Int8PtrTy }; llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false); @@ -1417,8 +1449,8 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, llvm::Type *StdTypeInfoPtrTy) { auto *ClassDecl = cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); - llvm::Value *Value = - CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl); + llvm::Value *Value = CGF.GetVTablePtr( + ThisPtr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), ClassDecl); if (CGM.getItaniumVTableContext().isRelativeLayout()) { // Load the type info. 
@@ -1426,9 +1458,6 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, Value = CGF.Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}), {Value, llvm::ConstantInt::get(CGM.Int32Ty, -4)}); - - // Setup to dereference again since this is a proxy we accessed. - Value = CGF.Builder.CreateBitCast(Value, StdTypeInfoPtrTy->getPointerTo()); } else { // Load the type info. Value = @@ -1443,12 +1472,11 @@ bool ItaniumCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, return SrcIsPtr; } -llvm::Value *ItaniumCXXABI::EmitDynamicCastCall( +llvm::Value *ItaniumCXXABI::emitDynamicCastCall( CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) { llvm::Type *PtrDiffLTy = CGF.ConvertType(CGF.getContext().getPointerDiffType()); - llvm::Type *DestLTy = CGF.ConvertType(DestTy); llvm::Value *SrcRTTI = CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType()); @@ -1463,12 +1491,9 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastCall( computeOffsetHint(CGF.getContext(), SrcDecl, DestDecl).getQuantity()); // Emit the call to __dynamic_cast. - llvm::Value *Value = ThisAddr.getPointer(); - Value = CGF.EmitCastToVoidPtr(Value); - - llvm::Value *args[] = {Value, SrcRTTI, DestRTTI, OffsetHint}; - Value = CGF.EmitNounwindRuntimeCall(getItaniumDynamicCastFn(CGF), args); - Value = CGF.Builder.CreateBitCast(Value, DestLTy); + llvm::Value *Args[] = {ThisAddr.getPointer(), SrcRTTI, DestRTTI, OffsetHint}; + llvm::Value *Value = + CGF.EmitNounwindRuntimeCall(getItaniumDynamicCastFn(CGF), Args); /// C++ [expr.dynamic.cast]p9: /// A failed cast to reference type throws std::bad_cast @@ -1486,18 +1511,95 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastCall( return Value; } -llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, +llvm::Value *ItaniumCXXABI::emitExactDynamicCast( + CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, + QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastSuccess, + llvm::BasicBlock *CastFail) { + ASTContext &Context = getContext(); + + // Find all the inheritance paths. + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl(); + CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, + /*DetectVirtual=*/false); + (void)DestDecl->isDerivedFrom(SrcDecl, Paths); + + // Find an offset within `DestDecl` where a `SrcDecl` instance and its vptr + // might appear. + std::optional<CharUnits> Offset; + for (const CXXBasePath &Path : Paths) { + // dynamic_cast only finds public inheritance paths. + if (Path.Access != AS_public) + continue; + + CharUnits PathOffset; + for (const CXXBasePathElement &PathElement : Path) { + // Find the offset along this inheritance step. + const CXXRecordDecl *Base = + PathElement.Base->getType()->getAsCXXRecordDecl(); + if (PathElement.Base->isVirtual()) { + // For a virtual base class, we know that the derived class is exactly + // DestDecl, so we can use the vbase offset from its layout. + const ASTRecordLayout &L = Context.getASTRecordLayout(DestDecl); + PathOffset = L.getVBaseClassOffset(Base); + } else { + const ASTRecordLayout &L = + Context.getASTRecordLayout(PathElement.Class); + PathOffset += L.getBaseClassOffset(Base); + } + } + + if (!Offset) + Offset = PathOffset; + else if (Offset != PathOffset) { + // Base appears in at least two different places. Find the most-derived + // object and see if it's a DestDecl. 
Note that the most-derived object + // must be at least as aligned as this base class subobject, and must + // have a vptr at offset 0. + ThisAddr = Address(emitDynamicCastToVoid(CGF, ThisAddr, SrcRecordTy), + CGF.VoidPtrTy, ThisAddr.getAlignment()); + SrcDecl = DestDecl; + Offset = CharUnits::Zero(); + break; + } + } + + if (!Offset) { + // If there are no public inheritance paths, the cast always fails. + CGF.EmitBranch(CastFail); + return llvm::PoisonValue::get(CGF.VoidPtrTy); + } + + // Compare the vptr against the expected vptr for the destination type at + // this offset. Note that we do not know what type ThisAddr points to in + // the case where the derived class multiply inherits from the base class + // so we can't use GetVTablePtr, so we load the vptr directly instead. + llvm::Instruction *VPtr = CGF.Builder.CreateLoad( + ThisAddr.withElementType(CGF.VoidPtrPtrTy), "vtable"); + CGM.DecorateInstructionWithTBAA( + VPtr, CGM.getTBAAVTablePtrAccessInfo(CGF.VoidPtrPtrTy)); + llvm::Value *Success = CGF.Builder.CreateICmpEQ( + VPtr, getVTableAddressPoint(BaseSubobject(SrcDecl, *Offset), DestDecl)); + llvm::Value *Result = ThisAddr.getPointer(); + if (!Offset->isZero()) + Result = CGF.Builder.CreateInBoundsGEP( + CGF.CharTy, Result, + {llvm::ConstantInt::get(CGF.PtrDiffTy, -Offset->getQuantity())}); + CGF.Builder.CreateCondBr(Success, CastSuccess, CastFail); + return Result; +} + +llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, Address ThisAddr, - QualType SrcRecordTy, - QualType DestTy) { - llvm::Type *DestLTy = CGF.ConvertType(DestTy); + QualType SrcRecordTy) { auto *ClassDecl = cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); llvm::Value *OffsetToTop; if (CGM.getItaniumVTableContext().isRelativeLayout()) { // Get the vtable pointer. - llvm::Value *VTable = - CGF.GetVTablePtr(ThisAddr, CGM.Int32Ty->getPointerTo(), ClassDecl); + llvm::Value *VTable = CGF.GetVTablePtr( + ThisAddr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), + ClassDecl); // Get the offset-to-top from the vtable. OffsetToTop = @@ -1509,8 +1611,9 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, CGF.ConvertType(CGF.getContext().getPointerDiffType()); // Get the vtable pointer. - llvm::Value *VTable = - CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(), ClassDecl); + llvm::Value *VTable = CGF.GetVTablePtr( + ThisAddr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), + ClassDecl); // Get the offset-to-top from the vtable. OffsetToTop = @@ -1519,10 +1622,8 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, PtrDiffLTy, OffsetToTop, CGF.getPointerAlign(), "offset.to.top"); } // Finally, add the offset to the pointer. 
- llvm::Value *Value = ThisAddr.getPointer(); - Value = CGF.EmitCastToVoidPtr(Value); - Value = CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, Value, OffsetToTop); - return CGF.Builder.CreateBitCast(Value, DestLTy); + return CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, ThisAddr.getPointer(), + OffsetToTop); } bool ItaniumCXXABI::EmitBadCastCall(CodeGenFunction &CGF) { @@ -1549,14 +1650,10 @@ ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF, llvm::Value *VBaseOffset; if (CGM.getItaniumVTableContext().isRelativeLayout()) { - VBaseOffsetPtr = - CGF.Builder.CreateBitCast(VBaseOffsetPtr, CGF.Int32Ty->getPointerTo()); VBaseOffset = CGF.Builder.CreateAlignedLoad( CGF.Int32Ty, VBaseOffsetPtr, CharUnits::fromQuantity(4), "vbase.offset"); } else { - VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr, - CGM.PtrDiffTy->getPointerTo()); VBaseOffset = CGF.Builder.CreateAlignedLoad( CGM.PtrDiffTy, VBaseOffsetPtr, CGF.getPointerAlign(), "vbase.offset"); } @@ -1587,12 +1684,14 @@ ItaniumCXXABI::buildStructorSignature(GlobalDecl GD, // All parameters are already in place except VTT, which goes after 'this'. // These are Clang types, so we don't need to worry about sret yet. - // Check if we need to add a VTT parameter (which has type void **). + // Check if we need to add a VTT parameter (which has type global void **). if ((isa<CXXConstructorDecl>(GD.getDecl()) ? GD.getCtorType() == Ctor_Base : GD.getDtorType() == Dtor_Base) && cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) { + LangAS AS = CGM.GetGlobalVarAddressSpace(nullptr); + QualType Q = Context.getAddrSpaceQualType(Context.VoidPtrTy, AS); ArgTys.insert(ArgTys.begin() + 1, - Context.getPointerType(Context.VoidPtrTy)); + Context.getPointerType(CanQualType::CreateUnsafe(Q))); return AddedStructorArgCounts::prefix(1); } return AddedStructorArgCounts{}; @@ -1625,7 +1724,9 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, ASTContext &Context = getContext(); // FIXME: avoid the fake decl - QualType T = Context.getPointerType(Context.VoidPtrTy); + LangAS AS = CGM.GetGlobalVarAddressSpace(nullptr); + QualType Q = Context.getAddrSpaceQualType(Context.VoidPtrTy, AS); + QualType T = Context.getPointerType(Q); auto *VTTDecl = ImplicitParamDecl::Create( Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"), T, ImplicitParamDecl::CXXVTT); @@ -1667,10 +1768,14 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::getImplicitConstructorArgs( if (!NeedsVTTParameter(GlobalDecl(D, Type))) return AddedStructorArgs{}; - // Insert the implicit 'vtt' argument as the second argument. + // Insert the implicit 'vtt' argument as the second argument. Make sure to + // correctly reflect its address space, which can differ from generic on + // some targets. llvm::Value *VTT = CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating); - QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); + LangAS AS = CGM.GetGlobalVarAddressSpace(nullptr); + QualType Q = getContext().getAddrSpaceQualType(getContext().VoidPtrTy, AS); + QualType VTTTy = getContext().getPointerType(Q); return AddedStructorArgs::prefix({{VTT, VTTTy}}); } @@ -1842,11 +1947,11 @@ llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructorWithVTT( /// Load the VTT. 
llvm::Value *VTT = CGF.LoadCXXVTT(); if (VirtualPointerIndex) - VTT = CGF.Builder.CreateConstInBoundsGEP1_64( - CGF.VoidPtrTy, VTT, VirtualPointerIndex); + VTT = CGF.Builder.CreateConstInBoundsGEP1_64(CGF.GlobalsVoidPtrTy, VTT, + VirtualPointerIndex); // And load the address point from the VTT. - return CGF.Builder.CreateAlignedLoad(CGF.VoidPtrTy, VTT, + return CGF.Builder.CreateAlignedLoad(CGF.GlobalsVoidPtrTy, VTT, CGF.getPointerAlign()); } @@ -1874,12 +1979,13 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, CGM.getItaniumVTableContext().getVTableLayout(RD); llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout); - // Use pointer alignment for the vtable. Otherwise we would align them based - // on the size of the initializer which doesn't make sense as only single - // values are read. + // Use pointer to global alignment for the vtable. Otherwise we would align + // them based on the size of the initializer which doesn't make sense as only + // single values are read. + LangAS AS = CGM.GetGlobalVarAddressSpace(nullptr); unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout() ? 32 - : CGM.getTarget().getPointerAlign(LangAS::Default); + : CGM.getTarget().getPointerAlign(AS); VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, VTableType, llvm::GlobalValue::ExternalLinkage, @@ -1914,16 +2020,15 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, Address This, llvm::Type *Ty, SourceLocation Loc) { - llvm::Type *TyPtr = Ty->getPointerTo(); + llvm::Type *PtrTy = CGM.GlobalsInt8PtrTy; auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl()); - llvm::Value *VTable = CGF.GetVTablePtr( - This, TyPtr->getPointerTo(), MethodDecl->getParent()); + llvm::Value *VTable = CGF.GetVTablePtr(This, PtrTy, MethodDecl->getParent()); uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); llvm::Value *VFunc; if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { VFunc = CGF.EmitVTableTypeCheckedLoad( - MethodDecl->getParent(), VTable, TyPtr, + MethodDecl->getParent(), VTable, PtrTy, VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(LangAS::Default) / 8); @@ -1932,19 +2037,14 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, llvm::Value *VFuncLoad; if (CGM.getItaniumVTableContext().isRelativeLayout()) { - VTable = CGF.Builder.CreateBitCast(VTable, CGM.Int8PtrTy); - llvm::Value *Load = CGF.Builder.CreateCall( + VFuncLoad = CGF.Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}), {VTable, llvm::ConstantInt::get(CGM.Int32Ty, 4 * VTableIndex)}); - VFuncLoad = CGF.Builder.CreateBitCast(Load, TyPtr); } else { - VTable = - CGF.Builder.CreateBitCast(VTable, TyPtr->getPointerTo()); llvm::Value *VTableSlotPtr = CGF.Builder.CreateConstInBoundsGEP1_64( - TyPtr, VTable, VTableIndex, "vfn"); - VFuncLoad = - CGF.Builder.CreateAlignedLoad(TyPtr, VTableSlotPtr, - CGF.getPointerAlign()); + PtrTy, VTable, VTableIndex, "vfn"); + VFuncLoad = CGF.Builder.CreateAlignedLoad(PtrTy, VTableSlotPtr, + CGF.getPointerAlign()); } // Add !invariant.load md to virtual function load to indicate that @@ -2067,7 +2167,7 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, if (!NonVirtualAdjustment && !VirtualAdjustment) return InitialPtr.getPointer(); - Address V = CGF.Builder.CreateElementBitCast(InitialPtr, CGF.Int8Ty); + Address V = InitialPtr.withElementType(CGF.Int8Ty); // In a base-to-derived cast, the non-virtual adjustment is applied first. 
if (NonVirtualAdjustment && !IsReturnAdjustment) { @@ -2078,7 +2178,7 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, // Perform the virtual adjustment if we have one. llvm::Value *ResultPtr; if (VirtualAdjustment) { - Address VTablePtrPtr = CGF.Builder.CreateElementBitCast(V, CGF.Int8PtrTy); + Address VTablePtrPtr = V.withElementType(CGF.Int8PtrTy); llvm::Value *VTablePtr = CGF.Builder.CreateLoad(VTablePtrPtr); llvm::Value *Offset; @@ -2086,8 +2186,6 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, CGF.Int8Ty, VTablePtr, VirtualAdjustment); if (CGF.CGM.getItaniumVTableContext().isRelativeLayout()) { // Load the adjustment offset from the vtable as a 32-bit int. - OffsetPtr = - CGF.Builder.CreateBitCast(OffsetPtr, CGF.Int32Ty->getPointerTo()); Offset = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, OffsetPtr, CharUnits::fromQuantity(4)); @@ -2095,9 +2193,6 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, llvm::Type *PtrDiffTy = CGF.ConvertType(CGF.getContext().getPointerDiffType()); - OffsetPtr = - CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo()); - // Load the adjustment offset from the vtable. Offset = CGF.Builder.CreateAlignedLoad(PtrDiffTy, OffsetPtr, CGF.getPointerAlign()); @@ -2180,8 +2275,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CookiePtr = CGF.Builder.CreateConstInBoundsByteGEP(CookiePtr, CookieOffset); // Write the number of elements into the appropriate slot. - Address NumElementsPtr = - CGF.Builder.CreateElementBitCast(CookiePtr, CGF.SizeTy); + Address NumElementsPtr = CookiePtr.withElementType(CGF.SizeTy); llvm::Instruction *SI = CGF.Builder.CreateStore(NumElements, NumElementsPtr); // Handle the array cookie specially in ASan. @@ -2189,7 +2283,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, (expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() || CGM.getCodeGenOpts().SanitizeAddressPoisonCustomArrayCookie)) { // The store to the CookiePtr does not need to be instrumented. - CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI); + SI->setNoSanitizeMetadata(); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, NumElementsPtr.getType(), false); llvm::FunctionCallee F = @@ -2213,7 +2307,7 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, CGF.Builder.CreateConstInBoundsByteGEP(numElementsPtr, numElementsOffset); unsigned AS = allocPtr.getAddressSpace(); - numElementsPtr = CGF.Builder.CreateElementBitCast(numElementsPtr, CGF.SizeTy); + numElementsPtr = numElementsPtr.withElementType(CGF.SizeTy); if (!CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) || AS != 0) return CGF.Builder.CreateLoad(numElementsPtr); // In asan mode emit a function call instead of a regular load and let the @@ -2221,8 +2315,8 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, // cookie, otherwise return 0 to avoid an infinite loop calling DTORs. // We can't simply ignore this load using nosanitize metadata because // the metadata may be lost. 
- llvm::FunctionType *FTy = - llvm::FunctionType::get(CGF.SizeTy, CGF.SizeTy->getPointerTo(0), false); + llvm::FunctionType *FTy = llvm::FunctionType::get( + CGF.SizeTy, llvm::PointerType::getUnqual(CGF.getLLVMContext()), false); llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie"); return CGF.Builder.CreateCall(F, numElementsPtr.getPointer()); @@ -2252,7 +2346,7 @@ Address ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, Address cookie = newPtr; // The first element is the element size. - cookie = CGF.Builder.CreateElementBitCast(cookie, CGF.SizeTy); + cookie = cookie.withElementType(CGF.SizeTy); llvm::Value *elementSize = llvm::ConstantInt::get(CGF.SizeTy, getContext().getTypeSizeInChars(elementType).getQuantity()); CGF.Builder.CreateStore(elementSize, cookie); @@ -2275,7 +2369,7 @@ llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, Address numElementsPtr = CGF.Builder.CreateConstInBoundsByteGEP(allocPtr, CGF.getSizeSize()); - numElementsPtr = CGF.Builder.CreateElementBitCast(numElementsPtr, CGF.SizeTy); + numElementsPtr = numElementsPtr.withElementType(CGF.SizeTy); return CGF.Builder.CreateLoad(numElementsPtr); } @@ -2372,7 +2466,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, CharUnits::fromQuantity(CGM.getDataLayout().getABITypeAlign(guardTy)); } } - llvm::PointerType *guardPtrTy = guardTy->getPointerTo( + llvm::PointerType *guardPtrTy = llvm::PointerType::get( + CGF.CGM.getLLVMContext(), CGF.CGM.getDataLayout().getDefaultGlobalsAddressSpace()); // Create the guard variable if we don't already have it (as we @@ -2443,7 +2538,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, if (!threadsafe || MaxInlineWidthInBits) { // Load the first byte of the guard variable. llvm::LoadInst *LI = - Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); + Builder.CreateLoad(guardAddr.withElementType(CGM.Int8Ty)); // Itanium ABI: // An implementation supporting thread-safety on multiprocessor @@ -2524,7 +2619,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // variable before the object initialization begins so that references // to the variable during initialization don't restart initialization. Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1), - Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); + guardAddr.withElementType(CGM.Int8Ty)); } // Emit the initializer and add a global destructor if appropriate. @@ -2542,7 +2637,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // after the object initialization completes so that initialization is // retried if initialization is interrupted by an exception. Builder.CreateStore(llvm::ConstantInt::get(CGM.Int8Ty, 1), - Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty)); + guardAddr.withElementType(CGM.Int8Ty)); } CGF.EmitBlock(EndBlock); @@ -2563,15 +2658,13 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, } // We're assuming that the destructor function is something we can - // reasonably call with the default CC. Go ahead and cast it to the - // right prototype. - llvm::Type *dtorTy = - llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo(); + // reasonably call with the default CC. + llvm::Type *dtorTy = llvm::PointerType::getUnqual(CGF.getLLVMContext()); // Preserve address space of addr. auto AddrAS = addr ? addr->getType()->getPointerAddressSpace() : 0; - auto AddrInt8PtrTy = - AddrAS ? CGF.Int8Ty->getPointerTo(AddrAS) : CGF.Int8PtrTy; + auto AddrPtrTy = AddrAS ? 
llvm::PointerType::get(CGF.getLLVMContext(), AddrAS) + : CGF.Int8PtrTy; // Create a variable that binds the atexit to this shared object. llvm::Constant *handle = @@ -2580,7 +2673,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, GV->setVisibility(llvm::GlobalValue::HiddenVisibility); // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); - llvm::Type *paramTys[] = {dtorTy, AddrInt8PtrTy, handle->getType()}; + llvm::Type *paramTys[] = {dtorTy, AddrPtrTy, handle->getType()}; llvm::FunctionType *atexitTy = llvm::FunctionType::get(CGF.IntTy, paramTys, false); @@ -2596,10 +2689,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // function. addr = llvm::Constant::getNullValue(CGF.Int8PtrTy); - llvm::Value *args[] = {llvm::ConstantExpr::getBitCast( - cast<llvm::Constant>(dtor.getCallee()), dtorTy), - llvm::ConstantExpr::getBitCast(addr, AddrInt8PtrTy), - handle}; + llvm::Value *args[] = {dtor.getCallee(), addr, handle}; CGF.EmitNounwindRuntimeCall(atexit, args); } @@ -2631,7 +2721,6 @@ void CodeGenModule::unregisterGlobalDtorsWithUnAtExit() { // Get the destructor function type, void(*)(void). llvm::FunctionType *dtorFuncTy = llvm::FunctionType::get(CGF.VoidTy, false); - llvm::Type *dtorTy = dtorFuncTy->getPointerTo(); // Destructor functions are run/unregistered in non-ascending // order of their priorities. @@ -2641,10 +2730,8 @@ void CodeGenModule::unregisterGlobalDtorsWithUnAtExit() { llvm::Function *Dtor = *itv; // We're assuming that the destructor function is something we can - // reasonably call with the correct CC. Go ahead and cast it to the - // right prototype. - llvm::Constant *dtor = llvm::ConstantExpr::getBitCast(Dtor, dtorTy); - llvm::Value *V = CGF.unregisterGlobalDtorWithUnAtExit(dtor); + // reasonably call with the correct CC. + llvm::Value *V = CGF.unregisterGlobalDtorWithUnAtExit(Dtor); llvm::Value *NeedsDestruct = CGF.Builder.CreateIsNull(V, "needs_destruct"); @@ -2659,7 +2746,7 @@ void CodeGenModule::unregisterGlobalDtorsWithUnAtExit() { CGF.EmitBlock(DestructCallBlock); // Emit the call to casted Dtor. - llvm::CallInst *CI = CGF.Builder.CreateCall(dtorFuncTy, dtor); + llvm::CallInst *CI = CGF.Builder.CreateCall(dtorFuncTy, Dtor); // Make sure the call and the callee agree on calling convention. CI->setCallingConv(Dtor->getCallingConv()); @@ -2699,15 +2786,9 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { if (getCodeGenOpts().CXAAtExit) { emitGlobalDtorWithCXAAtExit(CGF, Dtor, nullptr, false); } else { - // Get the destructor function type, void(*)(void). - llvm::Type *dtorTy = - llvm::FunctionType::get(CGF.VoidTy, false)->getPointerTo(); - // We're assuming that the destructor function is something we can - // reasonably call with the correct CC. Go ahead and cast it to the - // right prototype. - CGF.registerGlobalDtorWithAtExit( - llvm::ConstantExpr::getBitCast(Dtor, dtorTy)); + // reasonably call with the correct CC. + CGF.registerGlobalDtorWithAtExit(Dtor); } } @@ -3203,10 +3284,9 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { // Note for the future: If we would ever like to do deferred emission of // RTTI, check if emitting vtables opportunistically need any adjustment. 
- GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, - /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, nullptr, - Name); + GV = new llvm::GlobalVariable( + CGM.getModule(), CGM.GlobalsInt8PtrTy, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, Name); const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); CGM.setGVProperties(GV, RD); // Import the typeinfo symbol when all non-inline virtual methods are @@ -3219,7 +3299,7 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { } } - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return GV; } /// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type @@ -3290,6 +3370,8 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" +#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/WebAssemblyReferenceTypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: @@ -3595,7 +3677,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { if (CGM.getItaniumVTableContext().isRelativeLayout()) VTable = CGM.getModule().getNamedAlias(VTableName); if (!VTable) - VTable = CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy); + VTable = + CGM.getModule().getOrInsertGlobal(VTableName, CGM.GlobalsInt8PtrTy); CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts())); @@ -3607,15 +3690,13 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { // The vtable address point is 8 bytes after its start: // 4 for the offset to top + 4 for the relative offset to rtti. llvm::Constant *Eight = llvm::ConstantInt::get(CGM.Int32Ty, 8); - VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8Ty, VTable, Eight); } else { llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); - VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, - Two); + VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.GlobalsInt8PtrTy, + VTable, Two); } - VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); Fields.push_back(VTable); } @@ -3643,7 +3724,6 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, return llvm::GlobalValue::InternalLinkage; case VisibleNoLinkage: - case ModuleInternalLinkage: case ModuleLinkage: case ExternalLinkage: // RTTI is not enabled, which means that this type info struct is going @@ -3688,7 +3768,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty) { assert(!OldGV->hasAvailableExternallyLinkage() && "available_externally typeinfos not yet implemented"); - return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy); + return OldGV; } // Check if there is already an external RTTI descriptor for this type. 
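Two pieces of vtable geometry from the hunks above and from emitExactDynamicCast earlier are easy to demonstrate concretely: in the classic (non-relative) layout the address point sits two pointer-width slots past the vtable start (offset-to-top, then the type_info pointer), so RTTI is reachable at vptr[-1]; and when a destination class has a unique vtable pointer, dynamic_cast degenerates to a single vptr comparison. The sketch below peeks at ABI internals, so it is illustrative rather than portable C++, and the 'expected' vptr is captured at runtime as a stand-in for the address-point constant the compiler bakes in:

#include <cstdio>
#include <typeinfo>

struct Base { virtual ~Base() = default; };
struct Derived final : Base {};

// Read an object's vptr (ABI-specific; works on Itanium-style targets).
static void *vptr_of(const Base *p) { return *reinterpret_cast<void *const *>(p); }

int main() {
  Derived d;
  Base b;
  Base *p = &d;

  // (1) Classic layout: the slot just before the address point holds the
  // type_info pointer (hence the GEP by 'Two' from the vtable symbol above).
  void **vptr = static_cast<void **>(vptr_of(p));
  std::printf("%s == %s\n",
              static_cast<const std::type_info *>(vptr[-1])->name(),
              typeid(*p).name());

  // (2) The exact-cast idea: compare the vptr against the expected address
  // point instead of calling __dynamic_cast.
  void *expected = vptr_of(&d);
  std::printf("cast of d: %d, cast of b: %d\n",
              vptr_of(p) == expected, vptr_of(&b) == expected);
}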
@@ -3748,9 +3828,9 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm::ConstantInt::get(CGM.Int64Ty, ((uint64_t)1) << 63); TypeNameField = llvm::ConstantExpr::getAdd(TypeNameField, flag); TypeNameField = - llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy); + llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.GlobalsInt8PtrTy); } else { - TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy); + TypeNameField = TypeName; } Fields.push_back(TypeNameField); @@ -3880,7 +3960,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( GV->setComdat(M.getOrInsertComdat(GV->getName())); CharUnits Align = CGM.getContext().toCharUnitsFromBits( - CGM.getTarget().getPointerAlign(LangAS::Default)); + CGM.getTarget().getPointerAlign(CGM.GetGlobalVarAddressSpace(nullptr))); GV->setAlignment(Align.getAsAlign()); // The Itanium ABI specifies that type_info objects must be globally @@ -3912,7 +3992,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( TypeName->setPartition(CGM.getCodeGenOpts().SymbolPartition); GV->setPartition(CGM.getCodeGenOpts().SymbolPartition); - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return GV; } /// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info @@ -4558,10 +4638,7 @@ static void InitCatchParam(CodeGenFunction &CGF, // Otherwise, it returns a pointer into the exception object. - llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok - llvm::Value *Cast = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy); - - LValue srcLV = CGF.MakeNaturalAlignAddrLValue(Cast, CatchType); + LValue srcLV = CGF.MakeNaturalAlignAddrLValue(AdjustedExn, CatchType); LValue destLV = CGF.MakeAddrLValue(ParamAddr, CatchType); switch (TEK) { case TEK_Complex: @@ -4583,7 +4660,8 @@ static void InitCatchParam(CodeGenFunction &CGF, auto catchRD = CatchType->getAsCXXRecordDecl(); CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD); - llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok + llvm::Type *PtrTy = + llvm::PointerType::getUnqual(CGF.getLLVMContext()); // addrspace 0 ok // Check for a copy expression. If we don't have a copy expression, // that means a trivial copy is okay. @@ -4688,6 +4766,7 @@ static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) { cast<llvm::Function>(fnRef.getCallee()->stripPointerCasts()); if (fn->empty()) { CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, fn, /*IsThunk=*/false); + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, fn); fn->setDoesNotThrow(); fn->setDoesNotReturn(); @@ -4770,14 +4849,12 @@ void XLCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::FunctionCallee Dtor, llvm::Constant *Addr) { if (D.getTLSKind() != VarDecl::TLS_None) { - // atexit routine expects "int(*)(int,...)" - llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.IntTy, CGM.IntTy, true); - llvm::PointerType *FpTy = FTy->getPointerTo(); + llvm::PointerType *PtrTy = + llvm::PointerType::getUnqual(CGF.getLLVMContext()); // extern "C" int __pt_atexit_np(int flags, int(*)(int,...), ...); llvm::FunctionType *AtExitTy = - llvm::FunctionType::get(CGM.IntTy, {CGM.IntTy, FpTy}, true); + llvm::FunctionType::get(CGM.IntTy, {CGM.IntTy, PtrTy}, true); // Fetch the actual function. 
llvm::FunctionCallee AtExit = diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index ae785cce09f9..a692abaf3b75 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "ABIInfo.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGVTables.h" @@ -152,14 +153,25 @@ public: bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, QualType SrcRecordTy) override; - llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, Address Value, + bool shouldEmitExactDynamicCast(QualType DestRecordTy) override { + // TODO: Add support for exact dynamic_casts. + return false; + } + llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address Value, + QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy, + llvm::BasicBlock *CastSuccess, + llvm::BasicBlock *CastFail) override { + llvm_unreachable("unsupported"); + } + + llvm::Value *emitDynamicCastCall(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) override; - llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, - QualType SrcRecordTy, - QualType DestTy) override; + llvm::Value *emitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, + QualType SrcRecordTy) override; bool EmitBadCastCall(CodeGenFunction &CGF) override; bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override { @@ -235,11 +247,24 @@ public: void EmitCXXDestructors(const CXXDestructorDecl *D) override; - const CXXRecordDecl * - getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override { - if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) { + const CXXRecordDecl *getThisArgumentTypeForMethod(GlobalDecl GD) override { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); + + if (MD->isVirtual()) { + GlobalDecl LookupGD = GD; + if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) { + // Complete dtors take a pointer to the complete object, + // thus don't need adjustment. + if (GD.getDtorType() == Dtor_Complete) + return MD->getParent(); + + // There's only Dtor_Deleting in vftable but it shares the this + // adjustment with the base one, so look up the deleting one instead. + LookupGD = GlobalDecl(DD, Dtor_Deleting); + } MethodVFTableLocation ML = - CGM.getMicrosoftVTableContext().getMethodVFTableLocation(MD); + CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); + // The vbases might be ordered differently in the final overrider object // and the complete object, so the "this" argument may sometimes point to // memory that has no particular type (e.g. past the complete object). 
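The getThisArgumentTypeForMethod change above is easiest to see with a concrete layout: a complete destructor is always invoked on the complete object and needs no 'this' adjustment, while the deleting-destructor slot in the vftable carries the adjustment back from a base subobject. A small sketch (the printed offset is implementation-defined, but nonzero on typical targets):

#include <cstdio>

struct A { virtual ~A() {} int a = 0; };
struct B { virtual ~B() {} int b = 0; };
struct C : A, B { int c = 0; };

int main() {
  C obj;
  B *pb = &obj; // points into the middle of obj, at the B subobject
  // 'delete pb' must adjust back to the start of the complete C before
  // freeing; in the MS ABI that adjustment travels with the deleting
  // destructor, which is why the lookup above switches to Dtor_Deleting.
  std::printf("offset of B inside C: %td\n",
              reinterpret_cast<char *>(pb) - reinterpret_cast<char *>(&obj));
}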
@@ -923,7 +948,7 @@ void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF, std::tuple<Address, llvm::Value *, const CXXRecordDecl *> MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy) { - Value = CGF.Builder.CreateElementBitCast(Value, CGF.Int8Ty); + Value = Value.withElementType(CGF.Int8Ty); const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); const ASTContext &Context = getContext(); @@ -997,11 +1022,9 @@ bool MicrosoftCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr, !getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr(); } -llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( - CodeGenFunction &CGF, Address This, QualType SrcRecordTy, - QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) { - llvm::Type *DestLTy = CGF.ConvertType(DestTy); - +llvm::Value *MicrosoftCXXABI::emitDynamicCastCall( + CodeGenFunction &CGF, Address This, QualType SrcRecordTy, QualType DestTy, + QualType DestRecordTy, llvm::BasicBlock *CastEnd) { llvm::Value *SrcRTTI = CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType()); llvm::Value *DestRTTI = @@ -1027,14 +1050,12 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( llvm::Value *Args[] = { ThisPtr, Offset, SrcRTTI, DestRTTI, llvm::ConstantInt::get(CGF.Int32Ty, DestTy->isReferenceType())}; - ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args); - return CGF.Builder.CreateBitCast(ThisPtr, DestLTy); + return CGF.EmitRuntimeCallOrInvoke(Function, Args); } -llvm::Value * -MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, - QualType SrcRecordTy, - QualType DestTy) { +llvm::Value *MicrosoftCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF, + Address Value, + QualType SrcRecordTy) { std::tie(Value, std::ignore, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy); @@ -1086,7 +1107,19 @@ bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const { return isDeletingDtor(GD); } -static bool isTrivialForMSVC(const CXXRecordDecl *RD) { +static bool isTrivialForMSVC(const CXXRecordDecl *RD, QualType Ty, + CodeGenModule &CGM) { + // On AArch64, HVAs that can be passed in registers can also be returned + // in registers. (Note this is using the MSVC definition of an HVA; see + // isPermittedToBeHomogeneousAggregate().) + const Type *Base = nullptr; + uint64_t NumElts = 0; + if (CGM.getTarget().getTriple().isAArch64() && + CGM.getTypes().getABIInfo().isHomogeneousAggregate(Ty, Base, NumElts) && + isa<VectorType>(Base)) { + return true; + } + // We use the C++14 definition of an aggregate, so we also // check for: // No private or protected non static data members. @@ -1115,7 +1148,8 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - bool isTrivialForABI = RD->canPassInRegisters() && isTrivialForMSVC(RD); + bool isTrivialForABI = RD->canPassInRegisters() && + isTrivialForMSVC(RD, FI.getReturnType(), CGM); // MSVC always returns structs indirectly from C++ instance methods. 
bool isIndirectReturn = !isTrivialForABI || FI.isInstanceMethod(); @@ -1266,7 +1300,7 @@ void MicrosoftCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD) { Address This = getThisAddress(CGF); - This = CGF.Builder.CreateElementBitCast(This, CGM.Int8Ty, "this.int8"); + This = This.withElementType(CGM.Int8Ty); const ASTContext &Context = getContext(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); @@ -1283,8 +1317,7 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, Address VBPtr = CGF.Builder.CreateConstInBoundsByteGEP(This, Offs); llvm::Value *GVPtr = CGF.Builder.CreateConstInBoundsGEP2_32(GV->getValueType(), GV, 0, 0); - VBPtr = CGF.Builder.CreateElementBitCast(VBPtr, GVPtr->getType(), - "vbptr." + VBT->ObjectWithVPtr->getName()); + VBPtr = VBPtr.withElementType(GVPtr->getType()); CGF.Builder.CreateStore(GVPtr, VBPtr); } } @@ -1426,7 +1459,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( if (Adjustment.isZero()) return This; - This = CGF.Builder.CreateElementBitCast(This, CGF.Int8Ty); + This = This.withElementType(CGF.Int8Ty); assert(Adjustment.isPositive()); return CGF.Builder.CreateConstByteGEP(This, Adjustment); } @@ -1457,7 +1490,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( Address Result = This; if (ML.VBase) { - Result = CGF.Builder.CreateElementBitCast(Result, CGF.Int8Ty); + Result = Result.withElementType(CGF.Int8Ty); const CXXRecordDecl *Derived = MD->getParent(); const CXXRecordDecl *VBase = ML.VBase; @@ -1471,7 +1504,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( } if (!StaticOffset.isZero()) { assert(StaticOffset.isPositive()); - Result = CGF.Builder.CreateElementBitCast(Result, CGF.Int8Ty); + Result = Result.withElementType(CGF.Int8Ty); if (ML.VBase) { // Non-virtual adjustment might result in a pointer outside the allocated // object, e.g. 
if the final overrider class is laid out after the virtual @@ -1556,11 +1589,8 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { // 1) getThisValue is currently protected // 2) in theory, an ABI could implement 'this' returns some other way; // HasThisReturn only specifies a contract, not the implementation - if (HasThisReturn(CGF.CurGD)) + if (HasThisReturn(CGF.CurGD) || hasMostDerivedReturn(CGF.CurGD)) CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); - else if (hasMostDerivedReturn(CGF.CurGD)) - CGF.Builder.CreateStore(CGF.EmitCastToVoidPtr(getThisValue(CGF)), - CGF.ReturnValue); if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { assert(getStructorImplicitParamDecl(CGF) && @@ -2191,7 +2221,7 @@ llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF, if (TA.isEmpty()) return This.getPointer(); - This = CGF.Builder.CreateElementBitCast(This, CGF.Int8Ty); + This = This.withElementType(CGF.Int8Ty); llvm::Value *V; if (TA.Virtual.isEmpty()) { @@ -2202,7 +2232,7 @@ llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF, Address VtorDispPtr = CGF.Builder.CreateConstInBoundsByteGEP(This, CharUnits::fromQuantity(TA.Virtual.Microsoft.VtordispOffset)); - VtorDispPtr = CGF.Builder.CreateElementBitCast(VtorDispPtr, CGF.Int32Ty); + VtorDispPtr = VtorDispPtr.withElementType(CGF.Int32Ty); llvm::Value *VtorDisp = CGF.Builder.CreateLoad(VtorDispPtr, "vtordisp"); V = CGF.Builder.CreateGEP(This.getElementType(), This.getPointer(), CGF.Builder.CreateNeg(VtorDisp)); @@ -2244,7 +2274,7 @@ MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret, return Ret.getPointer(); auto OrigTy = Ret.getType(); - Ret = CGF.Builder.CreateElementBitCast(Ret, CGF.Int8Ty); + Ret = Ret.withElementType(CGF.Int8Ty); llvm::Value *V = Ret.getPointer(); if (RA.Virtual.Microsoft.VBIndex) { @@ -2288,8 +2318,7 @@ CharUnits MicrosoftCXXABI::getArrayCookieSizeImpl(QualType type) { llvm::Value *MicrosoftCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, Address allocPtr, CharUnits cookieSize) { - Address numElementsPtr = - CGF.Builder.CreateElementBitCast(allocPtr, CGF.SizeTy); + Address numElementsPtr = allocPtr.withElementType(CGF.SizeTy); return CGF.Builder.CreateLoad(numElementsPtr); } @@ -2307,8 +2336,7 @@ Address MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, Address cookiePtr = newPtr; // Write the number of elements into the appropriate slot. - Address numElementsPtr - = CGF.Builder.CreateElementBitCast(cookiePtr, CGF.SizeTy); + Address numElementsPtr = cookiePtr.withElementType(CGF.SizeTy); CGF.Builder.CreateStore(numElements, numElementsPtr); // Finally, compute a pointer to the actual data buffer by skipping @@ -3108,12 +3136,10 @@ MicrosoftCXXABI::GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF, llvm::Value **VBPtrOut) { CGBuilderTy &Builder = CGF.Builder; // Load the vbtable pointer from the vbptr in the instance. 
- This = Builder.CreateElementBitCast(This, CGM.Int8Ty); - llvm::Value *VBPtr = Builder.CreateInBoundsGEP( - This.getElementType(), This.getPointer(), VBPtrOffset, "vbptr"); - if (VBPtrOut) *VBPtrOut = VBPtr; - VBPtr = Builder.CreateBitCast(VBPtr, - CGM.Int32Ty->getPointerTo(0)->getPointerTo(This.getAddressSpace())); + llvm::Value *VBPtr = Builder.CreateInBoundsGEP(CGM.Int8Ty, This.getPointer(), + VBPtrOffset, "vbptr"); + if (VBPtrOut) + *VBPtrOut = VBPtr; CharUnits VBPtrAlign; if (auto CI = dyn_cast<llvm::ConstantInt>(VBPtrOffset)) { @@ -3134,7 +3160,6 @@ MicrosoftCXXABI::GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF, // Load an i32 offset from the vb-table. llvm::Value *VBaseOffs = Builder.CreateInBoundsGEP(CGM.Int32Ty, VBTable, VBTableIndex); - VBaseOffs = Builder.CreateBitCast(VBaseOffs, CGM.Int32Ty->getPointerTo(0)); return Builder.CreateAlignedLoad(CGM.Int32Ty, VBaseOffs, CharUnits::fromQuantity(4), "vbase_offs"); } @@ -3145,7 +3170,7 @@ llvm::Value *MicrosoftCXXABI::AdjustVirtualBase( CodeGenFunction &CGF, const Expr *E, const CXXRecordDecl *RD, Address Base, llvm::Value *VBTableOffset, llvm::Value *VBPtrOffset) { CGBuilderTy &Builder = CGF.Builder; - Base = Builder.CreateElementBitCast(Base, CGM.Int8Ty); + Base = Base.withElementType(CGM.Int8Ty); llvm::BasicBlock *OriginalBB = nullptr; llvm::BasicBlock *SkipAdjustBB = nullptr; llvm::BasicBlock *VBaseAdjustBB = nullptr; @@ -3655,7 +3680,6 @@ static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) { return llvm::GlobalValue::InternalLinkage; case VisibleNoLinkage: - case ModuleInternalLinkage: case ModuleLinkage: case ExternalLinkage: return llvm::GlobalValue::LinkOnceODRLinkage; @@ -3745,7 +3769,7 @@ llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() { Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr); detectAmbiguousBases(Classes); int Flags = 0; - for (auto Class : Classes) { + for (const MSRTTIClass &Class : Classes) { if (Class.RD->getNumBases() > 1) Flags |= HasBranchingHierarchy; // Note: cl.exe does not calculate "HasAmbiguousBases" correctly. We diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp index e3e953c34c59..3594f4c66e67 100644 --- a/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -36,7 +36,7 @@ namespace { IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; // Only used for debug info. const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info. const PreprocessorOptions &PreprocessorOpts; // Only used for debug info. - const CodeGenOptions CodeGenOpts; // Intentionally copied in. + const CodeGenOptions &CodeGenOpts; unsigned HandlingTopLevelDecls; diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 677b66d3e1dc..114a9c1e2eac 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -158,7 +158,7 @@ public: // When building a module MainFileName is the name of the modulemap file. CodeGenOpts.MainFileName = LangOpts.CurrentModule.empty() ? 
MainFileName : LangOpts.CurrentModule; - CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); + CodeGenOpts.setDebugInfo(llvm::codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); CodeGenOpts.DebugPrefixMap = CI.getInvocation().getCodeGenOpts().DebugPrefixMap; @@ -320,7 +320,7 @@ public: clang::EmitBackendOutput( Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayoutString(), M.get(), - BackendAction::Backend_EmitLL, + BackendAction::Backend_EmitLL, FS, std::make_unique<llvm::raw_svector_ostream>(Buffer)); llvm::dbgs() << Buffer; }); @@ -329,7 +329,7 @@ public: clang::EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayoutString(), M.get(), - BackendAction::Backend_EmitObj, std::move(OS)); + BackendAction::Backend_EmitObj, FS, std::move(OS)); // Free the memory for the temporary buffer. llvm::SmallVector<char, 0> Empty; @@ -349,6 +349,11 @@ ObjectFilePCHContainerWriter::CreatePCHContainerGenerator( CI, MainFileName, OutputFileName, std::move(OS), Buffer); } +ArrayRef<StringRef> ObjectFilePCHContainerReader::getFormats() const { + static StringRef Formats[] = {"obj", "raw"}; + return Formats; +} + StringRef ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const { StringRef PCH; diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp index 554f1ea2a47d..53161c316c58 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -101,8 +101,3 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D, void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) { reportGlobal(GV, SourceLocation(), "", QualType(), SanitizerKind::All); } - -void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) { - I->setMetadata(llvm::LLVMContext::MD_nosanitize, - llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt)); -} diff --git a/clang/lib/CodeGen/SanitizerMetadata.h b/clang/lib/CodeGen/SanitizerMetadata.h index f5dd0e503cc0..000f02cf8dcf 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.h +++ b/clang/lib/CodeGen/SanitizerMetadata.h @@ -44,7 +44,6 @@ public: SanitizerMask NoSanitizeAttrMask = {}, bool IsDynInit = false); void disableSanitizerForGlobal(llvm::GlobalVariable *GV); - void disableSanitizerForInstruction(llvm::Instruction *I); }; } // end namespace CodeGen } // end namespace clang diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp index 63d975193c02..055dd3704386 100644 --- a/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -728,7 +728,7 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, // The largest size that we're still considering making subvectors of. // Always a power of 2. 
- unsigned logCandidateNumElts = llvm::findLastSet(numElts, llvm::ZB_Undefined); + unsigned logCandidateNumElts = llvm::Log2_32(numElts); unsigned candidateNumElts = 1U << logCandidateNumElts; assert(candidateNumElts <= numElts && candidateNumElts * 2 > numElts); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index be1dbe8480c6..3d79f92137ab 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -13,243 +13,18 @@ #include "TargetInfo.h" #include "ABIInfo.h" -#include "CGBlocks.h" -#include "CGCXXABI.h" -#include "CGValue.h" +#include "ABIInfoImpl.h" #include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/RecordLayout.h" -#include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" -#include "clang/Basic/DiagnosticFrontend.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IntrinsicsNVPTX.h" -#include "llvm/IR/IntrinsicsS390.h" #include "llvm/IR/Type.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> using namespace clang; using namespace CodeGen; -// Helper for coercing an aggregate argument or return value into an integer -// array of the same size (including padding) and alignment. This alternate -// coercion happens only for the RenderScript ABI and can be removed after -// runtimes that rely on it are no longer supported. -// -// RenderScript assumes that the size of the argument / return value in the IR -// is the same as the size of the corresponding qualified type. This helper -// coerces the aggregate type into an array of the same size (including -// padding). This coercion is used in lieu of expansion of struct members or -// other canonical coercions that return a coerced-type of larger size. -// -// Ty - The argument / return value type -// Context - The associated ASTContext -// LLVMContext - The associated LLVMContext -static ABIArgInfo coerceToIntArray(QualType Ty, - ASTContext &Context, - llvm::LLVMContext &LLVMContext) { - // Alignment and Size are measured in bits. - const uint64_t Size = Context.getTypeSize(Ty); - const uint64_t Alignment = Context.getTypeAlign(Ty); - llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment); - const uint64_t NumElements = (Size + Alignment - 1) / Alignment; - return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); -} - -static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, - llvm::Value *Array, - llvm::Value *Value, - unsigned FirstIndex, - unsigned LastIndex) { - // Alternatively, we could emit this as a loop in the source. 
- for (unsigned I = FirstIndex; I <= LastIndex; ++I) { - llvm::Value *Cell = - Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I); - Builder.CreateAlignedStore(Value, Cell, CharUnits::One()); - } -} - -static bool isAggregateTypeForABI(QualType T) { - return !CodeGenFunction::hasScalarEvaluationKind(T) || - T->isMemberFunctionPointerType(); -} - -ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal, - bool Realign, - llvm::Type *Padding) const { - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal, - Realign, Padding); -} - -ABIArgInfo -ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const { - return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty), - /*ByVal*/ false, Realign); -} - -Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - return Address::invalid(); -} - -static llvm::Type *getVAListElementType(CodeGenFunction &CGF) { - return CGF.ConvertTypeForMem( - CGF.getContext().getBuiltinVaListType()->getPointeeType()); -} - -bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { - if (getContext().isPromotableIntegerType(Ty)) - return true; - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy)) - return true; - - return false; -} - -ABIInfo::~ABIInfo() = default; - -SwiftABIInfo::~SwiftABIInfo() = default; - -/// Does the given lowering require more than the given number of -/// registers when expanded? -/// -/// This is intended to be the basis of a reasonable basic implementation -/// of should{Pass,Return}IndirectlyForSwift. -/// -/// For most targets, a limit of four total registers is reasonable; this -/// limits the amount of code required in order to move around the value -/// in case it wasn't produced immediately prior to the call by the caller -/// (or wasn't produced in exactly the right registers) or isn't used -/// immediately within the callee. But some targets may need to further -/// limit the register count due to an inability to support that many -/// return registers. -static bool occupiesMoreThan(CodeGenTypes &cgt, - ArrayRef<llvm::Type*> scalarTypes, - unsigned maxAllRegisters) { - unsigned intCount = 0, fpCount = 0; - for (llvm::Type *type : scalarTypes) { - if (type->isPointerTy()) { - intCount++; - } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) { - auto ptrWidth = cgt.getTarget().getPointerWidth(LangAS::Default); - intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth; - } else { - assert(type->isVectorTy() || type->isFloatingPointTy()); - fpCount++; - } - } - - return (intCount + fpCount > maxAllRegisters); -} - -bool SwiftABIInfo::shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys, - bool AsReturnValue) const { - return occupiesMoreThan(CGT, ComponentTys, /*total=*/4); -} - -bool SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, - unsigned NumElts) const { - // The default implementation of this assumes that the target guarantees - // 128-bit SIMD support but nothing more. 
- return (VectorSize.getQuantity() > 8 && VectorSize.getQuantity() <= 16); -} - -static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, - CGCXXABI &CXXABI) { - const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); - if (!RD) { - if (!RT->getDecl()->canPassInRegisters()) - return CGCXXABI::RAA_Indirect; - return CGCXXABI::RAA_Default; - } - return CXXABI.getRecordArgABI(RD); -} - -static CGCXXABI::RecordArgABI getRecordArgABI(QualType T, - CGCXXABI &CXXABI) { - const RecordType *RT = T->getAs<RecordType>(); - if (!RT) - return CGCXXABI::RAA_Default; - return getRecordArgABI(RT, CXXABI); -} - -static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, - const ABIInfo &Info) { - QualType Ty = FI.getReturnType(); - - if (const auto *RT = Ty->getAs<RecordType>()) - if (!isa<CXXRecordDecl>(RT->getDecl()) && - !RT->getDecl()->canPassInRegisters()) { - FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty); - return true; - } - - return CXXABI.classifyReturnType(FI); -} - -/// Pass transparent unions as if they were the type of the first element. Sema -/// should ensure that all elements of the union have the same "machine type". -static QualType useFirstFieldIfTransparentUnion(QualType Ty) { - if (const RecordType *UT = Ty->getAsUnionType()) { - const RecordDecl *UD = UT->getDecl(); - if (UD->hasAttr<TransparentUnionAttr>()) { - assert(!UD->field_empty() && "sema created an empty transparent union"); - return UD->field_begin()->getType(); - } - } - return Ty; -} - -CGCXXABI &ABIInfo::getCXXABI() const { - return CGT.getCXXABI(); -} - -ASTContext &ABIInfo::getContext() const { - return CGT.getContext(); -} - -llvm::LLVMContext &ABIInfo::getVMContext() const { - return CGT.getLLVMContext(); -} - -const llvm::DataLayout &ABIInfo::getDataLayout() const { - return CGT.getDataLayout(); -} - -const TargetInfo &ABIInfo::getTarget() const { - return CGT.getTarget(); -} - -const CodeGenOptions &ABIInfo::getCodeGenOpts() const { - return CGT.getCodeGenOpts(); -} - -bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); } - -bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { - return false; -} - -bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, - uint64_t Members) const { - return false; -} - -bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const { - // For compatibility with GCC, ignore empty bitfields in C++ mode. - return getContext().getLangOpts().CPlusPlus; -} - LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; @@ -291,171 +66,6 @@ LLVM_DUMP_METHOD void ABIArgInfo::dump() const { OS << ")\n"; } -// Dynamically round a pointer up to a multiple of the given alignment. -static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF, - llvm::Value *Ptr, - CharUnits Align) { - llvm::Value *PtrAsInt = Ptr; - // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align; - PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); - PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, - llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); - PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, - llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); - PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, - Ptr->getType(), - Ptr->getName() + ".aligned"); - return PtrAsInt; -} - -/// Emit va_arg for a platform using the common void* representation, -/// where arguments are simply emitted in an array of slots on the stack. 
-/// -/// This version implements the core direct-value passing rules. -/// -/// \param SlotSize - The size and alignment of a stack slot. -/// Each argument will be allocated to a multiple of this number of -/// slots, and all the slots will be aligned to this value. -/// \param AllowHigherAlign - The slot alignment is not a cap; -/// an argument type with an alignment greater than the slot size -/// will be emitted on a higher-alignment address, potentially -/// leaving one or more empty slots behind as padding. If this -/// is false, the returned address might be less-aligned than -/// DirectAlign. -/// \param ForceRightAdjust - Default is false. On big-endian platform and -/// if the argument is smaller than a slot, set this flag will force -/// right-adjust the argument in its slot irrespective of the type. -static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, - Address VAListAddr, - llvm::Type *DirectTy, - CharUnits DirectSize, - CharUnits DirectAlign, - CharUnits SlotSize, - bool AllowHigherAlign, - bool ForceRightAdjust = false) { - // Cast the element type to i8* if necessary. Some platforms define - // va_list as a struct containing an i8* instead of just an i8*. - if (VAListAddr.getElementType() != CGF.Int8PtrTy) - VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); - - llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur"); - - // If the CC aligns values higher than the slot size, do so if needed. - Address Addr = Address::invalid(); - if (AllowHigherAlign && DirectAlign > SlotSize) { - Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), - CGF.Int8Ty, DirectAlign); - } else { - Addr = Address(Ptr, CGF.Int8Ty, SlotSize); - } - - // Advance the pointer past the argument, then store that back. - CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); - Address NextPtr = - CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next"); - CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr); - - // If the argument is smaller than a slot, and this is a big-endian - // target, the argument will be right-adjusted in its slot. - if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && - (!DirectTy->isStructTy() || ForceRightAdjust)) { - Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); - } - - Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy); - return Addr; -} - -/// Emit va_arg for a platform using the common void* representation, -/// where arguments are simply emitted in an array of slots on the stack. -/// -/// \param IsIndirect - Values of this type are passed indirectly. -/// \param ValueInfo - The size and alignment of this type, generally -/// computed with getContext().getTypeInfoInChars(ValueTy). -/// \param SlotSizeAndAlign - The size and alignment of a stack slot. -/// Each argument will be allocated to a multiple of this number of -/// slots, and all the slots will be aligned to this value. -/// \param AllowHigherAlign - The slot alignment is not a cap; -/// an argument type with an alignment greater than the slot size -/// will be emitted on a higher-alignment address, potentially -/// leaving one or more empty slots behind as padding. -/// \param ForceRightAdjust - Default is false. On big-endian platform and -/// if the argument is smaller than a slot, set this flag will force -/// right-adjust the argument in its slot irrespective of the type. 
-static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
-                                QualType ValueTy, bool IsIndirect,
-                                TypeInfoChars ValueInfo,
-                                CharUnits SlotSizeAndAlign,
-                                bool AllowHigherAlign,
-                                bool ForceRightAdjust = false) {
-  // The size and alignment of the value that was passed directly.
-  CharUnits DirectSize, DirectAlign;
-  if (IsIndirect) {
-    DirectSize = CGF.getPointerSize();
-    DirectAlign = CGF.getPointerAlign();
-  } else {
-    DirectSize = ValueInfo.Width;
-    DirectAlign = ValueInfo.Align;
-  }
-
-  // Cast the address we've calculated to the right type.
-  llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy), *ElementTy = DirectTy;
-  if (IsIndirect)
-    DirectTy = DirectTy->getPointerTo(0);
-
-  Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize,
-                                        DirectAlign, SlotSizeAndAlign,
-                                        AllowHigherAlign, ForceRightAdjust);
-
-  if (IsIndirect) {
-    Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align);
-  }
-
-  return Addr;
-}
-
-static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr,
-                                    QualType Ty, CharUnits SlotSize,
-                                    CharUnits EltSize, const ComplexType *CTy) {
-  Address Addr =
-      emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2,
-                             SlotSize, SlotSize, /*AllowHigher*/ true);
-
-  Address RealAddr = Addr;
-  Address ImagAddr = RealAddr;
-  if (CGF.CGM.getDataLayout().isBigEndian()) {
-    RealAddr =
-        CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize);
-    ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
-                                                      2 * SlotSize - EltSize);
-  } else {
-    ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
-  }
-
-  llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
-  RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
-  ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
-  llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
-  llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
-
-  Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
-  CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
-                         /*init*/ true);
-  return Temp;
-}
-
-static Address emitMergePHI(CodeGenFunction &CGF,
-                            Address Addr1, llvm::BasicBlock *Block1,
-                            Address Addr2, llvm::BasicBlock *Block2,
-                            const llvm::Twine &Name = "") {
-  assert(Addr1.getType() == Addr2.getType());
-  llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
-  PHI->addIncoming(Addr1.getPointer(), Block1);
-  PHI->addIncoming(Addr2.getPointer(), Block2);
-  CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment());
-  return Address(PHI, Addr1.getElementType(), Align);
-}
-
 TargetCodeGenInfo::TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info)
     : Info(std::move(Info)) {}
 
@@ -467,7 +77,7 @@ unsigned TargetCodeGenInfo::getSizeOfUnwindException() const {
   // Verified for:
   //   x86-64 FreeBSD, Linux, Darwin
   //   x86-32 FreeBSD, Linux, Darwin
-  //   PowerPC Linux, Darwin
+  //   PowerPC Linux
   //   ARM Darwin (*not* EABI)
   //   AArch64 Linux
   return 32;
@@ -548,2154 +158,9 @@ TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
   return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */
 }
 
-static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
-
-/// isEmptyField - Return true iff the field is "empty", that is it
-/// is an unnamed bit-field or an (array of) empty record(s).
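///
/// Illustrative cases for the rules implemented below (hypothetical types):
///   struct A { int : 7; };                    // unnamed bit-field: empty
///   struct E {};
///   struct B { [[no_unique_address]] E e; };  // C++20 attribute: empty
///   struct C { E e; };                        // C++ empty member: not empty
///   struct D { E e[4]; };                     // array of empty records: not empty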
-static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, - bool AllowArrays) { - if (FD->isUnnamedBitfield()) - return true; - - QualType FT = FD->getType(); - - // Constant arrays of empty records count as empty, strip them off. - // Constant arrays of zero length always count as empty. - bool WasArray = false; - if (AllowArrays) - while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { - if (AT->getSize() == 0) - return true; - FT = AT->getElementType(); - // The [[no_unique_address]] special case below does not apply to - // arrays of C++ empty records, so we need to remember this fact. - WasArray = true; - } - - const RecordType *RT = FT->getAs<RecordType>(); - if (!RT) - return false; - - // C++ record fields are never empty, at least in the Itanium ABI. - // - // FIXME: We should use a predicate for whether this behavior is true in the - // current ABI. - // - // The exception to the above rule are fields marked with the - // [[no_unique_address]] attribute (since C++20). Those do count as empty - // according to the Itanium ABI. The exception applies only to records, - // not arrays of records, so we must also check whether we stripped off an - // array type above. - if (isa<CXXRecordDecl>(RT->getDecl()) && - (WasArray || !FD->hasAttr<NoUniqueAddressAttr>())) - return false; - - return isEmptyRecord(Context, FT, AllowArrays); -} - -/// isEmptyRecord - Return true iff a structure contains only empty -/// fields. Note that a structure with a flexible array member is not -/// considered empty. -static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { - const RecordType *RT = T->getAs<RecordType>(); - if (!RT) - return false; - const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return false; - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (const auto &I : CXXRD->bases()) - if (!isEmptyRecord(Context, I.getType(), true)) - return false; - - for (const auto *I : RD->fields()) - if (!isEmptyField(Context, I, AllowArrays)) - return false; - return true; -} - -/// isSingleElementStruct - Determine if a structure is a "single -/// element struct", i.e. it has exactly one non-empty field or -/// exactly one field which is itself a single element -/// struct. Structures with flexible array members are never -/// considered single element structs. -/// -/// \return The field declaration for the single non-empty field, if -/// it exists. -static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { - const RecordType *RT = T->getAs<RecordType>(); - if (!RT) - return nullptr; - - const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return nullptr; - - const Type *Found = nullptr; - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (const auto &I : CXXRD->bases()) { - // Ignore empty records. - if (isEmptyRecord(Context, I.getType(), true)) - continue; - - // If we already found an element then this isn't a single-element struct. - if (Found) - return nullptr; - - // If this is non-empty and not a single element struct, the composite - // cannot be a single element struct. - Found = isSingleElementStruct(I.getType(), Context); - if (!Found) - return nullptr; - } - } - - // Check for single element. - for (const auto *FD : RD->fields()) { - QualType FT = FD->getType(); - - // Ignore empty fields. 
- if (isEmptyField(Context, FD, true)) - continue; - - // If we already found an element then this isn't a single-element - // struct. - if (Found) - return nullptr; - - // Treat single element arrays as the element. - while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { - if (AT->getSize().getZExtValue() != 1) - break; - FT = AT->getElementType(); - } - - if (!isAggregateTypeForABI(FT)) { - Found = FT.getTypePtr(); - } else { - Found = isSingleElementStruct(FT, Context); - if (!Found) - return nullptr; - } - } - - // We don't consider a struct a single-element struct if it has - // padding beyond the element type. - if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T)) - return nullptr; - - return Found; -} - -namespace { -Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, - const ABIArgInfo &AI) { - // This default implementation defers to the llvm backend's va_arg - // instruction. It can handle only passing arguments directly - // (typically only handled in the backend for primitive types), or - // aggregates passed indirectly by pointer (NOTE: if the "byval" - // flag has ABI impact in the callee, this implementation cannot - // work.) - - // Only a few cases are covered here at the moment -- those needed - // by the default abi. - llvm::Value *Val; - - if (AI.isIndirect()) { - assert(!AI.getPaddingType() && - "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); - assert( - !AI.getIndirectRealign() && - "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!"); - - auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty); - CharUnits TyAlignForABI = TyInfo.Align; - - llvm::Type *ElementTy = CGF.ConvertTypeForMem(Ty); - llvm::Type *BaseTy = llvm::PointerType::getUnqual(ElementTy); - llvm::Value *Addr = - CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy); - return Address(Addr, ElementTy, TyAlignForABI); - } else { - assert((AI.isDirect() || AI.isExtend()) && - "Unexpected ArgInfo Kind in generic VAArg emitter!"); - - assert(!AI.getInReg() && - "Unexpected InReg seen in arginfo in generic VAArg emitter!"); - assert(!AI.getPaddingType() && - "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); - assert(!AI.getDirectOffset() && - "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!"); - assert(!AI.getCoerceToType() && - "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!"); - - Address Temp = CGF.CreateMemTemp(Ty, "varet"); - Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), - CGF.ConvertTypeForMem(Ty)); - CGF.Builder.CreateStore(Val, Temp); - return Temp; - } -} - -/// DefaultABIInfo - The default implementation for ABI specific -/// details. This implementation provides information which results in -/// self-consistent and sensible LLVM IR generation, but does not -/// conform to any particular ABI. 
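///
/// (For instance, under these rules a promotable integer argument is
/// extended, other scalars are passed directly, and every aggregate is
/// passed indirectly at its natural alignment; see classifyArgumentType
/// below.)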
-class DefaultABIInfo : public ABIInfo { -public: - DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy) const; - - void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - } - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override { - return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); - } -}; - -class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { -public: - DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} -}; - -ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - if (isAggregateTypeForABI(Ty)) { - // Records with non-trivial destructors/copy-constructors should not be - // passed by value. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - return getNaturalAlignIndirect(Ty); - } - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - ASTContext &Context = getContext(); - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > - Context.getTypeSize(Context.getTargetInfo().hasInt128Type() - ? Context.Int128Ty - : Context.LongLongTy)) - return getNaturalAlignIndirect(Ty); - - return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); -} - -ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (isAggregateTypeForABI(RetTy)) - return getNaturalAlignIndirect(RetTy); - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > - getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type() - ? getContext().Int128Ty - : getContext().LongLongTy)) - return getNaturalAlignIndirect(RetTy); - - return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -//===----------------------------------------------------------------------===// -// WebAssembly ABI Implementation -// -// This is a very simple ABI that relies a lot on DefaultABIInfo. -//===----------------------------------------------------------------------===// - -class WebAssemblyABIInfo final : public ABIInfo { -public: - enum ABIKind { - MVP = 0, - ExperimentalMV = 1, - }; - -private: - DefaultABIInfo defaultInfo; - ABIKind Kind; - -public: - explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind) - : ABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {} - -private: - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty) const; - - // DefaultABIInfo's classifyReturnType and classifyArgumentType are - // non-virtual, but computeInfo and EmitVAArg are virtual, so we - // overload them. 
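  // (Under the experimental multivalue ABI -- Kind == ABIKind::ExperimentalMV
  // -- a bit-field-free aggregate such as struct { int a; float b; } is fully
  // expanded as an argument and returned directly, as the classification
  // functions below implement.)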
- void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &Arg : FI.arguments()) - Arg.info = classifyArgumentType(Arg.type); - } - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { -public: - explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - WebAssemblyABIInfo::ABIKind K) - : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) { - SwiftInfo = - std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) { - llvm::Function *Fn = cast<llvm::Function>(GV); - llvm::AttrBuilder B(GV->getContext()); - B.addAttribute("wasm-import-module", Attr->getImportModule()); - Fn->addFnAttrs(B); - } - if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) { - llvm::Function *Fn = cast<llvm::Function>(GV); - llvm::AttrBuilder B(GV->getContext()); - B.addAttribute("wasm-import-name", Attr->getImportName()); - Fn->addFnAttrs(B); - } - if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) { - llvm::Function *Fn = cast<llvm::Function>(GV); - llvm::AttrBuilder B(GV->getContext()); - B.addAttribute("wasm-export-name", Attr->getExportName()); - Fn->addFnAttrs(B); - } - } - - if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { - llvm::Function *Fn = cast<llvm::Function>(GV); - if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) - Fn->addFnAttr("no-prototype"); - } - } -}; - -/// Classify argument of given type \p Ty. -ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - if (isAggregateTypeForABI(Ty)) { - // Records with non-trivial destructors/copy-constructors should not be - // passed by value. - if (auto RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - // Lower single-element structs to just pass a regular value. TODO: We - // could do reasonable-size multiple-element structs too, using getExpand(), - // though watch out for things like bitfields. - if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); - // For the experimental multivalue ABI, fully expand all other aggregates - if (Kind == ABIKind::ExperimentalMV) { - const RecordType *RT = Ty->getAs<RecordType>(); - assert(RT); - bool HasBitField = false; - for (auto *Field : RT->getDecl()->fields()) { - if (Field->isBitField()) { - HasBitField = true; - break; - } - } - if (!HasBitField) - return ABIArgInfo::getExpand(); - } - } - - // Otherwise just do the default thing. - return defaultInfo.classifyArgumentType(Ty); -} - -ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { - if (isAggregateTypeForABI(RetTy)) { - // Records with non-trivial destructors/copy-constructors should not be - // returned by value. - if (!getRecordArgABI(RetTy, getCXXABI())) { - // Ignore empty structs/unions. 
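      // (e.g. a function returning an empty struct emits no return value at
      // all under this ABI.)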
- if (isEmptyRecord(getContext(), RetTy, true)) - return ABIArgInfo::getIgnore(); - // Lower single-element structs to just return a regular value. TODO: We - // could do reasonable-size multiple-element structs too, using - // ABIArgInfo::getDirect(). - if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); - // For the experimental multivalue ABI, return all other aggregates - if (Kind == ABIKind::ExperimentalMV) - return ABIArgInfo::getDirect(); - } - } - - // Otherwise just do the default thing. - return defaultInfo.classifyReturnType(RetTy); -} - -Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - bool IsIndirect = isAggregateTypeForABI(Ty) && - !isEmptyRecord(getContext(), Ty, true) && - !isSingleElementStruct(Ty, getContext()); - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, - getContext().getTypeInfoInChars(Ty), - CharUnits::fromQuantity(4), - /*AllowHigherAlign=*/true); -} - -//===----------------------------------------------------------------------===// -// le32/PNaCl bitcode ABI Implementation -// -// This is a simplified version of the x86_32 ABI. Arguments and return values -// are always passed on the stack. -//===----------------------------------------------------------------------===// - -class PNaClABIInfo : public ABIInfo { - public: - PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy) const; - - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, - Address VAListAddr, QualType Ty) const override; -}; - -class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { - public: - PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {} -}; - -void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); -} - -Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - // The PNaCL ABI is a bit odd, in that varargs don't use normal - // function classification. Structs get passed directly for varargs - // functions, through a rewriting transform in - // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows - // this target to actually support a va_arg instructions with an - // aggregate type, unlike other targets. - return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); -} - -/// Classify argument of given type \p Ty. -ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { - if (isAggregateTypeForABI(Ty)) { - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - return getNaturalAlignIndirect(Ty); - } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) { - // Treat an enum type as its underlying type. - Ty = EnumTy->getDecl()->getIntegerType(); - } else if (Ty->isFloatingType()) { - // Floating-point types don't go inreg. - return ABIArgInfo::getDirect(); - } else if (const auto *EIT = Ty->getAs<BitIntType>()) { - // Treat bit-precise integers as integers if <= 64, otherwise pass - // indirectly. 
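    // (So e.g. _BitInt(48) is passed directly, while _BitInt(128) is passed
    // indirectly at its natural alignment.)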
- if (EIT->getNumBits() > 64) - return getNaturalAlignIndirect(Ty); - return ABIArgInfo::getDirect(); - } - - return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); -} - -ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - // In the PNaCl ABI we always return records/structures on the stack. - if (isAggregateTypeForABI(RetTy)) - return getNaturalAlignIndirect(RetTy); - - // Treat bit-precise integers as integers if <= 64, otherwise pass indirectly. - if (const auto *EIT = RetTy->getAs<BitIntType>()) { - if (EIT->getNumBits() > 64) - return getNaturalAlignIndirect(RetTy); - return ABIArgInfo::getDirect(); - } - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -/// IsX86_MMXType - Return true if this is an MMX type. -bool IsX86_MMXType(llvm::Type *IRType) { - // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. - return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && - cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && - IRType->getScalarSizeInBits() != 64; -} - -static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, - StringRef Constraint, - llvm::Type* Ty) { - bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) - .Cases("y", "&y", "^Ym", true) - .Default(false); - if (IsMMXCons && Ty->isVectorTy()) { - if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() != - 64) { - // Invalid MMX constraint - return nullptr; - } - - return llvm::Type::getX86_MMXTy(CGF.getLLVMContext()); - } - - // No operation needed - return Ty; -} - -/// Returns true if this type can be passed in SSE registers with the -/// X86_VectorCall calling convention. Shared between x86_32 and x86_64. -static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { - if (BT->getKind() == BuiltinType::LongDouble) { - if (&Context.getTargetInfo().getLongDoubleFormat() == - &llvm::APFloat::x87DoubleExtended()) - return false; - } - return true; - } - } else if (const VectorType *VT = Ty->getAs<VectorType>()) { - // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX - // registers specially. - unsigned VecSize = Context.getTypeSize(VT); - if (VecSize == 128 || VecSize == 256 || VecSize == 512) - return true; - } - return false; -} - -/// Returns true if this aggregate is small enough to be passed in SSE registers -/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64. -static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { - return NumMembers <= 4; -} - -/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. -static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { - auto AI = ABIArgInfo::getDirect(T); - AI.setInReg(true); - AI.setCanBeFlattened(false); - return AI; -} - -//===----------------------------------------------------------------------===// -// X86-32 ABI Implementation -//===----------------------------------------------------------------------===// - -/// Similar to llvm::CCState, but for Clang. 
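///
/// (For example, computeInfo further below seeds a fastcall CCState with
/// FreeRegs == 2 (ECX, EDX) and FreeSSERegs == 3; classification then
/// decrements these counts as arguments are assigned to registers.)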
-struct CCState {
-  CCState(CGFunctionInfo &FI)
-      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
-
-  llvm::SmallBitVector IsPreassigned;
-  unsigned CC = CallingConv::CC_C;
-  unsigned FreeRegs = 0;
-  unsigned FreeSSERegs = 0;
-};
-
-/// X86_32ABIInfo - The X86-32 ABI information.
-class X86_32ABIInfo : public ABIInfo {
-  enum Class {
-    Integer,
-    Float
-  };
-
-  static const unsigned MinABIStackAlignInBytes = 4;
-
-  bool IsDarwinVectorABI;
-  bool IsRetSmallStructInRegABI;
-  bool IsWin32StructABI;
-  bool IsSoftFloatABI;
-  bool IsMCUABI;
-  bool IsLinuxABI;
-  unsigned DefaultNumRegisterParameters;
-
-  static bool isRegisterSize(unsigned Size) {
-    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
-  }
-
-  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
-    // FIXME: Assumes vectorcall is in use.
-    return isX86VectorTypeForVectorCall(getContext(), Ty);
-  }
-
-  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
-                                         uint64_t NumMembers) const override {
-    // FIXME: Assumes vectorcall is in use.
-    return isX86VectorCallAggregateSmallEnough(NumMembers);
-  }
-
-  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
-
-  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
-  /// such that the argument will be passed in memory.
-  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
-
-  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
-
-  /// Return the alignment to use for the given type on the stack.
-  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
-
-  Class classify(QualType Ty) const;
-  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
-
-  /// Updates the number of available free registers, returns
-  /// true if any registers were allocated.
-  bool updateFreeRegs(QualType Ty, CCState &State) const;
-
-  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
-                                bool &NeedsPadding) const;
-  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
-
-  bool canExpandIndirectArgument(QualType Ty) const;
-
-  /// Rewrite the function info so that all memory arguments use
-  /// inalloca.
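  ///
  /// (Illustration with a hypothetical non-trivially-copyable type: for
  /// void f(NonTrivial a, int b) on win32, both arguments end up as fields
  /// of a single packed struct that the caller allocates in its argument
  /// area.)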
- void rewriteWithInAlloca(CGFunctionInfo &FI) const; - - void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, - CharUnits &StackOffset, ABIArgInfo &Info, - QualType Type) const; - void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const; - -public: - - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, - bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) - : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), - IsRetSmallStructInRegABI(RetSmallStructInRegABI), - IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), - IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() || - CGT.getTarget().getTriple().isOSCygMing()), - DefaultNumRegisterParameters(NumRegisterParameters) {} -}; - -class X86_32SwiftABIInfo : public SwiftABIInfo { -public: - explicit X86_32SwiftABIInfo(CodeGenTypes &CGT) - : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {} - - bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys, - bool AsReturnValue) const override { - // LLVM's x86-32 lowering currently only assigns up to three - // integer registers and three fp registers. Oddly, it'll use up to - // four vector registers for vectors, but those can overlap with the - // scalar registers. - return occupiesMoreThan(CGT, ComponentTys, /*total=*/3); - } -}; - -class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { -public: - X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, - bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) - : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>( - CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, - NumRegisterParameters, SoftFloatABI)) { - SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT); - } - - static bool isStructReturnInRegABI( - const llvm::Triple &Triple, const CodeGenOptions &Opts); - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { - // Darwin uses different dwarf register numbers for EH. - if (CGM.getTarget().getTriple().isOSDarwin()) return 5; - return 4; - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; - - llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, - StringRef Constraint, - llvm::Type* Ty) const override { - return X86AdjustInlineAsmType(CGF, Constraint, Ty); - } - - void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue, - std::string &Constraints, - std::vector<llvm::Type *> &ResultRegTypes, - std::vector<llvm::Type *> &ResultTruncRegTypes, - std::vector<LValue> &ResultRegDests, - std::string &AsmString, - unsigned NumOutputs) const override; - - llvm::Constant * - getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { - unsigned Sig = (0xeb << 0) | // jmp rel8 - (0x06 << 8) | // .+0x08 - ('v' << 16) | - ('2' << 24); - return llvm::ConstantInt::get(CGM.Int32Ty, Sig); - } - - StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "movl\t%ebp, %ebp" - "\t\t// marker for objc_retainAutoreleaseReturnValue"; - } -}; - -} - -/// Rewrite input constraint references after adding some output constraints. 
-/// In the case where there is one output and one input and we add one output, -/// we need to replace all operand references greater than or equal to 1: -/// mov $0, $1 -/// mov eax, $1 -/// The result will be: -/// mov $0, $2 -/// mov eax, $2 -static void rewriteInputConstraintReferences(unsigned FirstIn, - unsigned NumNewOuts, - std::string &AsmString) { - std::string Buf; - llvm::raw_string_ostream OS(Buf); - size_t Pos = 0; - while (Pos < AsmString.size()) { - size_t DollarStart = AsmString.find('$', Pos); - if (DollarStart == std::string::npos) - DollarStart = AsmString.size(); - size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart); - if (DollarEnd == std::string::npos) - DollarEnd = AsmString.size(); - OS << StringRef(&AsmString[Pos], DollarEnd - Pos); - Pos = DollarEnd; - size_t NumDollars = DollarEnd - DollarStart; - if (NumDollars % 2 != 0 && Pos < AsmString.size()) { - // We have an operand reference. - size_t DigitStart = Pos; - if (AsmString[DigitStart] == '{') { - OS << '{'; - ++DigitStart; - } - size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart); - if (DigitEnd == std::string::npos) - DigitEnd = AsmString.size(); - StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart); - unsigned OperandIndex; - if (!OperandStr.getAsInteger(10, OperandIndex)) { - if (OperandIndex >= FirstIn) - OperandIndex += NumNewOuts; - OS << OperandIndex; - } else { - OS << OperandStr; - } - Pos = DigitEnd; - } - } - AsmString = std::move(OS.str()); -} - -/// Add output constraints for EAX:EDX because they are return registers. -void X86_32TargetCodeGenInfo::addReturnRegisterOutputs( - CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints, - std::vector<llvm::Type *> &ResultRegTypes, - std::vector<llvm::Type *> &ResultTruncRegTypes, - std::vector<LValue> &ResultRegDests, std::string &AsmString, - unsigned NumOutputs) const { - uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType()); - - // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is - // larger. - if (!Constraints.empty()) - Constraints += ','; - if (RetWidth <= 32) { - Constraints += "={eax}"; - ResultRegTypes.push_back(CGF.Int32Ty); - } else { - // Use the 'A' constraint for EAX:EDX. - Constraints += "=A"; - ResultRegTypes.push_back(CGF.Int64Ty); - } - - // Truncate EAX or EAX:EDX to an integer of the appropriate size. - llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth); - ResultTruncRegTypes.push_back(CoerceTy); - - // Coerce the integer by bitcasting the return slot pointer. - ReturnSlot.setAddress( - CGF.Builder.CreateElementBitCast(ReturnSlot.getAddress(CGF), CoerceTy)); - ResultRegDests.push_back(ReturnSlot); - - rewriteInputConstraintReferences(NumOutputs, 1, AsmString); -} - -/// shouldReturnTypeInRegister - Determine if the given type should be -/// returned in a register (for the Darwin and MCU ABI). -bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, - ASTContext &Context) const { - uint64_t Size = Context.getTypeSize(Ty); - - // For i386, type must be register sized. - // For the MCU ABI, it only needs to be <= 8-byte - if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size))) - return false; - - if (Ty->isVectorType()) { - // 64- and 128- bit vectors inside structures are not returned in - // registers. - if (Size == 64 || Size == 128) - return false; - - return true; - } - - // If this is a builtin, pointer, enum, complex type, member pointer, or - // member function pointer it is ok. 
- if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() || - Ty->isAnyComplexType() || Ty->isEnumeralType() || - Ty->isBlockPointerType() || Ty->isMemberPointerType()) - return true; - - // Arrays are treated like records. - if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) - return shouldReturnTypeInRegister(AT->getElementType(), Context); - - // Otherwise, it must be a record type. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) return false; - - // FIXME: Traverse bases here too. - - // Structure types are passed in register if all fields would be - // passed in a register. - for (const auto *FD : RT->getDecl()->fields()) { - // Empty fields are ignored. - if (isEmptyField(Context, FD, true)) - continue; - - // Check fields recursively. - if (!shouldReturnTypeInRegister(FD->getType(), Context)) - return false; - } - return true; -} - -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, - uint64_t &Size) { - for (const auto *FD : RD->fields()) { - // Scalar arguments on the stack get 4 byte alignment on x86. If the - // argument is smaller than 32-bits, expanding the struct will create - // alignment padding. - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; - - Size += Context.getTypeSize(FD->getType()); - } - return true; -} - -static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, - uint64_t &Size) { - // Don't do this if there are any non-empty bases. - for (const CXXBaseSpecifier &Base : RD->bases()) { - if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), - Size)) - return false; - } - if (!addFieldSizes(Context, RD, Size)) - return false; - return true; -} - -/// Test whether an argument type which is to be passed indirectly (on the -/// stack) would have the equivalent layout if it was expanded into separate -/// arguments. If so, we prefer to do the latter to avoid inhibiting -/// optimizations. -bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { - // We can only expand structure types. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) - return false; - const RecordDecl *RD = RT->getDecl(); - uint64_t Size = 0; - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!IsWin32StructABI) { - // On non-Windows, we have to conservatively match our old bitcode - // prototypes in order to be ABI-compatible at the bitcode level. - if (!CXXRD->isCLike()) - return false; - } else { - // Don't do this for dynamic classes. 
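      // (A dynamic class starts with a vtable pointer, so an expansion into
      // its declared fields could not match the in-memory layout.)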
- if (CXXRD->isDynamicClass()) - return false; - } - if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) - return false; - } else { - if (!addFieldSizes(getContext(), RD, Size)) - return false; - } - - // We can do this if there was no alignment padding. - return Size == getContext().getTypeSize(Ty); -} - -ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { - // If the return value is indirect, then the hidden argument is consuming one - // integer register. - if (State.FreeRegs) { - --State.FreeRegs; - if (!IsMCUABI) - return getNaturalAlignIndirectInReg(RetTy); - } - return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); -} - -ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, - CCState &State) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - const Type *Base = nullptr; - uint64_t NumElts = 0; - if ((State.CC == llvm::CallingConv::X86_VectorCall || - State.CC == llvm::CallingConv::X86_RegCall) && - isHomogeneousAggregate(RetTy, Base, NumElts)) { - // The LLVM struct type for such an aggregate should lower properly. - return ABIArgInfo::getDirect(); - } - - if (const VectorType *VT = RetTy->getAs<VectorType>()) { - // On Darwin, some vectors are returned in registers. - if (IsDarwinVectorABI) { - uint64_t Size = getContext().getTypeSize(RetTy); - - // 128-bit vectors are a special case; they are returned in - // registers and we need to make sure to pick a type the LLVM - // backend will like. - if (Size == 128) - return ABIArgInfo::getDirect(llvm::FixedVectorType::get( - llvm::Type::getInt64Ty(getVMContext()), 2)); - - // Always return in register if it fits in a general purpose - // register, or if it is 64 bits and has a single element. - if ((Size == 8 || Size == 16 || Size == 32) || - (Size == 64 && VT->getNumElements() == 1)) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - Size)); - - return getIndirectReturnResult(RetTy, State); - } - - return ABIArgInfo::getDirect(); - } - - if (isAggregateTypeForABI(RetTy)) { - if (const RecordType *RT = RetTy->getAs<RecordType>()) { - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectReturnResult(RetTy, State); - } - - // If specified, structs and unions are always indirect. - if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) - return getIndirectReturnResult(RetTy, State); - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), RetTy, true)) - return ABIArgInfo::getIgnore(); - - // Return complex of _Float16 as <2 x half> so the backend will use xmm0. - if (const ComplexType *CT = RetTy->getAs<ComplexType>()) { - QualType ET = getContext().getCanonicalType(CT->getElementType()); - if (ET->isFloat16Type()) - return ABIArgInfo::getDirect(llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getVMContext()), 2)); - } - - // Small structures which are register sized are generally returned - // in a register. - if (shouldReturnTypeInRegister(RetTy, getContext())) { - uint64_t Size = getContext().getTypeSize(RetTy); - - // As a special-case, if the struct is a "single-element" struct, and - // the field is of type "float" or "double", return it in a - // floating-point register. (MSVC does not apply this special case.) - // We apply a similar transformation for pointer types to improve the - // quality of the generated IR. 
- if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) - if ((!IsWin32StructABI && SeltTy->isRealFloatingType()) - || SeltTy->hasPointerRepresentation()) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); - - // FIXME: We should be able to narrow this integer in cases with dead - // padding. - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size)); - } - - return getIndirectReturnResult(RetTy, State); - } - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > 64) - return getIndirectReturnResult(RetTy, State); - - return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -static bool isSIMDVectorType(ASTContext &Context, QualType Ty) { - return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128; -} - -static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) { - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) - return false; - const RecordDecl *RD = RT->getDecl(); - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (const auto &I : CXXRD->bases()) - if (!isRecordWithSIMDVectorType(Context, I.getType())) - return false; - - for (const auto *i : RD->fields()) { - QualType FT = i->getType(); - - if (isSIMDVectorType(Context, FT)) - return true; - - if (isRecordWithSIMDVectorType(Context, FT)) - return true; - } - - return false; -} - -unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, - unsigned Align) const { - // Otherwise, if the alignment is less than or equal to the minimum ABI - // alignment, just use the default; the backend will handle this. - if (Align <= MinABIStackAlignInBytes) - return 0; // Use default alignment. - - if (IsLinuxABI) { - // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't - // want to spend any effort dealing with the ramifications of ABI breaks. - // - // If the vector type is __m128/__m256/__m512, return the default alignment. - if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64)) - return Align; - } - // On non-Darwin, the stack type alignment is always 4. - if (!IsDarwinVectorABI) { - // Set explicit alignment, since we may need to realign the top. - return MinABIStackAlignInBytes; - } - - // Otherwise, if the type contains an SSE vector type, the alignment is 16. - if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) || - isRecordWithSIMDVectorType(getContext(), Ty))) - return 16; - - return MinABIStackAlignInBytes; -} - -ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, - CCState &State) const { - if (!ByVal) { - if (State.FreeRegs) { - --State.FreeRegs; // Non-byval indirects just use one pointer. - if (!IsMCUABI) - return getNaturalAlignIndirectInReg(Ty); - } - return getNaturalAlignIndirect(Ty, false); - } - - // Compute the byval alignment. - unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; - unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); - if (StackAlign == 0) - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true); - - // If the stack alignment is less than the type alignment, realign the - // argument. 
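  // (e.g. a struct the source declares with 16-byte alignment, passed byval
  // at the 4-byte stack alignment, must be copied by the callee to suitably
  // aligned storage.)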
-  bool Realign = TypeAlign > StackAlign;
-  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
-                                 /*ByVal=*/true, Realign);
-}
-
-X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
-  const Type *T = isSingleElementStruct(Ty, getContext());
-  if (!T)
-    T = Ty.getTypePtr();
-
-  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
-    BuiltinType::Kind K = BT->getKind();
-    if (K == BuiltinType::Float || K == BuiltinType::Double)
-      return Float;
-  }
-  return Integer;
-}
-
-bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
-  if (!IsSoftFloatABI) {
-    Class C = classify(Ty);
-    if (C == Float)
-      return false;
-  }
-
-  unsigned Size = getContext().getTypeSize(Ty);
-  unsigned SizeInRegs = (Size + 31) / 32;
-
-  if (SizeInRegs == 0)
-    return false;
-
-  if (!IsMCUABI) {
-    if (SizeInRegs > State.FreeRegs) {
-      State.FreeRegs = 0;
-      return false;
-    }
-  } else {
-    // The MCU psABI allows passing parameters in-reg even if there are
-    // earlier parameters that are passed on the stack. Also,
-    // it does not allow passing >8-byte structs in-register,
-    // even if there are 3 free registers available.
-    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
-      return false;
-  }
-
-  State.FreeRegs -= SizeInRegs;
-  return true;
-}
-
-bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
-                                             bool &InReg,
-                                             bool &NeedsPadding) const {
-  // On Windows, aggregates other than HFAs are never passed in registers, and
-  // they do not consume register slots. Homogeneous floating-point aggregates
-  // (HFAs) have already been dealt with at this point.
-  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
-    return false;
-
-  NeedsPadding = false;
-  InReg = !IsMCUABI;
-
-  if (!updateFreeRegs(Ty, State))
-    return false;
-
-  if (IsMCUABI)
-    return true;
-
-  if (State.CC == llvm::CallingConv::X86_FastCall ||
-      State.CC == llvm::CallingConv::X86_VectorCall ||
-      State.CC == llvm::CallingConv::X86_RegCall) {
-    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
-      NeedsPadding = true;
-
-    return false;
-  }
-
-  return true;
-}
-
-bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
-  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
-                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
-                     Ty->isReferenceType());
-
-  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
-                      State.CC == llvm::CallingConv::X86_VectorCall))
-    return false;
-
-  if (!updateFreeRegs(Ty, State))
-    return false;
-
-  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
-    return false;
-
-  // Return true to apply inreg to all legal parameters except for MCU targets.
-  return !IsMCUABI;
-}
-
-void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
-  // Vectorcall on x86 works subtly differently than on x64, so the format is
-  // a bit different from the x64 version. First, all vector types (not HVAs)
-  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
-  // This differs from the x64 implementation, where the first 6 arguments by
-  // index get registers.
-  // In the second pass over the arguments, HVAs are passed in the remaining
-  // vector registers if possible, or indirectly by address. The address will
-  // be passed in ECX/EDX if available. Any other arguments are passed
-  // according to the usual fastcall rules.
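  // (Illustration with a hypothetical HVA type: for f(__m128 a, HVA4 b,
  // __m128 c), this first pass preassigns a and c to XMM registers and
  // leaves b for the second pass, where it may take the remaining vector
  // registers or be passed indirectly.)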
- MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); - for (int I = 0, E = Args.size(); I < E; ++I) { - const Type *Base = nullptr; - uint64_t NumElts = 0; - const QualType &Ty = Args[I].type; - if ((Ty->isVectorType() || Ty->isBuiltinType()) && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - Args[I].info = ABIArgInfo::getDirectInReg(); - State.IsPreassigned.set(I); - } - } - } -} - -ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, - CCState &State) const { - // FIXME: Set alignment on indirect arguments. - bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall; - bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall; - bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall; - - Ty = useFirstFieldIfTransparentUnion(Ty); - TypeInfo TI = getContext().getTypeInfo(Ty); - - // Check with the C++ ABI first. - const RecordType *RT = Ty->getAs<RecordType>(); - if (RT) { - CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); - if (RAA == CGCXXABI::RAA_Indirect) { - return getIndirectResult(Ty, false, State); - } else if (RAA == CGCXXABI::RAA_DirectInMemory) { - // The field index doesn't matter, we'll fix it up later. - return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); - } - } - - // Regcall uses the concept of a homogenous vector aggregate, similar - // to other targets. - const Type *Base = nullptr; - uint64_t NumElts = 0; - if ((IsRegCall || IsVectorCall) && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - - // Vectorcall passes HVAs directly and does not flatten them, but regcall - // does. - if (IsVectorCall) - return getDirectX86Hva(); - - if (Ty->isBuiltinType() || Ty->isVectorType()) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - - if (isAggregateTypeForABI(Ty)) { - // Structures with flexible arrays are always indirect. - // FIXME: This should not be byval! - if (RT && RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - - // Ignore empty structs/unions on non-Windows. - if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - llvm::LLVMContext &LLVMContext = getVMContext(); - llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding = false; - bool InReg; - if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { - unsigned SizeInRegs = (TI.Width + 31) / 32; - SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); - llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); - if (InReg) - return ABIArgInfo::getDirectInReg(Result); - else - return ABIArgInfo::getDirect(Result); - } - llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; - - // Pass over-aligned aggregates on Windows indirectly. This behavior was - // added in MSVC 2015. - if (IsWin32StructABI && TI.isAlignRequired() && TI.Align > 32) - return getIndirectResult(Ty, /*ByVal=*/false, State); - - // Expand small (<= 128-bit) record types when we know that the stack layout - // of those arguments will match the struct. This is important because the - // LLVM backend isn't smart enough to remove byval, which inhibits many - // optimizations. - // Don't do this for the MCU if there are still free integer registers - // (see X86_64 ABI for full explanation). 
- if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && - canExpandIndirectArgument(Ty)) - return ABIArgInfo::getExpandWithPadding( - IsFastCall || IsVectorCall || IsRegCall, PaddingType); - - return getIndirectResult(Ty, true, State); - } - - if (const VectorType *VT = Ty->getAs<VectorType>()) { - // On Windows, vectors are passed directly if registers are available, or - // indirectly if not. This avoids the need to align argument memory. Pass - // user-defined vector types larger than 512 bits indirectly for simplicity. - if (IsWin32StructABI) { - if (TI.Width <= 512 && State.FreeSSERegs > 0) { - --State.FreeSSERegs; - return ABIArgInfo::getDirectInReg(); - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - - // On Darwin, some vectors are passed in memory, we handle this by passing - // it as an i8/i16/i32/i64. - if (IsDarwinVectorABI) { - if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) || - (TI.Width == 64 && VT->getNumElements() == 1)) - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), TI.Width)); - } - - if (IsX86_MMXType(CGT.ConvertType(Ty))) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); - - return ABIArgInfo::getDirect(); - } - - - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - bool InReg = shouldPrimitiveUseInReg(Ty, State); - - if (isPromotableIntegerTypeForABI(Ty)) { - if (InReg) - return ABIArgInfo::getExtendInReg(Ty); - return ABIArgInfo::getExtend(Ty); - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() <= 64) { - if (InReg) - return ABIArgInfo::getDirectInReg(); - return ABIArgInfo::getDirect(); - } - return getIndirectResult(Ty, /*ByVal=*/false, State); - } - - if (InReg) - return ABIArgInfo::getDirectInReg(); - return ABIArgInfo::getDirect(); -} - -void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { - CCState State(FI); - if (IsMCUABI) - State.FreeRegs = 3; - else if (State.CC == llvm::CallingConv::X86_FastCall) { - State.FreeRegs = 2; - State.FreeSSERegs = 3; - } else if (State.CC == llvm::CallingConv::X86_VectorCall) { - State.FreeRegs = 2; - State.FreeSSERegs = 6; - } else if (FI.getHasRegParm()) - State.FreeRegs = FI.getRegParm(); - else if (State.CC == llvm::CallingConv::X86_RegCall) { - State.FreeRegs = 5; - State.FreeSSERegs = 8; - } else if (IsWin32StructABI) { - // Since MSVC 2015, the first three SSE vectors have been passed in - // registers. The rest are passed indirectly. - State.FreeRegs = DefaultNumRegisterParameters; - State.FreeSSERegs = 3; - } else - State.FreeRegs = DefaultNumRegisterParameters; - - if (!::classifyReturnType(getCXXABI(), FI, *this)) { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); - } else if (FI.getReturnInfo().isIndirect()) { - // The C++ ABI is not aware of register usage, so we have to check if the - // return value was sret and put it in a register ourselves if appropriate. - if (State.FreeRegs) { - --State.FreeRegs; // The sret parameter consumes a register. - if (!IsMCUABI) - FI.getReturnInfo().setInReg(true); - } - } - - // The chain argument effectively gives us another free register. - if (FI.isChainCall()) - ++State.FreeRegs; - - // For vectorcall, do a first pass over the arguments, assigning FP and vector - // arguments to XMM registers as available. 
- if (State.CC == llvm::CallingConv::X86_VectorCall) - runVectorCallFirstPass(FI, State); - - bool UsedInAlloca = false; - MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); - for (int I = 0, E = Args.size(); I < E; ++I) { - // Skip arguments that have already been assigned. - if (State.IsPreassigned.test(I)) - continue; - - Args[I].info = classifyArgumentType(Args[I].type, State); - UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); - } - - // If we needed to use inalloca for any argument, do a second pass and rewrite - // all the memory arguments to use inalloca. - if (UsedInAlloca) - rewriteWithInAlloca(FI); -} - -void -X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, - CharUnits &StackOffset, ABIArgInfo &Info, - QualType Type) const { - // Arguments are always 4-byte-aligned. - CharUnits WordSize = CharUnits::fromQuantity(4); - assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct"); - - // sret pointers and indirect things will require an extra pointer - // indirection, unless they are byval. Most things are byval, and will not - // require this indirection. - bool IsIndirect = false; - if (Info.isIndirect() && !Info.getIndirectByVal()) - IsIndirect = true; - Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect); - llvm::Type *LLTy = CGT.ConvertTypeForMem(Type); - if (IsIndirect) - LLTy = LLTy->getPointerTo(0); - FrameFields.push_back(LLTy); - StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type); - - // Insert padding bytes to respect alignment. - CharUnits FieldEnd = StackOffset; - StackOffset = FieldEnd.alignTo(WordSize); - if (StackOffset != FieldEnd) { - CharUnits NumBytes = StackOffset - FieldEnd; - llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); - Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity()); - FrameFields.push_back(Ty); - } -} - -static bool isArgInAlloca(const ABIArgInfo &Info) { - // Leave ignored and inreg arguments alone. - switch (Info.getKind()) { - case ABIArgInfo::InAlloca: - return true; - case ABIArgInfo::Ignore: - case ABIArgInfo::IndirectAliased: - return false; - case ABIArgInfo::Indirect: - case ABIArgInfo::Direct: - case ABIArgInfo::Extend: - return !Info.getInReg(); - case ABIArgInfo::Expand: - case ABIArgInfo::CoerceAndExpand: - // These are aggregate types which are never passed in registers when - // inalloca is involved. - return true; - } - llvm_unreachable("invalid enum"); -} - -void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { - assert(IsWin32StructABI && "inalloca only supported on win32"); - - // Build a packed struct type for all of the arguments in memory. - SmallVector<llvm::Type *, 6> FrameFields; - - // The stack alignment is always 4. - CharUnits StackAlign = CharUnits::fromQuantity(4); - - CharUnits StackOffset; - CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end(); - - // Put 'this' into the struct before 'sret', if necessary. - bool IsThisCall = - FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall; - ABIArgInfo &Ret = FI.getReturnInfo(); - if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall && - isArgInAlloca(I->info)) { - addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); - ++I; - } - - // Put the sret parameter into the inalloca struct if it's in memory. - if (Ret.isIndirect() && !Ret.getInReg()) { - addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType()); - // On Windows, the hidden sret parameter is always returned in eax. 
- Ret.setInAllocaSRet(IsWin32StructABI); - } - - // Skip the 'this' parameter in ecx. - if (IsThisCall) - ++I; - - // Put arguments passed in memory into the struct. - for (; I != E; ++I) { - if (isArgInAlloca(I->info)) - addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); - } - - FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields, - /*isPacked=*/true), - StackAlign); -} - -Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, - Address VAListAddr, QualType Ty) const { - - auto TypeInfo = getContext().getTypeInfoInChars(Ty); - - // x86-32 changes the alignment of certain arguments on the stack. - // - // Just messing with TypeInfo like this works because we never pass - // anything indirectly. - TypeInfo.Align = CharUnits::fromQuantity( - getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity())); - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, - TypeInfo, CharUnits::fromQuantity(4), - /*AllowHigherAlign*/ true); -} - -bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( - const llvm::Triple &Triple, const CodeGenOptions &Opts) { - assert(Triple.getArch() == llvm::Triple::x86); - - switch (Opts.getStructReturnConvention()) { - case CodeGenOptions::SRCK_Default: - break; - case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return - return false; - case CodeGenOptions::SRCK_InRegs: // -freg-struct-return - return true; - } - - if (Triple.isOSDarwin() || Triple.isOSIAMCU()) - return true; - - switch (Triple.getOS()) { - case llvm::Triple::DragonFly: - case llvm::Triple::FreeBSD: - case llvm::Triple::OpenBSD: - case llvm::Triple::Win32: - return true; - default: - return false; - } -} - -static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) { - if (!FD->hasAttr<AnyX86InterruptAttr>()) - return; - - llvm::Function *Fn = cast<llvm::Function>(GV); - Fn->setCallingConv(llvm::CallingConv::X86_INTR); - if (FD->getNumParams() == 0) - return; - - auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType()); - llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType()); - llvm::Attribute NewAttr = llvm::Attribute::getWithByValType( - Fn->getContext(), ByValTy); - Fn->addParamAttr(0, NewAttr); -} - -void X86_32TargetCodeGenInfo::setTargetAttributes( +void TargetCodeGenInfo::addStackProbeTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - if (GV->isDeclaration()) - return; - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - llvm::Function *Fn = cast<llvm::Function>(GV); - Fn->addFnAttr("stackrealign"); - } - - addX86InterruptAttrs(FD, GV, CGM); - } -} - -bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable( - CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - CodeGen::CGBuilderTy &Builder = CGF.Builder; - - llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); - - // 0-7 are the eight integer registers; the order is different - // on Darwin (for EH), but the range is the same. - // 8 is %eip. - AssignToArrayRange(Builder, Address, Four8, 0, 8); - - if (CGF.CGM.getTarget().getTriple().isOSDarwin()) { - // 12-16 are st(0..4). Not sure why we stop at 4. - // These have size 16, which is sizeof(long double) on - // platforms with 8-byte alignment for that type. 
- llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); - AssignToArrayRange(Builder, Address, Sixteen8, 12, 16); - - } else { - // 9 is %eflags, which doesn't get a size on Darwin for some - // reason. - Builder.CreateAlignedStore( - Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9), - CharUnits::One()); - - // 11-16 are st(0..5). Not sure why we stop at 5. - // These have size 12, which is sizeof(long double) on - // platforms with 4-byte alignment for that type. - llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12); - AssignToArrayRange(Builder, Address, Twelve8, 11, 16); - } - - return false; -} - -//===----------------------------------------------------------------------===// -// X86-64 ABI Implementation -//===----------------------------------------------------------------------===// - - -namespace { -/// The AVX ABI level for X86 targets. -enum class X86AVXABILevel { - None, - AVX, - AVX512 -}; - -/// \returns the size in bits of the largest (native) vector for \p AVXLevel. -static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { - switch (AVXLevel) { - case X86AVXABILevel::AVX512: - return 512; - case X86AVXABILevel::AVX: - return 256; - case X86AVXABILevel::None: - return 128; - } - llvm_unreachable("Unknown AVXLevel"); -} - -/// X86_64ABIInfo - The X86_64 ABI information. -class X86_64ABIInfo : public ABIInfo { - enum Class { - Integer = 0, - SSE, - SSEUp, - X87, - X87Up, - ComplexX87, - NoClass, - Memory - }; - - /// merge - Implement the X86_64 ABI merging algorithm. - /// - /// Merge an accumulating classification \arg Accum with a field - /// classification \arg Field. - /// - /// \param Accum - The accumulating classification. This should - /// always be either NoClass or the result of a previous merge - /// call. In addition, this should never be Memory (the caller - /// should just return Memory for the aggregate). - static Class merge(Class Accum, Class Field); - - /// postMerge - Implement the X86_64 ABI post merging algorithm. - /// - /// Post merger cleanup, reduces a malformed Hi and Lo pair to - /// final MEMORY or SSE classes when necessary. - /// - /// \param AggregateSize - The size of the current aggregate in - /// the classification process. - /// - /// \param Lo - The classification for the parts of the type - /// residing in the low word of the containing object. - /// - /// \param Hi - The classification for the parts of the type - /// residing in the higher words of the containing object. - /// - void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; - - /// classify - Determine the x86_64 register classes in which the - /// given type T should be passed. - /// - /// \param Lo - The classification for the parts of the type - /// residing in the low word of the containing object. - /// - /// \param Hi - The classification for the parts of the type - /// residing in the high word of the containing object. - /// - /// \param OffsetBase - The bit offset of this type in the - /// containing object. Some parameters are classified differently - /// depending on whether they straddle an eightbyte boundary. - /// - /// \param isNamedArg - Whether the argument in question is a "named" - /// argument, as used in AMD64-ABI 3.5.7. - /// - /// \param IsRegCall - Whether the calling convention is regcall. - /// - /// If a word is unused its result will be NoClass; if a type should - /// be passed in Memory then at least the classification of \arg Lo - /// will be Memory.
- /// - /// The \arg Lo class will be NoClass iff the argument is ignored. - /// - /// If the \arg Lo class is ComplexX87, then the \arg Hi class will - /// also be ComplexX87. - void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi, - bool isNamedArg, bool IsRegCall = false) const; - - llvm::Type *GetByteVectorType(QualType Ty) const; - llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType, - unsigned IROffset, QualType SourceTy, - unsigned SourceOffset) const; - llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType, - unsigned IROffset, QualType SourceTy, - unsigned SourceOffset) const; - - /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable - /// result such that the argument will be returned in memory. - ABIArgInfo getIndirectReturnResult(QualType Ty) const; - - /// getIndirectResult - Given a source type \arg Ty, return a suitable result - /// such that the argument will be passed in memory. - /// - /// \param freeIntRegs - The number of free integer registers remaining - /// available. - ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const; - - ABIArgInfo classifyReturnType(QualType RetTy) const; - - ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, - unsigned &neededInt, unsigned &neededSSE, - bool isNamedArg, - bool IsRegCall = false) const; - - ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE, - unsigned &MaxVectorWidth) const; - - ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE, - unsigned &MaxVectorWidth) const; - - bool IsIllegalVectorType(QualType Ty) const; - - /// The 0.98 ABI revision clarified a lot of ambiguities, - /// unfortunately in ways that were not always consistent with - /// certain previous compilers. In particular, platforms which - /// required strict binary compatibility with older versions of GCC - /// may need to exempt themselves. - bool honorsRevision0_98() const { - return !getTarget().getTriple().isOSDarwin(); - } - - /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to - /// classify it as INTEGER (for compatibility with older clang compilers). - bool classifyIntegerMMXAsSSE() const { - // Clang <= 3.8 did not do this. - if (getContext().getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver3_8) - return false; - - const llvm::Triple &Triple = getTarget().getTriple(); - if (Triple.isOSDarwin() || Triple.isPS()) - return false; - if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10) - return false; - return true; - } - - // GCC classifies vectors of __int128 as memory. - bool passInt128VectorsInMem() const { - // Clang <= 9.0 did not do this. - if (getContext().getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver9) - return false; - - const llvm::Triple &T = getTarget().getTriple(); - return T.isOSLinux() || T.isOSNetBSD(); - } - - X86AVXABILevel AVXLevel; - // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on - // 64-bit hardware. - bool Has64BitPointers; - -public: - X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : ABIInfo(CGT), AVXLevel(AVXLevel), - Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {} - - bool isPassedUsingAVXType(QualType type) const { - unsigned neededInt, neededSSE; - // The freeIntRegs argument doesn't matter here.
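- // For example, a hypothetical __m256 argument is classified SSE/SSEUp and - // passed direct with coerce type <8 x float>; 256 > 128, so this returns - // true, while a 128-bit __m128 yields false.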
- ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE, - /*isNamedArg*/true); - if (info.isDirect()) { - llvm::Type *ty = info.getCoerceToType(); - if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty)) - return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128; - } - return false; - } - - void computeInfo(CGFunctionInfo &FI) const override; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - bool has64BitPointers() const { - return Has64BitPointers; - } -}; - -/// WinX86_64ABIInfo - The Windows X86_64 ABI information. -class WinX86_64ABIInfo : public ABIInfo { -public: - WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : ABIInfo(CGT), AVXLevel(AVXLevel), - IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} - - void computeInfo(CGFunctionInfo &FI) const override; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - bool isHomogeneousAggregateBaseType(QualType Ty) const override { - // FIXME: Assumes vectorcall is in use. - return isX86VectorTypeForVectorCall(getContext(), Ty); - } - - bool isHomogeneousAggregateSmallEnough(const Type *Ty, - uint64_t NumMembers) const override { - // FIXME: Assumes vectorcall is in use. - return isX86VectorCallAggregateSmallEnough(NumMembers); - } - -private: - ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType, - bool IsVectorCall, bool IsRegCall) const; - ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs, - const ABIArgInfo &current) const; - - X86AVXABILevel AVXLevel; - - bool IsMingw64; -}; - -class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { -public: - X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) { - SwiftInfo = - std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); - } - - const X86_64ABIInfo &getABIInfo() const { - return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); - } - - /// Disable tail call on x86-64. The epilogue code before the tail jump blocks - /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations. - bool markARCOptimizedReturnCallsAsNoTail() const override { return true; } - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { - return 7; - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); - - // 0-15 are the 16 integer registers. - // 16 is %rip. - AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); - return false; - } - - llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, - StringRef Constraint, - llvm::Type* Ty) const override { - return X86AdjustInlineAsmType(CGF, Constraint, Ty); - } - - bool isNoProtoCallVariadic(const CallArgList &args, - const FunctionNoProtoType *fnType) const override { - // The default CC on x86-64 sets %al to the number of SSE - // registers used, and GCC sets this when calling an unprototyped - // function, so we override the default behavior. However, don't do - // that when AVX types are involved: the ABI explicitly states it is - // undefined, and it doesn't work in practice because of how the ABI - // defines varargs anyway.
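- // For example, calling an unprototyped function with only int and double - // arguments returns true here (%al is set as usual), while a hypothetical - // __m256 argument makes HasAVXType true below and defers to the default.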
- if (fnType->getCallConv() == CC_C) { - bool HasAVXType = false; - for (CallArgList::const_iterator - it = args.begin(), ie = args.end(); it != ie; ++it) { - if (getABIInfo().isPassedUsingAVXType(it->Ty)) { - HasAVXType = true; - break; - } - } - - if (!HasAVXType) - return true; - } - - return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType); - } - - llvm::Constant * - getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { - unsigned Sig = (0xeb << 0) | // jmp rel8 - (0x06 << 8) | // .+0x08 - ('v' << 16) | - ('2' << 24); - return llvm::ConstantInt::get(CGM.Int32Ty, Sig); - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - if (GV->isDeclaration()) - return; - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - llvm::Function *Fn = cast<llvm::Function>(GV); - Fn->addFnAttr("stackrealign"); - } - - addX86InterruptAttrs(FD, GV, CGM); - } - } - - void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, - const FunctionDecl *Caller, - const FunctionDecl *Callee, - const CallArgList &Args) const override; -}; - -static void initFeatureMaps(const ASTContext &Ctx, - llvm::StringMap<bool> &CallerMap, - const FunctionDecl *Caller, - llvm::StringMap<bool> &CalleeMap, - const FunctionDecl *Callee) { - if (CalleeMap.empty() && CallerMap.empty()) { - // The caller is potentially nullptr in the case where the call isn't in a - // function. In this case, the getFunctionFeatureMap ensures we just get - // the TU level setting (since it cannot be modified by 'target'). - Ctx.getFunctionFeatureMap(CallerMap, Caller); - Ctx.getFunctionFeatureMap(CalleeMap, Callee); - } -} - -static bool checkAVXParamFeature(DiagnosticsEngine &Diag, - SourceLocation CallLoc, - const llvm::StringMap<bool> &CallerMap, - const llvm::StringMap<bool> &CalleeMap, - QualType Ty, StringRef Feature, - bool IsArgument) { - bool CallerHasFeat = CallerMap.lookup(Feature); - bool CalleeHasFeat = CalleeMap.lookup(Feature); - if (!CallerHasFeat && !CalleeHasFeat) - return Diag.Report(CallLoc, diag::warn_avx_calling_convention) - << IsArgument << Ty << Feature; - - // Mixing calling conventions here is very clearly an error. - if (!CallerHasFeat || !CalleeHasFeat) - return Diag.Report(CallLoc, diag::err_avx_calling_convention) - << IsArgument << Ty << Feature; - - // Else, both caller and callee have the required feature, so there is no need - // to diagnose. - return false; -} - -static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx, - SourceLocation CallLoc, - const llvm::StringMap<bool> &CallerMap, - const llvm::StringMap<bool> &CalleeMap, QualType Ty, - bool IsArgument) { - uint64_t Size = Ctx.getTypeSize(Ty); - if (Size > 256) - return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, - "avx512f", IsArgument); - - if (Size > 128) - return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx", - IsArgument); - - return false; -} - -void X86_64TargetCodeGenInfo::checkFunctionCallABI( - CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, - const FunctionDecl *Callee, const CallArgList &Args) const { - llvm::StringMap<bool> CallerMap; - llvm::StringMap<bool> CalleeMap; - unsigned ArgIndex = 0; - - // We need to loop through the actual call arguments rather than the - // function's parameters, in case this is variadic.
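- // Hypothetical example: a caller built without -mavx invoking - // void g(__m256) passes a 256-bit vector, so checkAVXParam below reports - // the avx calling-convention warning or error for that argument.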
- for (const CallArg &Arg : Args) { - // The "avx" feature changes how vectors >128 in size are passed. "avx512f" - // additionally changes how vectors >256 in size are passed. Like GCC, we - // warn when a function is called with an argument where this will change. - // Unlike GCC, we also error when it is an obvious ABI mismatch, that is, - // the caller and callee features are mismatched. - // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can - // change its ABI with attribute-target after this call. - if (Arg.getType()->isVectorType() && - CGM.getContext().getTypeSize(Arg.getType()) > 128) { - initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); - QualType Ty = Arg.getType(); - // The CallArg seems to have desugared the type already, so for clearer - // diagnostics, replace it with the type in the FunctionDecl if possible. - if (ArgIndex < Callee->getNumParams()) - Ty = Callee->getParamDecl(ArgIndex)->getType(); - - if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, - CalleeMap, Ty, /*IsArgument*/ true)) - return; - } - ++ArgIndex; - } - - // Check return always, as we don't have a good way of knowing in codegen - // whether this value is used, tail-called, etc. - if (Callee->getReturnType()->isVectorType() && - CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { - initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); - checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, - CalleeMap, Callee->getReturnType(), - /*IsArgument*/ false); - } -} - -static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { - // If the argument does not end in .lib, automatically add the suffix. - // If the argument contains a space, enclose it in quotes. - // This matches the behavior of MSVC. - bool Quote = Lib.contains(' '); - std::string ArgStr = Quote ? "\"" : ""; - ArgStr += Lib; - if (!Lib.endswith_insensitive(".lib") && !Lib.endswith_insensitive(".a")) - ArgStr += ".lib"; - ArgStr += Quote ? 
"\"" : ""; - return ArgStr; -} - -class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { -public: - WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters) - : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, - Win32StructABI, NumRegisterParameters, false) {} - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; - - void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const override { - Opt = "/DEFAULTLIB:"; - Opt += qualifyWindowsLibrary(Lib); - } - - void getDetectMismatchOption(llvm::StringRef Name, - llvm::StringRef Value, - llvm::SmallString<32> &Opt) const override { - Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; - } -}; - -static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) { if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) { - if (CGM.getCodeGenOpts().StackProbeSize != 4096) Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); @@ -2704,9701 +169,13 @@ static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV, } } -void WinX86_32TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - if (GV->isDeclaration()) - return; - addStackProbeTargetAttributes(D, GV, CGM); -} - -class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { -public: - WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) { - SwiftInfo = - std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { - return 7; - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); - - // 0-15 are the 16 integer registers. - // 16 is %rip. - AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); - return false; - } - - void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const override { - Opt = "/DEFAULTLIB:"; - Opt += qualifyWindowsLibrary(Lib); - } - - void getDetectMismatchOption(llvm::StringRef Name, - llvm::StringRef Value, - llvm::SmallString<32> &Opt) const override { - Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; - } -}; - -void WinX86_64TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - if (GV->isDeclaration()) - return; - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - llvm::Function *Fn = cast<llvm::Function>(GV); - Fn->addFnAttr("stackrealign"); - } - - addX86InterruptAttrs(FD, GV, CGM); - } - - addStackProbeTargetAttributes(D, GV, CGM); -} -} - -void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, - Class &Hi) const { - // AMD64-ABI 3.2.3p2: Rule 5. 
Then a post merger cleanup is done: - // - // (a) If one of the classes is Memory, the whole argument is passed in - // memory. - // - // (b) If X87UP is not preceded by X87, the whole argument is passed in - // memory. - // - // (c) If the size of the aggregate exceeds two eightbytes and the first - // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole - // argument is passed in memory. NOTE: This is necessary to keep the - // ABI working for processors that don't support the __m256 type. - // - // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. - // - // Some of these are enforced by the merging logic. Others can arise - // only with unions; for example: - // union { _Complex double; unsigned; } - // - // Note that clauses (b) and (c) were added in 0.98. - // - if (Hi == Memory) - Lo = Memory; - if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) - Lo = Memory; - if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) - Lo = Memory; - if (Hi == SSEUp && Lo != SSE) - Hi = SSE; -} - -X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { - // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is - // classified recursively so that always two fields are - // considered. The resulting class is calculated according to - // the classes of the fields in the eightbyte: - // - // (a) If both classes are equal, this is the resulting class. - // - // (b) If one of the classes is NO_CLASS, the resulting class is - // the other class. - // - // (c) If one of the classes is MEMORY, the result is the MEMORY - // class. - // - // (d) If one of the classes is INTEGER, the result is the - // INTEGER. - // - // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, - // MEMORY is used as class. - // - // (f) Otherwise class SSE is used. - - // Accum should never be memory (we should have returned) or - // ComplexX87 (because this cannot be passed in a structure). - assert((Accum != Memory && Accum != ComplexX87) && - "Invalid accumulated classification during merge."); - if (Accum == Field || Field == NoClass) - return Accum; - if (Field == Memory) - return Memory; - if (Accum == NoClass) - return Field; - if (Accum == Integer || Field == Integer) - return Integer; - if (Field == X87 || Field == X87Up || Field == ComplexX87 || - Accum == X87 || Accum == X87Up) - return Memory; - return SSE; -} - -void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, - Class &Hi, bool isNamedArg, bool IsRegCall) const { - // FIXME: This code can be simplified by introducing a simple value class for - // Class pairs with appropriate constructor methods for the various - // situations. - - // FIXME: Some of the split computations are wrong; unaligned vectors - // shouldn't be passed in registers for example, so there is no chance they - // can straddle an eightbyte. Verify & simplify. - - Lo = Hi = NoClass; - - Class &Current = OffsetBase < 64 ? 
Lo : Hi; - Current = Memory; - - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - BuiltinType::Kind k = BT->getKind(); - - if (k == BuiltinType::Void) { - Current = NoClass; - } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { - Lo = Integer; - Hi = Integer; - } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { - Current = Integer; - } else if (k == BuiltinType::Float || k == BuiltinType::Double || - k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { - Current = SSE; - } else if (k == BuiltinType::LongDouble) { - const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::IEEEquad()) { - Lo = SSE; - Hi = SSEUp; - } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { - Lo = X87; - Hi = X87Up; - } else if (LDF == &llvm::APFloat::IEEEdouble()) { - Current = SSE; - } else - llvm_unreachable("unexpected long double representation!"); - } - // FIXME: _Decimal32 and _Decimal64 are SSE. - // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). - return; - } - - if (const EnumType *ET = Ty->getAs<EnumType>()) { - // Classify the underlying integer type. - classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); - return; - } - - if (Ty->hasPointerRepresentation()) { - Current = Integer; - return; - } - - if (Ty->isMemberPointerType()) { - if (Ty->isMemberFunctionPointerType()) { - if (Has64BitPointers) { - // If Has64BitPointers, this is an {i64, i64}, so classify both - // Lo and Hi now. - Lo = Hi = Integer; - } else { - // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that - // straddles an eightbyte boundary, Hi should be classified as well. - uint64_t EB_FuncPtr = (OffsetBase) / 64; - uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; - if (EB_FuncPtr != EB_ThisAdj) { - Lo = Hi = Integer; - } else { - Current = Integer; - } - } - } else { - Current = Integer; - } - return; - } - - if (const VectorType *VT = Ty->getAs<VectorType>()) { - uint64_t Size = getContext().getTypeSize(VT); - if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { - // gcc passes the following as integer: - // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> - // 2 bytes - <2 x char>, <1 x short> - // 1 byte - <1 x char> - Current = Integer; - - // If this type crosses an eightbyte boundary, it should be - // split. - uint64_t EB_Lo = (OffsetBase) / 64; - uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; - if (EB_Lo != EB_Hi) - Hi = Lo; - } else if (Size == 64) { - QualType ElementType = VT->getElementType(); - - // gcc passes <1 x double> in memory. :( - if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) - return; - - // gcc passes <1 x long long> as SSE but clang used to unconditionally - // pass them as integer. For platforms where clang is the de facto - // platform compiler, we must continue to use integer. - if (!classifyIntegerMMXAsSSE() && - (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || - ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || - ElementType->isSpecificBuiltinType(BuiltinType::Long) || - ElementType->isSpecificBuiltinType(BuiltinType::ULong))) - Current = Integer; - else - Current = SSE; - - // If this type crosses an eightbyte boundary, it should be - // split. - if (OffsetBase && OffsetBase != 64) - Hi = Lo; - } else if (Size == 128 || - (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { - QualType ElementType = VT->getElementType(); - - // gcc passes 256 and 512 bit <X x __int128> vectors in memory. 
:( - if (passInt128VectorsInMem() && Size != 128 && - (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || - ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) - return; - - // Arguments of 256-bits are split into four eightbyte chunks. The - // least significant one belongs to class SSE and all the others to class - // SSEUP. The original Lo and Hi design considers that types can't be - // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. - // This design isn't correct for 256-bits, but since there are no cases - // where the upper parts would need to be inspected, avoid adding - // complexity and just consider Hi to match the 64-256 part. - // - // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in - // registers if they are "named", i.e. not part of the "..." of a - // variadic function. - // - // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are - // split into eight eightbyte chunks, one SSE and seven SSEUP. - Lo = SSE; - Hi = SSEUp; - } - return; - } - - if (const ComplexType *CT = Ty->getAs<ComplexType>()) { - QualType ET = getContext().getCanonicalType(CT->getElementType()); - - uint64_t Size = getContext().getTypeSize(Ty); - if (ET->isIntegralOrEnumerationType()) { - if (Size <= 64) - Current = Integer; - else if (Size <= 128) - Lo = Hi = Integer; - } else if (ET->isFloat16Type() || ET == getContext().FloatTy || - ET->isBFloat16Type()) { - Current = SSE; - } else if (ET == getContext().DoubleTy) { - Lo = Hi = SSE; - } else if (ET == getContext().LongDoubleTy) { - const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::IEEEquad()) - Current = Memory; - else if (LDF == &llvm::APFloat::x87DoubleExtended()) - Current = ComplexX87; - else if (LDF == &llvm::APFloat::IEEEdouble()) - Lo = Hi = SSE; - else - llvm_unreachable("unexpected long double representation!"); - } - - // If this complex type crosses an eightbyte boundary then it - // should be split. - uint64_t EB_Real = (OffsetBase) / 64; - uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; - if (Hi == NoClass && EB_Real != EB_Imag) - Hi = Lo; - - return; - } - - if (const auto *EITy = Ty->getAs<BitIntType>()) { - if (EITy->getNumBits() <= 64) - Current = Integer; - else if (EITy->getNumBits() <= 128) - Lo = Hi = Integer; - // Larger values need to get passed in memory. - return; - } - - if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { - // Arrays are treated like structures. - - uint64_t Size = getContext().getTypeSize(Ty); - - // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than eight eightbytes, ..., it has class MEMORY. - // The regcall ABI doesn't impose this limit on an object; the only limit - // is the number of free registers, which is checked in computeInfo. - if (!IsRegCall && Size > 512) - return; - - // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned - // fields, it has class MEMORY. - // - // Only need to check alignment of array base. - if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) - return; - - // Otherwise implement simplified merge. We could be smarter about - // this, but it isn't worth it and would be harder to verify. - Current = NoClass; - uint64_t EltSize = getContext().getTypeSize(AT->getElementType()); - uint64_t ArraySize = AT->getSize().getZExtValue(); - - // The only case a 256-bit wide vector could be used is when the array - // contains a single 256-bit element.
Since Lo and Hi logic isn't extended - // to work for sizes wider than 128, early check and fallback to memory. - // - if (Size > 128 && - (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) - return; - - for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { - Class FieldLo, FieldHi; - classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg); - Lo = merge(Lo, FieldLo); - Hi = merge(Hi, FieldHi); - if (Lo == Memory || Hi == Memory) - break; - } - - postMerge(Size, Lo, Hi); - assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); - return; - } - - if (const RecordType *RT = Ty->getAs<RecordType>()) { - uint64_t Size = getContext().getTypeSize(Ty); - - // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than eight eightbytes, ..., it has class MEMORY. - if (Size > 512) - return; - - // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial - // copy constructor or a non-trivial destructor, it is passed by invisible - // reference. - if (getRecordArgABI(RT, getCXXABI())) - return; - - const RecordDecl *RD = RT->getDecl(); - - // Assume variable sized types are passed in memory. - if (RD->hasFlexibleArrayMember()) - return; - - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); - - // Reset Lo class, this will be recomputed. - Current = NoClass; - - // If this is a C++ record, classify the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (const auto &I : CXXRD->bases()) { - assert(!I.isVirtual() && !I.getType()->isDependentType() && - "Unexpected base class!"); - const auto *Base = - cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); - - // Classify this field. - // - // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a - // single eightbyte, each is classified separately. Each eightbyte gets - // initialized to class NO_CLASS. - Class FieldLo, FieldHi; - uint64_t Offset = - OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); - classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); - Lo = merge(Lo, FieldLo); - Hi = merge(Hi, FieldHi); - if (Lo == Memory || Hi == Memory) { - postMerge(Size, Lo, Hi); - return; - } - } - } - - // Classify the fields one at a time, merging the results. - unsigned idx = 0; - bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver11 || - getContext().getTargetInfo().getTriple().isPS(); - bool IsUnion = RT->isUnionType() && !UseClang11Compat; - - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i, ++idx) { - uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); - bool BitField = i->isBitField(); - - // Ignore padding bit-fields. - if (BitField && i->isUnnamedBitfield()) - continue; - - // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than - // eight eightbytes, or it contains unaligned fields, it has class MEMORY. - // - // The only case a 256-bit or a 512-bit wide vector could be used is when - // the struct contains a single 256-bit or 512-bit element. Early check - // and fallback to memory. - // - // FIXME: Extended the Lo and Hi logic properly to work for size wider - // than 128. - if (Size > 128 && - ((!IsUnion && Size != getContext().getTypeSize(i->getType())) || - Size > getNativeVectorSizeForAVXABI(AVXLevel))) { - Lo = Memory; - postMerge(Size, Lo, Hi); - return; - } - // Note, skip this test for bit-fields, see below. 
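- // For example, in a hypothetical #pragma pack(1) struct { char c; int n; }, - // n lies at bit offset 8, which is not a multiple of its natural 32-bit - // alignment, so the struct falls into MEMORY by the unaligned-field rule.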
- if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { - Lo = Memory; - postMerge(Size, Lo, Hi); - return; - } - - // Classify this field. - // - // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate - // exceeds a single eightbyte, each is classified - // separately. Each eightbyte gets initialized to class - // NO_CLASS. - Class FieldLo, FieldHi; - - // Bit-fields require special handling, they do not force the - // structure to be passed in memory even if unaligned, and - // therefore they can straddle an eightbyte. - if (BitField) { - assert(!i->isUnnamedBitfield()); - uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); - uint64_t Size = i->getBitWidthValue(getContext()); - - uint64_t EB_Lo = Offset / 64; - uint64_t EB_Hi = (Offset + Size - 1) / 64; - - if (EB_Lo) { - assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); - FieldLo = NoClass; - FieldHi = Integer; - } else { - FieldLo = Integer; - FieldHi = EB_Hi ? Integer : NoClass; - } - } else - classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); - Lo = merge(Lo, FieldLo); - Hi = merge(Hi, FieldHi); - if (Lo == Memory || Hi == Memory) - break; - } - - postMerge(Size, Lo, Hi); - } -} - -ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { - // If this is a scalar LLVM value then assume LLVM will pass it in the right - // place naturally. - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - if (Ty->isBitIntType()) - return getNaturalAlignIndirect(Ty); - - return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); - } - - return getNaturalAlignIndirect(Ty); -} - -bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { - if (const VectorType *VecTy = Ty->getAs<VectorType>()) { - uint64_t Size = getContext().getTypeSize(VecTy); - unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); - if (Size <= 64 || Size > LargestVector) - return true; - QualType EltTy = VecTy->getElementType(); - if (passInt128VectorsInMem() && - (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || - EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) - return true; - } - - return false; -} - -ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, - unsigned freeIntRegs) const { - // If this is a scalar LLVM value then assume LLVM will pass it in the right - // place naturally. - // - // This assumption is optimistic, as there could be free registers available - // when we need to pass this argument in memory, and LLVM could try to pass - // the argument in the free register. This does not seem to happen currently, - // but this code would be much safer if we could mark the argument with - // 'onstack'. See PR12193. - if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && - !Ty->isBitIntType()) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); - } - - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - // Compute the byval alignment. We specify the alignment of the byval in all - // cases so that the mid-level optimizer knows the alignment of the byval. 
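- // E.g. a struct declared alignas(32) gets byval alignment 32 here, while an - // ordinary 4-byte-aligned struct is raised to the 8-byte minimum.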
- unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); - - // Attempt to avoid passing indirect results using byval when possible. This - // is important for good codegen. - // - // We do this by coercing the value into a scalar type which the backend can - // handle naturally (i.e., without using byval). - // - // For simplicity, we currently only do this when we have exhausted all of the - // free integer registers. Doing this when there are free integer registers - // would require more care, as we would have to ensure that the coerced value - // did not claim the unused register. That would require either reordering the - // arguments to the function (so that any subsequent inreg values came first), - // or only doing this optimization when there were no following arguments that - // might be inreg. - // - // We currently expect it to be rare (particularly in well written code) for - // arguments to be passed on the stack when there are still free integer - // registers available (this would typically imply large structs being passed - // by value), so this seems like a fair tradeoff for now. - // - // We can revisit this if the backend grows support for 'onstack' parameter - // attributes. See PR12193. - if (freeIntRegs == 0) { - uint64_t Size = getContext().getTypeSize(Ty); - - // If this type fits in an eightbyte, coerce it into the matching integral - // type, which will end up on the stack (with alignment 8). - if (Align == 8 && Size <= 64) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - Size)); - } - - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); -} - -/// The ABI specifies that a value should be passed in a full vector XMM/YMM -/// register. Pick an LLVM IR type that will be passed as a vector register. -llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { - // Wrapper structs/arrays that only contain vectors are passed just like - // vectors; strip them off if present. - if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) - Ty = QualType(InnerTy, 0); - - llvm::Type *IRType = CGT.ConvertType(Ty); - if (isa<llvm::VectorType>(IRType)) { - // Don't pass vXi128 vectors in their native type; the backend can't - // legalize them. - if (passInt128VectorsInMem() && - cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) { - // Use a vXi64 vector. - uint64_t Size = getContext().getTypeSize(Ty); - return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), - Size / 64); - } - - return IRType; - } - - if (IRType->getTypeID() == llvm::Type::FP128TyID) - return IRType; - - // We couldn't find the preferred IR vector type for 'Ty'. - uint64_t Size = getContext().getTypeSize(Ty); - assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); - - - // Return an LLVM IR vector type based on the size of 'Ty'. - return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), - Size / 64); -} - -/// BitsContainNoUserData - Return true if the specified [start,end) bit range -/// is known to either be off the end of the specified type or be in -/// alignment padding. The user type specified is known to be at most 128 bits -/// in size, and to have passed through X86_64ABIInfo::classify with a successful -/// classification that put one of the two halves in the INTEGER class. -/// -/// It is conservatively correct to return false.
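- /// For example, in struct { double d; int i; } (16 bytes) bits [96, 128) are - /// tail padding, so querying that range returns true; in - /// struct { double d; int a; int b; } the same range holds b, so the query - /// returns false.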
-static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, - unsigned EndBit, ASTContext &Context) { - // If the bytes being queried are off the end of the type, there is no user - // data hiding here. This handles analysis of builtins, vectors and other - // types that don't contain interesting padding. - unsigned TySize = (unsigned)Context.getTypeSize(Ty); - if (TySize <= StartBit) - return true; - - if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { - unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); - unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); - - // Check each element to see if the element overlaps with the queried range. - for (unsigned i = 0; i != NumElts; ++i) { - // If the element is after the span we care about, then we're done. - unsigned EltOffset = i*EltSize; - if (EltOffset >= EndBit) break; - - unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; - if (!BitsContainNoUserData(AT->getElementType(), EltStart, - EndBit-EltOffset, Context)) - return false; - } - // If it overlaps no elements, then it is safe to process as padding. - return true; - } - - if (const RecordType *RT = Ty->getAs<RecordType>()) { - const RecordDecl *RD = RT->getDecl(); - const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (const auto &I : CXXRD->bases()) { - assert(!I.isVirtual() && !I.getType()->isDependentType() && - "Unexpected base class!"); - const auto *Base = - cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); - - // If the base is after the span we care about, ignore it. - unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); - if (BaseOffset >= EndBit) continue; - - unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; - if (!BitsContainNoUserData(I.getType(), BaseStart, - EndBit-BaseOffset, Context)) - return false; - } - } - - // Verify that no field has data that overlaps the region of interest. Yes - // this could be sped up a lot by being smarter about queried fields, - // however we're only looking at structs up to 16 bytes, so we don't care - // much. - unsigned idx = 0; - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i, ++idx) { - unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); - - // If we found a field after the region we care about, then we're done. - if (FieldOffset >= EndBit) break; - - unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; - if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, - Context)) - return false; - } - - // If nothing in this record overlapped the area of interest, then we're - // clean. - return true; - } - - return false; -} - -/// getFPTypeAtOffset - Return a floating point type at the specified offset. -static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset, - const llvm::DataLayout &TD) { - if (IROffset == 0 && IRType->isFloatingPointTy()) - return IRType; - - // If this is a struct, recurse into the field at the specified offset.
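- // E.g. for { float, float } at IROffset 4 this recursion finds the second - // float; for { double, i32 } at IROffset 8 it reaches the i32 and returns - // nullptr, since i32 is not a floating-point type.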
- if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { - if (!STy->getNumContainedTypes()) - return nullptr; - - const llvm::StructLayout *SL = TD.getStructLayout(STy); - unsigned Elt = SL->getElementContainingOffset(IROffset); - IROffset -= SL->getElementOffset(Elt); - return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD); - } - - // If this is an array, recurse into the field at the specified offset. - if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { - llvm::Type *EltTy = ATy->getElementType(); - unsigned EltSize = TD.getTypeAllocSize(EltTy); - IROffset -= IROffset / EltSize * EltSize; - return getFPTypeAtOffset(EltTy, IROffset, TD); - } - - return nullptr; -} - -/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the -/// low 8 bytes of an XMM register, corresponding to the SSE class. -llvm::Type *X86_64ABIInfo:: -GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, - QualType SourceTy, unsigned SourceOffset) const { - const llvm::DataLayout &TD = getDataLayout(); - unsigned SourceSize = - (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; - llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); - if (!T0 || T0->isDoubleTy()) - return llvm::Type::getDoubleTy(getVMContext()); - - // Get the adjacent FP type. - llvm::Type *T1 = nullptr; - unsigned T0Size = TD.getTypeAllocSize(T0); - if (SourceSize > T0Size) - T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); - if (T1 == nullptr) { - // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due - // to its alignment. - if (T0->is16bitFPTy() && SourceSize > 4) - T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); - // If we can't get a second FP type, return a simple half or float. - // avx512fp16-abi.c:pr51813_2 shows it works to return float for - // {float, i8} too. - if (T1 == nullptr) - return T0; - } - - if (T0->isFloatTy() && T1->isFloatTy()) - return llvm::FixedVectorType::get(T0, 2); - - if (T0->is16bitFPTy() && T1->is16bitFPTy()) { - llvm::Type *T2 = nullptr; - if (SourceSize > 4) - T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); - if (T2 == nullptr) - return llvm::FixedVectorType::get(T0, 2); - return llvm::FixedVectorType::get(T0, 4); - } - - if (T0->is16bitFPTy() || T1->is16bitFPTy()) - return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); - - return llvm::Type::getDoubleTy(getVMContext()); -} - - -/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in -/// an 8-byte GPR. This means that we either have a scalar or we are talking -/// about the high or low part of an up-to-16-byte struct. This routine picks -/// the best LLVM IR type to represent this, which may be i64 or may be anything -/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, -/// etc). -/// -/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for -/// the source type. IROffset is an offset in bytes into the LLVM IR type that -/// the 8-byte value references. PrefType may be null. -/// -/// SourceTy is the source-level type for the entire argument. SourceOffset is -/// an offset into this that we're processing (which is always either 0 or 8). -/// -llvm::Type *X86_64ABIInfo:: -GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, - QualType SourceTy, unsigned SourceOffset) const { - // If we're dealing with an un-offset LLVM IR type, then it means that we're - // returning an 8-byte unit starting with it. See if we can safely use it. 
- if (IROffset == 0) { - // Pointers and int64's always fill the 8-byte unit. - if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || - IRType->isIntegerTy(64)) - return IRType; - - // If we have a 1/2/4-byte integer, we can use it only if the rest of the - // goodness in the source type is just tail padding. This is allowed to - // kick in for struct {double,int} on the int, but not on - // struct{double,int,int} because we wouldn't return the second int. We - // have to do this analysis on the source type because we can't depend on - // unions being lowered a specific way etc. - if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || - IRType->isIntegerTy(32) || - (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { - unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : - cast<llvm::IntegerType>(IRType)->getBitWidth(); - - if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, - SourceOffset*8+64, getContext())) - return IRType; - } - } - - if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { - // If this is a struct, recurse into the field at the specified offset. - const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); - if (IROffset < SL->getSizeInBytes()) { - unsigned FieldIdx = SL->getElementContainingOffset(IROffset); - IROffset -= SL->getElementOffset(FieldIdx); - - return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, - SourceTy, SourceOffset); - } - } - - if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { - llvm::Type *EltTy = ATy->getElementType(); - unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); - unsigned EltOffset = IROffset/EltSize*EltSize; - return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, - SourceOffset); - } - - // Okay, we don't have any better idea of what to pass, so we pass this in an - // integer register that isn't too big to fit the rest of the struct. - unsigned TySizeInBytes = - (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); - - assert(TySizeInBytes != SourceOffset && "Empty field?"); - - // It is always safe to classify this as an integer type up to i64 that - // isn't larger than the structure. - return llvm::IntegerType::get(getVMContext(), - std::min(TySizeInBytes-SourceOffset, 8U)*8); -} - - -/// GetX86_64ByValArgumentPair - Given a high and low type that can ideally -/// be used as elements of a two register pair to pass or return, return a -/// first class aggregate to represent them. For example, if the low part of -/// a by-value argument should be passed as i32* and the high part as float, -/// return {i32*, float}. -static llvm::Type * -GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, - const llvm::DataLayout &TD) { - // In order to correctly satisfy the ABI, we need the high part to start - // at offset 8. If the high and low parts we inferred are both 4-byte types - // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have - // the second element at offset 8. Check for this: - unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); - llvm::Align HiAlign = TD.getABITypeAlign(Hi); - unsigned HiStart = llvm::alignTo(LoSize, HiAlign); - assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); - - // To handle this, we have to increase the size of the low part so that the - // second element will start at an 8 byte offset. We can't increase the size - // of the second element because it might make us access off the end of the - // struct.
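- // E.g. with Lo = float and Hi = i32, the naive pair {float, i32} would put - // the i32 at offset 4, so the float is promoted to double below, giving - // {double, i32} with the high part at offset 8 as required.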
- if (HiStart != 8) { - // There are usually two sorts of types the ABI generation code can produce - // for the low part of a pair that aren't 8 bytes in size: half, float or - // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and - // NaCl). - // Promote these to a larger type. - if (Lo->isHalfTy() || Lo->isFloatTy()) - Lo = llvm::Type::getDoubleTy(Lo->getContext()); - else { - assert((Lo->isIntegerTy() || Lo->isPointerTy()) - && "Invalid/unknown lo type"); - Lo = llvm::Type::getInt64Ty(Lo->getContext()); - } - } - - llvm::StructType *Result = llvm::StructType::get(Lo, Hi); - - // Verify that the second element is at an 8-byte offset. - assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && - "Invalid x86-64 argument pair!"); - return Result; -} - -ABIArgInfo X86_64ABIInfo:: -classifyReturnType(QualType RetTy) const { - // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the - // classification algorithm. - X86_64ABIInfo::Class Lo, Hi; - classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); - - // Check some invariants. - assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); - assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); - - llvm::Type *ResType = nullptr; - switch (Lo) { - case NoClass: - if (Hi == NoClass) - return ABIArgInfo::getIgnore(); - // If the low part is just padding, it takes no register, leave ResType - // null. - assert((Hi == SSE || Hi == Integer || Hi == X87Up) && - "Unknown missing lo part"); - break; - - case SSEUp: - case X87Up: - llvm_unreachable("Invalid classification for lo word."); - - // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via - // hidden argument. - case Memory: - return getIndirectReturnResult(RetTy); - - // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next - // available register of the sequence %rax, %rdx is used. - case Integer: - ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); - - // If we have a sign or zero extended integer, make sure to return Extend - // so that the parameter gets the right LLVM IR attributes. - if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - if (RetTy->isIntegralOrEnumerationType() && - isPromotableIntegerTypeForABI(RetTy)) - return ABIArgInfo::getExtend(RetTy); - } - break; - - // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next - // available SSE register of the sequence %xmm0, %xmm1 is used. - case SSE: - ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); - break; - - // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is - // returned on the X87 stack in %st0 as 80-bit x87 number. - case X87: - ResType = llvm::Type::getX86_FP80Ty(getVMContext()); - break; - - // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real - // part of the value is returned in %st0 and the imaginary part in - // %st1. - case ComplexX87: - assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); - ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), - llvm::Type::getX86_FP80Ty(getVMContext())); - break; - } - - llvm::Type *HighPart = nullptr; - switch (Hi) { - // Memory was handled previously and X87 should - // never occur as a hi class. - case Memory: - case X87: - llvm_unreachable("Invalid classification for hi word."); - - case ComplexX87: // Previously handled. 
- case NoClass: - break; - - case Integer: - HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); - if (Lo == NoClass) // Return HighPart at offset 8 in memory. - return ABIArgInfo::getDirect(HighPart, 8); - break; - case SSE: - HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); - if (Lo == NoClass) // Return HighPart at offset 8 in memory. - return ABIArgInfo::getDirect(HighPart, 8); - break; - - // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte - // is passed in the next available eightbyte chunk of the last used - // vector register. - // - // SSEUP should always be preceded by SSE, just widen. - case SSEUp: - assert(Lo == SSE && "Unexpected SSEUp classification."); - ResType = GetByteVectorType(RetTy); - break; - - // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is - // returned together with the previous X87 value in %st0. - case X87Up: - // If X87Up is preceded by X87, we don't need to do - // anything. However, in some cases with unions it may not be - // preceded by X87. In such situations we follow gcc and pass the - // extra bits in an SSE reg. - if (Lo != X87) { - HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); - if (Lo == NoClass) // Return HighPart at offset 8 in memory. - return ABIArgInfo::getDirect(HighPart, 8); - } - break; - } - - // If a high part was specified, merge it together with the low part. It is - // known to pass in the high eightbyte of the result. We do this by forming a - // first class struct aggregate with the high and low part: {low, high} - if (HighPart) - ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); - - return ABIArgInfo::getDirect(ResType); -} - -ABIArgInfo -X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, - unsigned &neededInt, unsigned &neededSSE, - bool isNamedArg, bool IsRegCall) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - X86_64ABIInfo::Class Lo, Hi; - classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); - - // Check some invariants. - // FIXME: Enforce these by construction. - assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); - assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); - - neededInt = 0; - neededSSE = 0; - llvm::Type *ResType = nullptr; - switch (Lo) { - case NoClass: - if (Hi == NoClass) - return ABIArgInfo::getIgnore(); - // If the low part is just padding, it takes no register, leave ResType - // null. - assert((Hi == SSE || Hi == Integer || Hi == X87Up) && - "Unknown missing lo part"); - break; - - // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument - // on the stack. - case Memory: - - // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or - // COMPLEX_X87, it is passed in memory. - case X87: - case ComplexX87: - if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) - ++neededInt; - return getIndirectResult(Ty, freeIntRegs); - - case SSEUp: - case X87Up: - llvm_unreachable("Invalid classification for lo word."); - - // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next - // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 - // and %r9 is used. - case Integer: - ++neededInt; - - // Pick an 8-byte type based on the preferred type. - ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); - - // If we have a sign or zero extended integer, make sure to return Extend - // so that the parameter gets the right LLVM IR attributes.
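// For illustration (hypothetical declaration, not part of this diff): a
// promotable scalar such as 'short' reaches this point classified INTEGER,
// so a C prototype like
//
//   void takes_short(short x);
//
// is lowered with an extension attribute, roughly
//
//   declare void @takes_short(i16 signext)
//
// on x86-64 SysV targets.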
- if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - if (Ty->isIntegralOrEnumerationType() && - isPromotableIntegerTypeForABI(Ty)) - return ABIArgInfo::getExtend(Ty); - } - - break; - - // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next - // available SSE register is used, the registers are taken in the - // order from %xmm0 to %xmm7. - case SSE: { - llvm::Type *IRType = CGT.ConvertType(Ty); - ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); - ++neededSSE; - break; - } - } - - llvm::Type *HighPart = nullptr; - switch (Hi) { - // Memory was handled previously, ComplexX87 and X87 should - // never occur as hi classes, and X87Up must be preceded by X87, - // which is passed in memory. - case Memory: - case X87: - case ComplexX87: - llvm_unreachable("Invalid classification for hi word."); - - case NoClass: break; - - case Integer: - ++neededInt; - // Pick an 8-byte type based on the preferred type. - HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); - - if (Lo == NoClass) // Pass HighPart at offset 8 in memory. - return ABIArgInfo::getDirect(HighPart, 8); - break; - - // X87Up generally doesn't occur here (long double is passed in - // memory), except in situations involving unions. - case X87Up: - case SSE: - HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); - - if (Lo == NoClass) // Pass HighPart at offset 8 in memory. - return ABIArgInfo::getDirect(HighPart, 8); - - ++neededSSE; - break; - - // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the - // eightbyte is passed in the upper half of the last used SSE - // register. This only happens when 128-bit vectors are passed. - case SSEUp: - assert(Lo == SSE && "Unexpected SSEUp classification"); - ResType = GetByteVectorType(Ty); - break; - } - - // If a high part was specified, merge it together with the low part. It is - // known to pass in the high eightbyte of the result. 
We do this by forming a - // first class struct aggregate with the high and low part: {low, high} - if (HighPart) - ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); - - return ABIArgInfo::getDirect(ResType); -} - -ABIArgInfo -X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE, - unsigned &MaxVectorWidth) const { - auto RT = Ty->getAs<RecordType>(); - assert(RT && "classifyRegCallStructType only valid with struct types"); - - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectReturnResult(Ty); - - // Sum up bases - if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { - if (CXXRD->isDynamicClass()) { - NeededInt = NeededSSE = 0; - return getIndirectReturnResult(Ty); - } - - for (const auto &I : CXXRD->bases()) - if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE, - MaxVectorWidth) - .isIndirect()) { - NeededInt = NeededSSE = 0; - return getIndirectReturnResult(Ty); - } - } - - // Sum up members - for (const auto *FD : RT->getDecl()->fields()) { - QualType MTy = FD->getType(); - if (MTy->isRecordType() && !MTy->isUnionType()) { - if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE, - MaxVectorWidth) - .isIndirect()) { - NeededInt = NeededSSE = 0; - return getIndirectReturnResult(Ty); - } - } else { - unsigned LocalNeededInt, LocalNeededSSE; - if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE, - true, true) - .isIndirect()) { - NeededInt = NeededSSE = 0; - return getIndirectReturnResult(Ty); - } - if (const auto *AT = getContext().getAsConstantArrayType(MTy)) - MTy = AT->getElementType(); - if (const auto *VT = MTy->getAs<VectorType>()) - if (getContext().getTypeSize(VT) > MaxVectorWidth) - MaxVectorWidth = getContext().getTypeSize(VT); - NeededInt += LocalNeededInt; - NeededSSE += LocalNeededSSE; - } - } - - return ABIArgInfo::getDirect(); -} - -ABIArgInfo -X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE, - unsigned &MaxVectorWidth) const { - - NeededInt = 0; - NeededSSE = 0; - MaxVectorWidth = 0; - - return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE, - MaxVectorWidth); -} - -void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - - const unsigned CallingConv = FI.getCallingConvention(); - // It is possible to force the Win64 calling convention on any x86_64 target - // by using __attribute__((ms_abi)). In such a case, delegate this call to - // WinX86_64ABIInfo::computeInfo to correctly emit Win64-compatible code. - if (CallingConv == llvm::CallingConv::Win64) { - WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); - Win64ABIInfo.computeInfo(FI); - return; - } - - bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; - - // Keep track of the number of assigned registers. - unsigned FreeIntRegs = IsRegCall ? 11 : 6; - unsigned FreeSSERegs = IsRegCall ?
16 : 8; - unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; - - if (!::classifyReturnType(getCXXABI(), FI, *this)) { - if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && - !FI.getReturnType()->getTypePtr()->isUnionType()) { - FI.getReturnInfo() = classifyRegCallStructType( - FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); - if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { - FreeIntRegs -= NeededInt; - FreeSSERegs -= NeededSSE; - } else { - FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); - } - } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && - getContext().getCanonicalType(FI.getReturnType() - ->getAs<ComplexType>() - ->getElementType()) == - getContext().LongDoubleTy) - // Complex Long Double Type is passed in Memory when Regcall - // calling convention is used. - FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); - else - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - } - - // If the return value is indirect, then the hidden argument is consuming one - // integer register. - if (FI.getReturnInfo().isIndirect()) - --FreeIntRegs; - else if (NeededSSE && MaxVectorWidth > 0) - FI.setMaxVectorWidth(MaxVectorWidth); - - // The chain argument effectively gives us another free register. - if (FI.isChainCall()) - ++FreeIntRegs; - - unsigned NumRequiredArgs = FI.getNumRequiredArgs(); - // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers - // get assigned (in left-to-right order) for passing as follows... - unsigned ArgNo = 0; - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it, ++ArgNo) { - bool IsNamedArg = ArgNo < NumRequiredArgs; - - if (IsRegCall && it->type->isStructureOrClassType()) - it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, - MaxVectorWidth); - else - it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, - NeededSSE, IsNamedArg); - - // AMD64-ABI 3.2.3p3: If there are no registers available for any - // eightbyte of an argument, the whole argument is passed on the - // stack. If registers have already been assigned for some - // eightbytes of such an argument, the assignments get reverted. - if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { - FreeIntRegs -= NeededInt; - FreeSSERegs -= NeededSSE; - if (MaxVectorWidth > FI.getMaxVectorWidth()) - FI.setMaxVectorWidth(MaxVectorWidth); - } else { - it->info = getIndirectResult(it->type, FreeIntRegs); - } - } -} - -static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, - Address VAListAddr, QualType Ty) { - Address overflow_arg_area_p = - CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); - llvm::Value *overflow_arg_area = - CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); - - // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 - // byte boundary if alignment needed by type exceeds 8 byte boundary. - // It isn't stated explicitly in the standard, but in practice we use - // alignment greater than 16 where necessary. - CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); - if (Align > CharUnits::fromQuantity(8)) { - overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, - Align); - } - - // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. - llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); - llvm::Value *Res = - CGF.Builder.CreateBitCast(overflow_arg_area, - llvm::PointerType::getUnqual(LTy)); - - // AMD64-ABI 3.5.7p5: Step 9. 
Set l->overflow_arg_area to: - // l->overflow_arg_area + sizeof(type). - // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to - // an 8 byte boundary. - - uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; - llvm::Value *Offset = - llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); - overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area, - Offset, "overflow_arg_area.next"); - CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); - - // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. - return Address(Res, LTy, Align); -} - -Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - // Assume that va_list type is correct; should be pointer to LLVM type: - // struct { - // i32 gp_offset; - // i32 fp_offset; - // i8* overflow_arg_area; - // i8* reg_save_area; - // }; - unsigned neededInt, neededSSE; - - Ty = getContext().getCanonicalType(Ty); - ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, - /*isNamedArg*/false); - - // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed - // in the registers. If not go to step 7. - if (!neededInt && !neededSSE) - return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); - - // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of - // general purpose registers needed to pass type and num_fp to hold - // the number of floating point registers needed. - - // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into - // registers. In the case: l->gp_offset > 48 - num_gp * 8 or - // l->fp_offset > 304 - num_fp * 16 go to step 7. - // - // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of - // register save space. - - llvm::Value *InRegs = nullptr; - Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); - llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; - if (neededInt) { - gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); - gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); - InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); - InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); - } - - if (neededSSE) { - fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); - fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); - llvm::Value *FitsInFP = - llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); - FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); - InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; - } - - llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); - llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); - llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); - CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); - - // Emit code to load the value if it was passed in registers. - - CGF.EmitBlock(InRegBlock); - - // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with - // an offset of l->gp_offset and/or l->fp_offset. This may require - // copying to a temporary location in case the parameter is passed - // in different register classes or requires an alignment greater - // than 8 for general purpose registers and 16 for XMM registers.
- // - // FIXME: This really results in shameful code when we end up needing to - // collect arguments from different places; often what should result in a - // simple assembling of a structure from scattered addresses has many more - // loads than necessary. Can we clean this up? - llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); - llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area"); - - Address RegAddr = Address::invalid(); - if (neededInt && neededSSE) { - // FIXME: Cleanup. - assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); - llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType()); - Address Tmp = CGF.CreateMemTemp(Ty); - Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); - assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); - llvm::Type *TyLo = ST->getElementType(0); - llvm::Type *TyHi = ST->getElementType(1); - assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && - "Unexpected ABI info for mixed regs"); - llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); - llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); - llvm::Value *GPAddr = - CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset); - llvm::Value *FPAddr = - CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset); - llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; - llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; - - // Copy the first element. - // FIXME: Our choice of alignment here and below is probably pessimistic. - llvm::Value *V = CGF.Builder.CreateAlignedLoad( - TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), - CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo))); - CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); - - // Copy the second element. - V = CGF.Builder.CreateAlignedLoad( - TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), - CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi))); - CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); - - RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); - } else if (neededInt) { - RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset), - CGF.Int8Ty, CharUnits::fromQuantity(8)); - RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); - - // Copy to a temporary if necessary to ensure the appropriate alignment. - auto TInfo = getContext().getTypeInfoInChars(Ty); - uint64_t TySize = TInfo.Width.getQuantity(); - CharUnits TyAlign = TInfo.Align; - - // Copy into a temporary if the type is more aligned than the - // register save area. - if (TyAlign.getQuantity() > 8) { - Address Tmp = CGF.CreateMemTemp(Ty); - CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); - RegAddr = Tmp; - } - - } else if (neededSSE == 1) { - RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset), - CGF.Int8Ty, CharUnits::fromQuantity(16)); - RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); - } else { - assert(neededSSE == 2 && "Invalid number of needed registers!"); - // SSE registers are spaced 16 bytes apart in the register save - // area, we need to collect the two eightbytes together. - // The ABI isn't explicit about this, but it seems reasonable - // to assume that the slots are 16-byte aligned, since the stack is - // naturally 16-byte aligned and the prologue is expected to store - // all the SSE registers to the RSA. 
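// A hedged illustration (hypothetical type, not from this diff): for
//
//   struct Pair { double a, b; };   // SSE + SSE, so neededSSE == 2
//
// the two eightbytes arrive in %xmm0 and %xmm1 and are spilled 16 bytes
// apart in the register save area; the code below therefore loads each
// eightbyte from its own 16-byte slot and stores the pair contiguously
// into a temporary.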
- Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, - fp_offset), - CGF.Int8Ty, CharUnits::fromQuantity(16)); - Address RegAddrHi = - CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, - CharUnits::fromQuantity(16)); - llvm::Type *ST = AI.canHaveCoerceToType() - ? AI.getCoerceToType() - : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy); - llvm::Value *V; - Address Tmp = CGF.CreateMemTemp(Ty); - Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); - V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( - RegAddrLo, ST->getStructElementType(0))); - CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); - V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( - RegAddrHi, ST->getStructElementType(1))); - CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); - - RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); - } - - // AMD64-ABI 3.5.7p5: Step 5. Set: - // l->gp_offset = l->gp_offset + num_gp * 8 - // l->fp_offset = l->fp_offset + num_fp * 16. - if (neededInt) { - llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8); - CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset), - gp_offset_p); - } - if (neededSSE) { - llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16); - CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset), - fp_offset_p); - } - CGF.EmitBranch(ContBlock); - - // Emit code to load the value if it was passed in memory. - - CGF.EmitBlock(InMemBlock); - Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); - - // Return the appropriate result. - - CGF.EmitBlock(ContBlock); - Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, - "vaarg.addr"); - return ResAddr; -} - -Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is - // not 1, 2, 4, or 8 bytes, must be passed by reference." 
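// For example (illustrative, not from this diff): a 12-byte struct (96 bits)
// or an __int128 (128 bits) fails the width test below and is therefore
// passed by reference, so va_arg reads a pointer to a copy; an 8-byte struct
// is read directly from the argument slot.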
- uint64_t Width = getContext().getTypeSize(Ty); - bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width); - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, - CGF.getContext().getTypeInfoInChars(Ty), - CharUnits::fromQuantity(8), - /*allowHigherAlign*/ false); -} - -ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall( - QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const { - const Type *Base = nullptr; - uint64_t NumElts = 0; - - if (!Ty->isBuiltinType() && !Ty->isVectorType() && - isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) { - FreeSSERegs -= NumElts; - return getDirectX86Hva(); - } - return current; -} - -ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType, bool IsVectorCall, - bool IsRegCall) const { - - if (Ty->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - TypeInfo Info = getContext().getTypeInfo(Ty); - uint64_t Width = Info.Width; - CharUnits Align = getContext().toCharUnitsFromBits(Info.Align); - - const RecordType *RT = Ty->getAs<RecordType>(); - if (RT) { - if (!IsReturnType) { - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - } - - if (RT->getDecl()->hasFlexibleArrayMember()) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - - } - - const Type *Base = nullptr; - uint64_t NumElts = 0; - // vectorcall adds the concept of a homogeneous vector aggregate, similar to - // other targets. - if ((IsVectorCall || IsRegCall) && - isHomogeneousAggregate(Ty, Base, NumElts)) { - if (IsRegCall) { - if (FreeSSERegs >= NumElts) { - FreeSSERegs -= NumElts; - if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); - } - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); - } else if (IsVectorCall) { - if (FreeSSERegs >= NumElts && - (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) { - FreeSSERegs -= NumElts; - return ABIArgInfo::getDirect(); - } else if (IsReturnType) { - return ABIArgInfo::getExpand(); - } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { - // HVAs are delayed and reclassified in the 2nd step. - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); - } - } - } - - if (Ty->isMemberPointerType()) { - // If the member pointer is represented by an LLVM int or ptr, pass it - // directly. - llvm::Type *LLTy = CGT.ConvertType(Ty); - if (LLTy->isPointerTy() || LLTy->isIntegerTy()) - return ABIArgInfo::getDirect(); - } - - if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) { - // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is - // not 1, 2, 4, or 8 bytes, must be passed by reference." - if (Width > 64 || !llvm::isPowerOf2_64(Width)) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - - // Otherwise, coerce it to a small integer. - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); - } - - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - switch (BT->getKind()) { - case BuiltinType::Bool: - // Bool type is always extended to the ABI, other builtin types are not - // extended. - return ABIArgInfo::getExtend(Ty); - - case BuiltinType::LongDouble: - // Mingw64 GCC uses the old 80 bit extended precision floating point - // unit. It passes them indirectly through memory.
- if (IsMingw64) { - const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::x87DoubleExtended()) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); - } - break; - - case BuiltinType::Int128: - case BuiltinType::UInt128: - // If it's a parameter type, the normal ABI rule is that arguments larger - // than 8 bytes are passed indirectly. GCC follows it. We follow it too, - // even though it isn't particularly efficient. - if (!IsReturnType) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); - - // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. - // Clang matches them for compatibility. - return ABIArgInfo::getDirect(llvm::FixedVectorType::get( - llvm::Type::getInt64Ty(getVMContext()), 2)); - - default: - break; - } - } - - if (Ty->isBitIntType()) { - // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is - // not 1, 2, 4, or 8 bytes, must be passed by reference." - // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4, - // or 8 bytes anyway as long as it fits in them, so we don't have to check - // the power of 2. - if (Width <= 64) - return ABIArgInfo::getDirect(); - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); - } - - return ABIArgInfo::getDirect(); -} - -void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - const unsigned CC = FI.getCallingConvention(); - bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall; - bool IsRegCall = CC == llvm::CallingConv::X86_RegCall; - - // If __attribute__((sysv_abi)) is in use, use the SysV argument - // classification rules. - if (CC == llvm::CallingConv::X86_64_SysV) { - X86_64ABIInfo SysVABIInfo(CGT, AVXLevel); - SysVABIInfo.computeInfo(FI); - return; - } - - unsigned FreeSSERegs = 0; - if (IsVectorCall) { - // We can use up to 4 SSE return registers with vectorcall. - FreeSSERegs = 4; - } else if (IsRegCall) { - // RegCall gives us 16 SSE registers. - FreeSSERegs = 16; - } - - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true, - IsVectorCall, IsRegCall); - - if (IsVectorCall) { - // We can use up to 6 SSE register parameters with vectorcall. - FreeSSERegs = 6; - } else if (IsRegCall) { - // RegCall gives us 16 SSE registers; we can reuse the return registers. - FreeSSERegs = 16; - } - - unsigned ArgNum = 0; - unsigned ZeroSSERegs = 0; - for (auto &I : FI.arguments()) { - // Vectorcall in x64 only permits the first 6 arguments to be passed in - // XMM/YMM registers. After the sixth argument, pretend no vector - // registers are left. - unsigned *MaybeFreeSSERegs = - (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs; - I.info = - classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall); - ++ArgNum; - } - - if (IsVectorCall) { - // For vectorcall, assign aggregate HVAs to any free vector registers in a - // second pass. - for (auto &I : FI.arguments()) - I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info); - } -} - -Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is - // not 1, 2, 4, or 8 bytes, must be passed by reference."
- uint64_t Width = getContext().getTypeSize(Ty); - bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width); - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, - CGF.getContext().getTypeInfoInChars(Ty), - CharUnits::fromQuantity(8), - /*allowHigherAlign*/ false); -} - -static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address, bool Is64Bit, - bool IsAIX) { - // This is calculated from the LLVM and GCC tables and verified - // against gcc output. AFAIK all PPC ABIs use the same encoding. - - CodeGen::CGBuilderTy &Builder = CGF.Builder; - - llvm::IntegerType *i8 = CGF.Int8Ty; - llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); - llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); - llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); - - // 0-31: r0-31, the 4-byte or 8-byte general-purpose registers - AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31); - - // 32-63: fp0-31, the 8-byte floating-point registers - AssignToArrayRange(Builder, Address, Eight8, 32, 63); - - // 64-67 are various 4-byte or 8-byte special-purpose registers: - // 64: mq - // 65: lr - // 66: ctr - // 67: ap - AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67); - - // 68-76 are various 4-byte special-purpose registers: - // 68-75 cr0-7 - // 76: xer - AssignToArrayRange(Builder, Address, Four8, 68, 76); - - // 77-108: v0-31, the 16-byte vector registers - AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); - - // 109: vrsave - // 110: vscr - AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110); - - // AIX does not utilize the rest of the registers. - if (IsAIX) - return false; - - // 111: spe_acc - // 112: spefscr - // 113: sfp - AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113); - - if (!Is64Bit) - return false; - - // TODO: Need to verify if these registers are used on 64 bit AIX with Power8 - // or above CPU. - // 64-bit only registers: - // 114: tfhar - // 115: tfiar - // 116: texasr - AssignToArrayRange(Builder, Address, Eight8, 114, 116); - - return false; -} - -// AIX -namespace { -/// AIXABIInfo - The AIX XCOFF ABI information. -class AIXABIInfo : public ABIInfo { - const bool Is64Bit; - const unsigned PtrByteSize; - CharUnits getParamTypeAlignment(QualType Ty) const; - -public: - AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) - : ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 
8 : 4) {} - - bool isPromotableTypeForABI(QualType Ty) const; - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty) const; - - void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - } - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -class AIXTargetCodeGenInfo : public TargetCodeGenInfo { - const bool Is64Bit; - -public: - AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) - : TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)), - Is64Bit(Is64Bit) {} - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - return 1; // r1 is the dedicated stack pointer - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; -}; -} // namespace - -// Return true if the ABI requires Ty to be passed sign- or zero- -// extended to 32/64 bits. -bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // Promotable integer types are required to be promoted by the ABI. - if (getContext().isPromotableIntegerType(Ty)) - return true; - - if (!Is64Bit) - return false; - - // For 64 bit mode, in addition to the usual promotable integer types, we also - // need to extend all 32-bit types, since the ABI requires promotion to 64 - // bits. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) - switch (BT->getKind()) { - case BuiltinType::Int: - case BuiltinType::UInt: - return true; - default: - break; - } - - return false; -} - -ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isAnyComplexType()) - return ABIArgInfo::getDirect(); - - if (RetTy->isVectorType()) - return ABIArgInfo::getDirect(); - - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (isAggregateTypeForABI(RetTy)) - return getNaturalAlignIndirect(RetTy); - - return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - if (Ty->isAnyComplexType()) - return ABIArgInfo::getDirect(); - - if (Ty->isVectorType()) - return ABIArgInfo::getDirect(); - - if (isAggregateTypeForABI(Ty)) { - // Records with non-trivial destructors/copy-constructors should not be - // passed by value. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - CharUnits CCAlign = getParamTypeAlignment(Ty); - CharUnits TyAlign = getContext().getTypeAlignInChars(Ty); - - return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true, - /*Realign*/ TyAlign > CCAlign); - } - - return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); -} - -CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const { - // Complex types are passed just like their elements. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - if (Ty->isVectorType()) - return CharUnits::fromQuantity(16); - - // If the structure contains a vector type, the alignment is 16. 
- if (isRecordWithSIMDVectorType(getContext(), Ty)) - return CharUnits::fromQuantity(16); - - return CharUnits::fromQuantity(PtrByteSize); -} - -Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - - auto TypeInfo = getContext().getTypeInfoInChars(Ty); - TypeInfo.Align = getParamTypeAlignment(Ty); - - CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize); - - // If we have a complex type and the base type is smaller than the register - // size, the ABI calls for the real and imaginary parts to be right-adjusted - // in separate words in 32bit mode or doublewords in 64bit mode. However, - // Clang expects us to produce a pointer to a structure with the two parts - // packed tightly. So generate loads of the real and imaginary parts relative - // to the va_list pointer, and store them to a temporary structure. We do the - // same as the PPC64ABI here. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { - CharUnits EltSize = TypeInfo.Width / 2; - if (EltSize < SlotSize) - return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); - } - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, - SlotSize, /*AllowHigher*/ true); -} - -bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable( - CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { - return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true); -} - -// PowerPC-32 -namespace { -/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. -class PPC32_SVR4_ABIInfo : public DefaultABIInfo { - bool IsSoftFloatABI; - bool IsRetSmallStructInRegABI; - - CharUnits getParamTypeAlignment(QualType Ty) const; - -public: - PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, - bool RetSmallStructInRegABI) - : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), - IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const; - - void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - } - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { -public: - PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, - bool RetSmallStructInRegABI) - : TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>( - CGT, SoftFloatABI, RetSmallStructInRegABI)) {} - - static bool isStructReturnInRegABI(const llvm::Triple &Triple, - const CodeGenOptions &Opts); - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - // This is recovered from gcc output. - return 1; // r1 is the dedicated stack pointer - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; -}; -} - -CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { - // Complex types are passed just like their elements. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - if (Ty->isVectorType()) - return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 - : 4); - - // For single-element float/vector structs, we consider the whole type - // to have the same alignment requirements as its single element. 
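// Illustrative examples (hypothetical types, not from this diff):
//
//   struct V { vector float v; };  // single 128-bit element -> 16 bytes
//   struct F { float f; };         // single float element   -> 4 bytes
//
// Anything else falls through to the default 4-byte parameter alignment.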
- const Type *AlignTy = nullptr; - if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { - const BuiltinType *BT = EltType->getAs<BuiltinType>(); - if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || - (BT && BT->isFloatingPoint())) - AlignTy = EltType; - } - - if (AlignTy) - return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); - return CharUnits::fromQuantity(4); -} - -ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { - uint64_t Size; - - // -msvr4-struct-return puts small aggregates in GPR3 and GPR4. - if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI && - (Size = getContext().getTypeSize(RetTy)) <= 64) { - // System V ABI (1995), page 3-22, specified: - // > A structure or union whose size is less than or equal to 8 bytes - // > shall be returned in r3 and r4, as if it were first stored in the - // > 8-byte aligned memory area and then the low addressed word were - // > loaded into r3 and the high-addressed word into r4. Bits beyond - // > the last member of the structure or union are not defined. - // - // GCC for big-endian PPC32 inserts the pad before the first member, - // not "beyond the last member" of the struct. To stay compatible - // with GCC, we coerce the struct to an integer of the same size. - // LLVM will extend it and return i32 in r3, or i64 in r3:r4. - if (Size == 0) - return ABIArgInfo::getIgnore(); - else { - llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size); - return ABIArgInfo::getDirect(CoerceTy); - } - } - - return DefaultABIInfo::classifyReturnType(RetTy); -} - -// TODO: this implementation is now likely redundant with -// DefaultABIInfo::EmitVAArg. -Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, - QualType Ty) const { - if (getTarget().getTriple().isOSDarwin()) { - auto TI = getContext().getTypeInfoInChars(Ty); - TI.Align = getParamTypeAlignment(Ty); - - CharUnits SlotSize = CharUnits::fromQuantity(4); - return emitVoidPtrVAArg(CGF, VAList, Ty, - classifyArgumentType(Ty).isIndirect(), TI, SlotSize, - /*AllowHigherAlign=*/true); - } - - const unsigned OverflowLimit = 8; - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { - // TODO: Implement this. For now ignore. - (void)CTy; - return Address::invalid(); // FIXME? - } - - // struct __va_list_tag { - // unsigned char gpr; - // unsigned char fpr; - // unsigned short reserved; - // void *overflow_arg_area; - // void *reg_save_area; - // }; - - bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; - bool isInt = !Ty->isFloatingType(); - bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; - - // All aggregates are passed indirectly? That doesn't seem consistent - // with the argument-lowering code. - bool isIndirect = isAggregateTypeForABI(Ty); - - CGBuilderTy &Builder = CGF.Builder; - - // The calling convention either uses 1-2 GPRs or 1 FPR. - Address NumRegsAddr = Address::invalid(); - if (isInt || IsSoftFloatABI) { - NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr"); - } else { - NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr"); - } - - llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs"); - - // "Align" the register count when TY is i64. 
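// Illustration (hypothetical state, assuming NumRegs counts argument GPRs
// starting at r3; not from this diff): with NumRegs == 3 the next free
// register would be r6, but a 64-bit value must start in an aligned pair,
// so the add-and-mask below rounds the count up to 4 and the value occupies
// r7:r8 instead of straddling an unaligned pair.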
- if (isI64 || (isF64 && IsSoftFloatABI)) { - NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1)); - NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U)); - } - - llvm::Value *CC = - Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond"); - - llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs"); - llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow"); - llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); - - Builder.CreateCondBr(CC, UsingRegs, UsingOverflow); - - llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy; - if (isIndirect) DirectTy = DirectTy->getPointerTo(0); - - // Case 1: consume registers. - Address RegAddr = Address::invalid(); - { - CGF.EmitBlock(UsingRegs); - - Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4); - RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CGF.Int8Ty, - CharUnits::fromQuantity(8)); - assert(RegAddr.getElementType() == CGF.Int8Ty); - - // Floating-point registers start after the general-purpose registers. - if (!(isInt || IsSoftFloatABI)) { - RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr, - CharUnits::fromQuantity(32)); - } - - // Get the address of the saved value by scaling the number of - // registers we've used by the size of each register. - CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8); - llvm::Value *RegOffset = - Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity())); - RegAddr = Address( - Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset), - CGF.Int8Ty, RegAddr.getAlignment().alignmentOfArrayElement(RegSize)); - RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy); - - // Increase the used-register count. - NumRegs = - Builder.CreateAdd(NumRegs, - Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1)); - Builder.CreateStore(NumRegs, NumRegsAddr); - - CGF.EmitBranch(Cont); - } - - // Case 2: consume space in the overflow area. - Address MemAddr = Address::invalid(); - { - CGF.EmitBlock(UsingOverflow); - - Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr); - - // Everything in the overflow area is rounded up to a size of at least 4. - CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4); - - CharUnits Size; - if (!isIndirect) { - auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); - Size = TypeInfo.Width.alignTo(OverflowAreaAlign); - } else { - Size = CGF.getPointerSize(); - } - - Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3); - Address OverflowArea = - Address(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), CGF.Int8Ty, - OverflowAreaAlign); - // Round up address of argument to alignment - CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); - if (Align > OverflowAreaAlign) { - llvm::Value *Ptr = OverflowArea.getPointer(); - OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), - OverflowArea.getElementType(), Align); - } - - MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); - - // Increase the overflow area. - OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size); - Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr); - CGF.EmitBranch(Cont); - } - - CGF.EmitBlock(Cont); - - // Merge the cases with a phi. - Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow, - "vaarg.addr"); - - // Load the pointer if the argument was passed indirectly.
- if (isIndirect) { - Result = Address(Builder.CreateLoad(Result, "aggr"), ElementTy, - getContext().getTypeAlignInChars(Ty)); - } - - return Result; -} - -bool PPC32TargetCodeGenInfo::isStructReturnInRegABI( - const llvm::Triple &Triple, const CodeGenOptions &Opts) { - assert(Triple.isPPC32()); - - switch (Opts.getStructReturnConvention()) { - case CodeGenOptions::SRCK_Default: - break; - case CodeGenOptions::SRCK_OnStack: // -maix-struct-return - return false; - case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return - return true; - } - - if (Triple.isOSBinFormatELF() && !Triple.isOSLinux()) - return true; - - return false; -} - -bool -PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false, - /*IsAIX*/ false); -} - -// PowerPC-64 - -namespace { -/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. -class PPC64_SVR4_ABIInfo : public ABIInfo { -public: - enum ABIKind { - ELFv1 = 0, - ELFv2 - }; - -private: - static const unsigned GPRBits = 64; - ABIKind Kind; - bool IsSoftFloatABI; - -public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, - bool SoftFloatABI) - : ABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {} - - bool isPromotableTypeForABI(QualType Ty) const; - CharUnits getParamTypeAlignment(QualType Ty) const; - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty) const; - - bool isHomogeneousAggregateBaseType(QualType Ty) const override; - bool isHomogeneousAggregateSmallEnough(const Type *Ty, - uint64_t Members) const override; - - // TODO: We can add more logic to computeInfo to improve performance. - // Example: For aggregate arguments that fit in a register, we could - // use getDirectInReg (as is done below for structs containing a single - // floating-point value) to avoid pushing them to memory on function - // entry. This would require changing the logic in PPCISelLowering - // when lowering the parameters in the caller and args in the callee. - void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) { - // We rely on the default argument classification for the most part. - // One exception: An aggregate containing a single floating-point - // or vector item must be passed in a register if one is available. - const Type *T = isSingleElementStruct(I.type, getContext()); - if (T) { - const BuiltinType *BT = T->getAs<BuiltinType>(); - if ((T->isVectorType() && getContext().getTypeSize(T) == 128) || - (BT && BT->isFloatingPoint())) { - QualType QT(T, 0); - I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); - continue; - } - } - I.info = classifyArgumentType(I.type); - } - } - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { - -public: - PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, - bool SoftFloatABI) - : TargetCodeGenInfo( - std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) { - SwiftInfo = - std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); - } - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - // This is recovered from gcc output. 
- return 1; // r1 is the dedicated stack pointer - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; -}; - -class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { -public: - PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - // This is recovered from gcc output. - return 1; // r1 is the dedicated stack pointer - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; -}; - -} - -// Return true if the ABI requires Ty to be passed sign- or zero- -// extended to 64 bits. -bool -PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // Promotable integer types are required to be promoted by the ABI. - if (isPromotableIntegerTypeForABI(Ty)) - return true; - - // In addition to the usual promotable integer types, we also need to - // extend all 32-bit types, since the ABI requires promotion to 64 bits. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) - switch (BT->getKind()) { - case BuiltinType::Int: - case BuiltinType::UInt: - return true; - default: - break; - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() < 64) - return true; - - return false; -} - -/// isAlignedParamType - Determine whether a type requires 16-byte or -/// higher alignment in the parameter area. Always returns at least 8. -CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { - // Complex types are passed just like their elements. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - auto FloatUsesVector = [this](QualType Ty){ - return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics( - Ty) == &llvm::APFloat::IEEEquad(); - }; - - // Only vector types of size 16 bytes need alignment (larger types are - // passed via reference, smaller types are not aligned). - if (Ty->isVectorType()) { - return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8); - } else if (FloatUsesVector(Ty)) { - // According to ABI document section 'Optional Save Areas': If extended - // precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION - // format are supported, map them to a single quadword, quadword aligned. - return CharUnits::fromQuantity(16); - } - - // For single-element float/vector structs, we consider the whole type - // to have the same alignment requirements as its single element. - const Type *AlignAsType = nullptr; - const Type *EltType = isSingleElementStruct(Ty, getContext()); - if (EltType) { - const BuiltinType *BT = EltType->getAs<BuiltinType>(); - if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || - (BT && BT->isFloatingPoint())) - AlignAsType = EltType; - } - - // Likewise for ELFv2 homogeneous aggregates. - const Type *Base = nullptr; - uint64_t Members = 0; - if (!AlignAsType && Kind == ELFv2 && - isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members)) - AlignAsType = Base; - - // With special case aggregates, only vector base types need alignment. - if (AlignAsType) { - bool UsesVector = AlignAsType->isVectorType() || - FloatUsesVector(QualType(AlignAsType, 0)); - return CharUnits::fromQuantity(UsesVector ? 
16 : 8); - } - - // Otherwise, we only need alignment for any aggregate type that - // has an alignment requirement of >= 16 bytes. - if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) { - return CharUnits::fromQuantity(16); - } - - return CharUnits::fromQuantity(8); -} - -/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous -/// aggregate. Base is set to the base element type, and Members is set -/// to the number of base elements. -bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, - uint64_t &Members) const { - if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { - uint64_t NElements = AT->getSize().getZExtValue(); - if (NElements == 0) - return false; - if (!isHomogeneousAggregate(AT->getElementType(), Base, Members)) - return false; - Members *= NElements; - } else if (const RecordType *RT = Ty->getAs<RecordType>()) { - const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return false; - - Members = 0; - - // If this is a C++ record, check the properties of the record such as - // bases and ABI specific restrictions - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!getCXXABI().isPermittedToBeHomogeneousAggregate(CXXRD)) - return false; - - for (const auto &I : CXXRD->bases()) { - // Ignore empty records. - if (isEmptyRecord(getContext(), I.getType(), true)) - continue; - - uint64_t FldMembers; - if (!isHomogeneousAggregate(I.getType(), Base, FldMembers)) - return false; - - Members += FldMembers; - } - } - - for (const auto *FD : RD->fields()) { - // Ignore (non-zero arrays of) empty records. - QualType FT = FD->getType(); - while (const ConstantArrayType *AT = - getContext().getAsConstantArrayType(FT)) { - if (AT->getSize().getZExtValue() == 0) - return false; - FT = AT->getElementType(); - } - if (isEmptyRecord(getContext(), FT, true)) - continue; - - if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() && - FD->isZeroLengthBitField(getContext())) - continue; - - uint64_t FldMembers; - if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers)) - return false; - - Members = (RD->isUnion() ? - std::max(Members, FldMembers) : Members + FldMembers); - } - - if (!Base) - return false; - - // Ensure there is no padding. - if (getContext().getTypeSize(Base) * Members != - getContext().getTypeSize(Ty)) - return false; - } else { - Members = 1; - if (const ComplexType *CT = Ty->getAs<ComplexType>()) { - Members = 2; - Ty = CT->getElementType(); - } - - // Most ABIs only support float, double, and some vector type widths. - if (!isHomogeneousAggregateBaseType(Ty)) - return false; - - // The base type must be the same for all members. Types that - // agree in both total size and mode (float vs. vector) are - // treated as being equivalent here. - const Type *TyPtr = Ty.getTypePtr(); - if (!Base) { - Base = TyPtr; - // If it's a non-power-of-2 vector, its size is already a power-of-2, - // so make sure to widen it explicitly. 
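// Hypothetical example (not from this diff): a 3-element float vector is
// padded to 16 bytes, so getTypeSize already reports 128 bits; the rewrite
// below replaces the base type with an explicit 4-element float vector whose
// declared size matches that allocation.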
- if (const VectorType *VT = Base->getAs<VectorType>()) { - QualType EltTy = VT->getElementType(); - unsigned NumElements = - getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy); - Base = getContext() - .getVectorType(EltTy, NumElements, VT->getVectorKind()) - .getTypePtr(); - } - } - - if (Base->isVectorType() != TyPtr->isVectorType() || - getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) - return false; - } - return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members); -} - -bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { - // Homogeneous aggregates for ELFv2 must have base types of float, - // double, long double, or 128-bit vectors. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->getKind() == BuiltinType::Float || - BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble || - BT->getKind() == BuiltinType::Ibm128 || - (getContext().getTargetInfo().hasFloat128Type() && - (BT->getKind() == BuiltinType::Float128))) { - if (IsSoftFloatABI) - return false; - return true; - } - } - if (const VectorType *VT = Ty->getAs<VectorType>()) { - if (getContext().getTypeSize(VT) == 128) - return true; - } - return false; -} - -bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough( - const Type *Base, uint64_t Members) const { - // Vector and fp128 types require one register, other floating point types - // require one or two registers depending on their size. - uint32_t NumRegs = - ((getContext().getTargetInfo().hasFloat128Type() && - Base->isFloat128Type()) || - Base->isVectorType()) ? 1 - : (getContext().getTypeSize(Base) + 63) / 64; - - // Homogeneous Aggregates may occupy at most 8 registers. - return Members * NumRegs <= 8; -} - -ABIArgInfo -PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - if (Ty->isAnyComplexType()) - return ABIArgInfo::getDirect(); - - // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes) - // or via reference (larger than 16 bytes). - if (Ty->isVectorType()) { - uint64_t Size = getContext().getTypeSize(Ty); - if (Size > 128) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - else if (Size < 128) { - llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); - return ABIArgInfo::getDirect(CoerceTy); - } - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > 128) - return getNaturalAlignIndirect(Ty, /*ByVal=*/true); - - if (isAggregateTypeForABI(Ty)) { - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity(); - uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); - - // ELFv2 homogeneous aggregates are passed as array types. - const Type *Base = nullptr; - uint64_t Members = 0; - if (Kind == ELFv2 && - isHomogeneousAggregate(Ty, Base, Members)) { - llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); - llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); - return ABIArgInfo::getDirect(CoerceTy); - } - - // If an aggregate may end up fully in registers, we do not - // use the ByVal method, but pass the aggregate as array. - // This is usually beneficial since we avoid forcing the - // back-end to store the argument to memory. 
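// A sketch of the resulting coercions (illustrative types, not from this
// diff), assuming the default 8-byte parameter alignment:
//
//   struct A { int x; };           // 32 bits  -> coerced to i32
//   struct B { long a, b, c; };    // 192 bits -> coerced to [3 x i64]
//
// Larger aggregates fall through to the ByVal path below.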
- uint64_t Bits = getContext().getTypeSize(Ty); - if (Bits > 0 && Bits <= 8 * GPRBits) { - llvm::Type *CoerceTy; - - // Types up to 8 bytes are passed as integer type (which will be - // properly aligned in the argument save area doubleword). - if (Bits <= GPRBits) - CoerceTy = - llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); - // Larger types are passed as arrays, with the base type selected - // according to the required alignment in the save area. - else { - uint64_t RegBits = ABIAlign * 8; - uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits; - llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); - CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); - } - - return ABIArgInfo::getDirect(CoerceTy); - } - - // All other aggregates are passed ByVal. - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), - /*ByVal=*/true, - /*Realign=*/TyAlign > ABIAlign); - } - - return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); -} - -ABIArgInfo -PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (RetTy->isAnyComplexType()) - return ABIArgInfo::getDirect(); - - // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes) - // or via reference (larger than 16 bytes). - if (RetTy->isVectorType()) { - uint64_t Size = getContext().getTypeSize(RetTy); - if (Size > 128) - return getNaturalAlignIndirect(RetTy); - else if (Size < 128) { - llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); - return ABIArgInfo::getDirect(CoerceTy); - } - } - - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > 128) - return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); - - if (isAggregateTypeForABI(RetTy)) { - // ELFv2 homogeneous aggregates are returned as array types. - const Type *Base = nullptr; - uint64_t Members = 0; - if (Kind == ELFv2 && - isHomogeneousAggregate(RetTy, Base, Members)) { - llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); - llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); - return ABIArgInfo::getDirect(CoerceTy); - } - - // ELFv2 small aggregates are returned in up to two registers. - uint64_t Bits = getContext().getTypeSize(RetTy); - if (Kind == ELFv2 && Bits <= 2 * GPRBits) { - if (Bits == 0) - return ABIArgInfo::getIgnore(); - - llvm::Type *CoerceTy; - if (Bits > GPRBits) { - CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); - CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); - } else - CoerceTy = - llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); - return ABIArgInfo::getDirect(CoerceTy); - } - - // All other aggregates are returned indirectly. - return getNaturalAlignIndirect(RetTy); - } - - return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine. -Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - auto TypeInfo = getContext().getTypeInfoInChars(Ty); - TypeInfo.Align = getParamTypeAlignment(Ty); - - CharUnits SlotSize = CharUnits::fromQuantity(8); - - // If we have a complex type and the base type is smaller than 8 bytes, - // the ABI calls for the real and imaginary parts to be right-adjusted - // in separate doublewords. However, Clang expects us to produce a - // pointer to a structure with the two parts packed tightly. 
So generate - // loads of the real and imaginary parts relative to the va_list pointer, - // and store them to a temporary structure. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { - CharUnits EltSize = TypeInfo.Width / 2; - if (EltSize < SlotSize) - return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); - } - - // Otherwise, just use the general rule. - // - // The PPC64 ABI passes some arguments in integer registers, even to variadic - // functions. To allow va_list to use the simple "void*" representation, - // variadic calls allocate space in the argument area for the integer argument - // registers, and variadic functions spill their integer argument registers to - // this area in their prologues. When aggregates smaller than a register are - // passed this way, they are passed in the least significant bits of the - // register, which means that after spilling on big-endian targets they will - // be right-aligned in their argument slot. This is uncommon; for a variety of - // reasons, other big-endian targets don't end up right-aligning aggregate - // types this way, and so right-alignment only applies to fundamental types. - // So on PPC64, we must force the use of right-alignment even for aggregates. - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, - SlotSize, /*AllowHigher*/ true, - /*ForceRightAdjust*/ true); -} - -bool -PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable( - CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, - /*IsAIX*/ false); -} - -bool -PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, - /*IsAIX*/ false); -} - -//===----------------------------------------------------------------------===// -// AArch64 ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class AArch64ABIInfo : public ABIInfo { -public: - enum ABIKind { - AAPCS = 0, - DarwinPCS, - Win64 - }; - -private: - ABIKind Kind; - -public: - AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} - -private: - ABIKind getABIKind() const { return Kind; } - bool isDarwinPCS() const { return Kind == DarwinPCS; } - - ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic, - unsigned CallingConvention) const; - ABIArgInfo coerceIllegalVector(QualType Ty) const; - bool isHomogeneousAggregateBaseType(QualType Ty) const override; - bool isHomogeneousAggregateSmallEnough(const Type *Ty, - uint64_t Members) const override; - bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; - - bool isIllegalVectorType(QualType Ty) const; - - void computeInfo(CGFunctionInfo &FI) const override { - if (!::classifyReturnType(getCXXABI(), FI, *this)) - FI.getReturnInfo() = - classifyReturnType(FI.getReturnType(), FI.isVariadic()); - - for (auto &it : FI.arguments()) - it.info = classifyArgumentType(it.type, FI.isVariadic(), - FI.getCallingConvention()); - } - - Address EmitDarwinVAArg(Address VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; - - Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, - CodeGenFunction &CGF) const; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override { - llvm::Type *BaseTy = CGF.ConvertType(Ty); - 
if (isa<llvm::ScalableVectorType>(BaseTy)) - llvm::report_fatal_error("Passing SVE types to variadic functions is " - "currently not supported"); - - return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) - : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) - : EmitAAPCSVAArg(VAListAddr, Ty, CGF); - } - - Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - bool allowBFloatArgsAndRet() const override { - return getTarget().hasBFloat16Type(); - } -}; - -class AArch64SwiftABIInfo : public SwiftABIInfo { -public: - explicit AArch64SwiftABIInfo(CodeGenTypes &CGT) - : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} - - bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, - unsigned NumElts) const override; -}; - -class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { -public: - AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) - : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) { - SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT); - } - - StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; - } - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - return 31; - } - - bool doesReturnSlotInterfereWithArgs() const override { return false; } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) - return; - - const auto *TA = FD->getAttr<TargetAttr>(); - if (TA == nullptr) - return; - - ParsedTargetAttr Attr = - CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); - if (Attr.BranchProtection.empty()) - return; - - TargetInfo::BranchProtectionInfo BPI; - StringRef Error; - (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, - Attr.CPU, BPI, Error); - assert(Error.empty()); - - auto *Fn = cast<llvm::Function>(GV); - static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; - Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); - - if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { - Fn->addFnAttr("sign-return-address-key", - BPI.SignKey == LangOptions::SignReturnAddressKeyKind::AKey - ? "a_key" - : "b_key"); - } - - Fn->addFnAttr("branch-target-enforcement", - BPI.BranchTargetEnforcement ? 
"true" : "false"); - } - - bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF, - llvm::Type *Ty) const override { - if (CGF.getTarget().hasFeature("ls64")) { - auto *ST = dyn_cast<llvm::StructType>(Ty); - if (ST && ST->getNumElements() == 1) { - auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0)); - if (AT && AT->getNumElements() == 8 && - AT->getElementType()->isIntegerTy(64)) - return true; - } - } - return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty); - } -}; - -class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { -public: - WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) - : AArch64TargetCodeGenInfo(CGT, K) {} - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; - - void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const override { - Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); - } - - void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, - llvm::SmallString<32> &Opt) const override { - Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; - } -}; - -void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - if (GV->isDeclaration()) - return; - addStackProbeTargetAttributes(D, GV, CGM); -} -} - -ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { - assert(Ty->isVectorType() && "expected vector type!"); - - const auto *VT = Ty->castAs<VectorType>(); - if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { - assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); - assert(VT->getElementType()->castAs<BuiltinType>()->getKind() == - BuiltinType::UChar && - "unexpected builtin type for SVE predicate!"); - return ABIArgInfo::getDirect(llvm::ScalableVectorType::get( - llvm::Type::getInt1Ty(getVMContext()), 16)); - } - - if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) { - assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); - - const auto *BT = VT->getElementType()->castAs<BuiltinType>(); - llvm::ScalableVectorType *ResType = nullptr; - switch (BT->getKind()) { - default: - llvm_unreachable("unexpected builtin type for SVE vector!"); - case BuiltinType::SChar: - case BuiltinType::UChar: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getInt8Ty(getVMContext()), 16); - break; - case BuiltinType::Short: - case BuiltinType::UShort: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getInt16Ty(getVMContext()), 8); - break; - case BuiltinType::Int: - case BuiltinType::UInt: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getInt32Ty(getVMContext()), 4); - break; - case BuiltinType::Long: - case BuiltinType::ULong: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getInt64Ty(getVMContext()), 2); - break; - case BuiltinType::Half: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getHalfTy(getVMContext()), 8); - break; - case BuiltinType::Float: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getFloatTy(getVMContext()), 4); - break; - case BuiltinType::Double: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getDoubleTy(getVMContext()), 2); - break; - case BuiltinType::BFloat16: - ResType = llvm::ScalableVectorType::get( - llvm::Type::getBFloatTy(getVMContext()), 8); - break; - } - return 
ABIArgInfo::getDirect(ResType); - } - - uint64_t Size = getContext().getTypeSize(Ty); - // Android promotes <2 x i8> to i16, not i32 - if (isAndroid() && (Size <= 16)) { - llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size <= 32) { - llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 64) { - auto *ResType = - llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 128) { - auto *ResType = - llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); - return ABIArgInfo::getDirect(ResType); - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); -} - -ABIArgInfo -AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, - unsigned CallingConvention) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - // Handle illegal vector types here. - if (isIllegalVectorType(Ty)) - return coerceIllegalVector(Ty); - - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > 128) - return getNaturalAlignIndirect(Ty); - - return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS() - ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); - } - - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == - CGCXXABI::RAA_DirectInMemory); - } - - // Empty records are always ignored on Darwin, but actually passed in C++ mode - // elsewhere for GNU compatibility. - uint64_t Size = getContext().getTypeSize(Ty); - bool IsEmpty = isEmptyRecord(getContext(), Ty, true); - if (IsEmpty || Size == 0) { - if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) - return ABIArgInfo::getIgnore(); - - // GNU C mode. The only argument that gets ignored is an empty one with size - // 0. - if (IsEmpty && Size == 0) - return ABIArgInfo::getIgnore(); - return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); - } - - // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. - const Type *Base = nullptr; - uint64_t Members = 0; - bool IsWin64 = Kind == Win64 || CallingConvention == llvm::CallingConv::Win64; - bool IsWinVariadic = IsWin64 && IsVariadic; - // In variadic functions on Windows, all composite types are treated alike, - // no special handling of HFAs/HVAs. - if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) { - if (Kind != AArch64ABIInfo::AAPCS) - return ABIArgInfo::getDirect( - llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); - - // For alignment adjusted HFAs, cap the argument alignment to 16, leave it - // default otherwise. - unsigned Align = - getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); - unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); - Align = (Align > BaseAlign && Align >= 16) ? 16 : 0; - return ABIArgInfo::getDirect( - llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0, - nullptr, true, Align); - } - - // Aggregates <= 16 bytes are passed directly in registers or on the stack. 
- if (Size <= 128) { - // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of - // same size and alignment. - if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(Ty, getContext(), getVMContext()); - } - unsigned Alignment; - if (Kind == AArch64ABIInfo::AAPCS) { - Alignment = getContext().getTypeUnadjustedAlign(Ty); - Alignment = Alignment < 128 ? 64 : 128; - } else { - Alignment = - std::max(getContext().getTypeAlign(Ty), - (unsigned)getTarget().getPointerWidth(LangAS::Default)); - } - Size = llvm::alignTo(Size, Alignment); - - // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. - // For aggregates with 16-byte alignment, we use i128. - llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment); - return ABIArgInfo::getDirect( - Size == Alignment ? BaseTy - : llvm::ArrayType::get(BaseTy, Size / Alignment)); - } - - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); -} - -ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, - bool IsVariadic) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (const auto *VT = RetTy->getAs<VectorType>()) { - if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || - VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) - return coerceIllegalVector(RetTy); - } - - // Large vector types should be returned via memory. - if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) - return getNaturalAlignIndirect(RetTy); - - if (!isAggregateTypeForABI(RetTy)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > 128) - return getNaturalAlignIndirect(RetTy); - - return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS() - ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); - } - - uint64_t Size = getContext().getTypeSize(RetTy); - if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) - return ABIArgInfo::getIgnore(); - - const Type *Base = nullptr; - uint64_t Members = 0; - if (isHomogeneousAggregate(RetTy, Base, Members) && - !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 && - IsVariadic)) - // Homogeneous Floating-point Aggregates (HFAs) are returned directly. - return ABIArgInfo::getDirect(); - - // Aggregates <= 16 bytes are returned directly in registers or on the stack. - if (Size <= 128) { - // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of - // same size and alignment. - if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(RetTy, getContext(), getVMContext()); - } - - if (Size <= 64 && getDataLayout().isLittleEndian()) { - // Composite types are returned in lower bits of a 64-bit register for LE, - // and in higher bits for BE. However, integer types are always returned - // in lower bits for both LE and BE, and they are not rounded up to - // 64-bits. We can skip rounding up of composite types for LE, but not for - // BE, otherwise composite types will be indistinguishable from integer - // types. - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), Size)); - } - - unsigned Alignment = getContext().getTypeAlign(RetTy); - Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes - - // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. - // For aggregates with 16-byte alignment, we use i128. 
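The two 16-byte cases named in the comment above can be separated by a single predicate; a minimal sketch in bit units (hypothetical helper):

#include <cassert>
#include <cstdint>

// Sketch: 16-byte aggregates with under-16-byte alignment come back as a
// pair of i64; 16-byte-aligned ones as a single i128.
bool usesI64Pair(uint64_t alignBits, uint64_t sizeBits) {
  return alignBits < 128 && sizeBits == 128;
}

int main() {
  assert(usesI64Pair(64, 128));    // struct { uint64_t a, b; } -> [2 x i64]
  assert(!usesI64Pair(128, 128));  // struct { __int128 a; }    -> i128
}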
- if (Alignment < 128 && Size == 128) { - llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); - } - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); - } - - return getNaturalAlignIndirect(RetTy); -} - -/// isIllegalVectorType - check whether the vector type is legal for AArch64. -bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { - if (const VectorType *VT = Ty->getAs<VectorType>()) { - // Check whether VT is a fixed-length SVE vector. These types are - // represented as scalable vectors in function args/return and must be - // coerced from fixed vectors. - if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || - VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) - return true; - - // Check whether VT is legal. - unsigned NumElements = VT->getNumElements(); - uint64_t Size = getContext().getTypeSize(VT); - // NumElements should be power of 2. - if (!llvm::isPowerOf2_32(NumElements)) - return true; - - // arm64_32 has to be compatible with the ARM logic here, which allows huge - // vectors for some reason. - llvm::Triple Triple = getTarget().getTriple(); - if (Triple.getArch() == llvm::Triple::aarch64_32 && - Triple.isOSBinFormatMachO()) - return Size <= 32; - - return Size != 64 && (Size != 128 || NumElements == 1); - } - return false; -} - -bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, - llvm::Type *EltTy, - unsigned NumElts) const { - if (!llvm::isPowerOf2_32(NumElts)) - return false; - if (VectorSize.getQuantity() != 8 && - (VectorSize.getQuantity() != 16 || NumElts == 1)) - return false; - return true; -} - -bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { - // Homogeneous aggregates for AAPCS64 must have base types of a floating - // point type or a short-vector type. This is the same as the 32-bit ABI, - // but with the difference that any floating-point type is allowed, - // including __fp16. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint()) - return true; - } else if (const VectorType *VT = Ty->getAs<VectorType>()) { - unsigned VecSize = getContext().getTypeSize(VT); - if (VecSize == 64 || VecSize == 128) - return true; - } - return false; -} - -bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, - uint64_t Members) const { - return Members <= 4; -} - -bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() - const { - // AAPCS64 says that the rule for whether something is a homogeneous - // aggregate is applied to the output of the data layout decision. So - // anything that doesn't affect the data layout also does not affect - // homogeneity. In particular, zero-length bitfields don't stop a struct - // being homogeneous. - return true; -} - -Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, - CodeGenFunction &CGF) const { - ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true, - CGF.CurFnInfo->getCallingConvention()); - // Empty records are ignored for parameter passing purposes. 
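Before the empty-record fast path below, a short illustration of the AAPCS64 homogeneity rules defined above; these are hypothetical example types, annotated with the expected classification rather than computed:

struct Hfa4 { float a, b, c, d; };       // four float members -> HFA (s0-s3)
struct NotHfa5 { double d[5]; };         // five members       -> over the cap of 4
struct NotHfaMix { float f; double d; }; // differing bases    -> not homogeneous

int main() {} // declarations only; the classification happens in the compiler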
- if (AI.isIgnore()) { - uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8; - CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); - VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); - auto *Load = CGF.Builder.CreateLoad(VAListAddr); - Address Addr = Address(Load, CGF.Int8Ty, SlotSize); - return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - } - - bool IsIndirect = AI.isIndirect(); - - llvm::Type *BaseTy = CGF.ConvertType(Ty); - if (IsIndirect) - BaseTy = llvm::PointerType::getUnqual(BaseTy); - else if (AI.getCoerceToType()) - BaseTy = AI.getCoerceToType(); - - unsigned NumRegs = 1; - if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) { - BaseTy = ArrTy->getElementType(); - NumRegs = ArrTy->getNumElements(); - } - bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); - - // The AArch64 va_list type and handling is specified in the Procedure Call - // Standard, section B.4: - // - // struct { - // void *__stack; - // void *__gr_top; - // void *__vr_top; - // int __gr_offs; - // int __vr_offs; - // }; - - llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); - llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); - llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); - llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); - - CharUnits TySize = getContext().getTypeSizeInChars(Ty); - CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty); - - Address reg_offs_p = Address::invalid(); - llvm::Value *reg_offs = nullptr; - int reg_top_index; - int RegSize = IsIndirect ? 8 : TySize.getQuantity(); - if (!IsFPR) { - // 3 is the field number of __gr_offs - reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); - reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); - reg_top_index = 1; // field number for __gr_top - RegSize = llvm::alignTo(RegSize, 8); - } else { - // 4 is the field number of __vr_offs. - reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); - reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); - reg_top_index = 2; // field number for __vr_top - RegSize = 16 * NumRegs; - } - - //======================================= - // Find out where argument was passed - //======================================= - - // If reg_offs >= 0 we're already using the stack for this type of - // argument. We don't want to keep updating reg_offs (in case it overflows, - // though anyone passing 2GB of arguments, each at most 16 bytes, deserves - // whatever they get). - llvm::Value *UsingStack = nullptr; - UsingStack = CGF.Builder.CreateICmpSGE( - reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); - - CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); - - // Otherwise, at least some kind of argument could go in these registers, the - // question is whether this particular type is too big. - CGF.EmitBlock(MaybeRegBlock); - - // Integer arguments may need to correct register alignment (for example a - // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we - // align __gr_offs to calculate the potential address. 
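For instance (illustrative values only), the realignment performed next is round-up arithmetic on the negative offset:

#include <cassert>

// Sketch of the __gr_offs realignment: round the negative offset up toward
// zero to a multiple of the required alignment.
int alignRegOffs(int regOffs, int align) {
  return (regOffs + align - 1) & -align;
}

int main() {
  // A 16-byte-aligned __int128 with seven GPRs left (__gr_offs == -56)
  // skips one register and is loaded from offset -48.
  assert(alignRegOffs(-56, 16) == -48);
}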
- if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) { - int Align = TyAlign.getQuantity(); - - reg_offs = CGF.Builder.CreateAdd( - reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), - "align_regoffs"); - reg_offs = CGF.Builder.CreateAnd( - reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), - "aligned_regoffs"); - } - - // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. - // The fact that this is done unconditionally reflects the fact that - // allocating an argument to the stack also uses up all the remaining - // registers of the appropriate kind. - llvm::Value *NewOffset = nullptr; - NewOffset = CGF.Builder.CreateAdd( - reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); - CGF.Builder.CreateStore(NewOffset, reg_offs_p); - - // Now we're in a position to decide whether this argument really was in - // registers or not. - llvm::Value *InRegs = nullptr; - InRegs = CGF.Builder.CreateICmpSLE( - NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); - - CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); - - //======================================= - // Argument was in registers - //======================================= - - // Now we emit the code for if the argument was originally passed in - // registers. First start the appropriate block: - CGF.EmitBlock(InRegBlock); - - llvm::Value *reg_top = nullptr; - Address reg_top_p = - CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); - reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); - Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs), - CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8)); - Address RegAddr = Address::invalid(); - llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy; - - if (IsIndirect) { - // If it's been passed indirectly (actually a struct), whatever we find from - // stored registers or on the stack will actually be a struct **. - MemTy = llvm::PointerType::getUnqual(MemTy); - } - - const Type *Base = nullptr; - uint64_t NumMembers = 0; - bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers); - if (IsHFA && NumMembers > 1) { - // Homogeneous aggregates passed in registers will have their elements split - // and stored 16-bytes apart regardless of size (they're notionally in qN, - // qN+1, ...). We reload and store into a temporary local variable - // contiguously. - assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); - auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0)); - llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); - llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); - Address Tmp = CGF.CreateTempAlloca(HFATy, - std::max(TyAlign, BaseTyInfo.Align)); - - // On big-endian platforms, the value will be right-aligned in its slot. 
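A small sketch of that right-adjustment (hypothetical helper; HFA elements sit 16 bytes apart regardless of base size):

#include <cassert>

// Sketch: on big-endian AArch64, a base type narrower than its 16-byte
// register slot is read from the high-address end of the slot.
int beLoadOffset(int baseWidthBytes) {
  return baseWidthBytes < 16 ? 16 - baseWidthBytes : 0;
}

int main() {
  assert(beLoadOffset(4) == 12);  // float members: load at slot + 12
  assert(beLoadOffset(16) == 0);  // q-register-sized members: no shift
}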
- int Offset = 0; - if (CGF.CGM.getDataLayout().isBigEndian() && - BaseTyInfo.Width.getQuantity() < 16) - Offset = 16 - BaseTyInfo.Width.getQuantity(); - - for (unsigned i = 0; i < NumMembers; ++i) { - CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset); - Address LoadAddr = - CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); - LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy); - - Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i); - - llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); - CGF.Builder.CreateStore(Elem, StoreAddr); - } - - RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy); - } else { - // Otherwise the object is contiguous in memory. - - // It might be right-aligned in its slot. - CharUnits SlotSize = BaseAddr.getAlignment(); - if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect && - (IsHFA || !isAggregateTypeForABI(Ty)) && - TySize < SlotSize) { - CharUnits Offset = SlotSize - TySize; - BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset); - } - - RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy); - } - - CGF.EmitBranch(ContBlock); - - //======================================= - // Argument was on the stack - //======================================= - CGF.EmitBlock(OnStackBlock); - - Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); - llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); - - // Again, stack arguments may need realignment. In this case both integer and - // floating-point ones might be affected. - if (!IsIndirect && TyAlign.getQuantity() > 8) { - int Align = TyAlign.getQuantity(); - - OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty); - - OnStackPtr = CGF.Builder.CreateAdd( - OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), - "align_stack"); - OnStackPtr = CGF.Builder.CreateAnd( - OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), - "align_stack"); - - OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy); - } - Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty, - std::max(CharUnits::fromQuantity(8), TyAlign)); - - // All stack slots are multiples of 8 bytes. 
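The 8-byte granularity just noted is easy to state on its own (a sketch with hypothetical sizes):

// Sketch: stack argument sizes round up to whole 8-byte slots.
constexpr unsigned alignTo8(unsigned bytes) { return (bytes + 7u) & ~7u; }

static_assert(alignTo8(12) == 16, "a 12-byte aggregate takes two slots");
static_assert(alignTo8(8) == 8, "an 8-byte value takes exactly one slot");

int main() {}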
- CharUnits StackSlotSize = CharUnits::fromQuantity(8); - CharUnits StackSize; - if (IsIndirect) - StackSize = StackSlotSize; - else - StackSize = TySize.alignTo(StackSlotSize); - - llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); - llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP( - CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack"); - - // Write the new value of __stack for the next call to va_arg - CGF.Builder.CreateStore(NewStack, stack_p); - - if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && - TySize < StackSlotSize) { - CharUnits Offset = StackSlotSize - TySize; - OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset); - } - - OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy); - - CGF.EmitBranch(ContBlock); - - //======================================= - // Tidy up - //======================================= - CGF.EmitBlock(ContBlock); - - Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr, - OnStackBlock, "vaargs.addr"); - - if (IsIndirect) - return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy, - TyAlign); - - return ResAddr; -} - -Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, - CodeGenFunction &CGF) const { - // The backend's lowering doesn't support va_arg for aggregates or - // illegal vector types. Lower VAArg here for these cases and use - // the LLVM va_arg instruction for everything else. - if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) - return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); - - uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8; - CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); - - // Empty records are ignored for parameter passing purposes. - if (isEmptyRecord(getContext(), Ty, true)) { - Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"), - getVAListElementType(CGF), SlotSize); - Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - return Addr; - } - - // The size of the actual thing passed, which might end up just - // being a pointer for indirect types. - auto TyInfo = getContext().getTypeInfoInChars(Ty); - - // Arguments bigger than 16 bytes which aren't homogeneous - // aggregates should be passed indirectly. - bool IsIndirect = false; - if (TyInfo.Width.getQuantity() > 16) { - const Type *Base = nullptr; - uint64_t Members = 0; - IsIndirect = !isHomogeneousAggregate(Ty, Base, Members); - } - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, - TyInfo, SlotSize, /*AllowHigherAlign*/ true); -} - -Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - bool IsIndirect = false; - - // Composites larger than 16 bytes are passed by reference. 
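That is the entire Win64 variadic rule; a minimal sketch of the size test in bit units (hypothetical helper):

#include <cassert>
#include <cstdint>

// Sketch: on Windows/AArch64 varargs, composites over 16 bytes travel as a
// pointer; everything else is copied into 8-byte argument slots.
bool passedByReference(bool isComposite, uint64_t sizeBits) {
  return isComposite && sizeBits > 128;
}

int main() {
  assert(!passedByReference(true, 128)); // 16-byte struct: by value
  assert(passedByReference(true, 192));  // 24-byte struct: by reference
}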
- if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128) - IsIndirect = true; - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, - CGF.getContext().getTypeInfoInChars(Ty), - CharUnits::fromQuantity(8), - /*allowHigherAlign*/ false); -} - -//===----------------------------------------------------------------------===// -// ARM ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class ARMABIInfo : public ABIInfo { -public: - enum ABIKind { - APCS = 0, - AAPCS = 1, - AAPCS_VFP = 2, - AAPCS16_VFP = 3, - }; - -private: - ABIKind Kind; - bool IsFloatABISoftFP; - -public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) { - setCCs(); - IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" || - CGT.getCodeGenOpts().FloatABI == ""; // default - } - - bool isEABI() const { - switch (getTarget().getTriple().getEnvironment()) { - case llvm::Triple::Android: - case llvm::Triple::EABI: - case llvm::Triple::EABIHF: - case llvm::Triple::GNUEABI: - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABI: - case llvm::Triple::MuslEABIHF: - return true; - default: - return false; - } - } - - bool isEABIHF() const { - switch (getTarget().getTriple().getEnvironment()) { - case llvm::Triple::EABIHF: - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABIHF: - return true; - default: - return false; - } - } - - ABIKind getABIKind() const { return Kind; } - - bool allowBFloatArgsAndRet() const override { - return !IsFloatABISoftFP && getTarget().hasBFloat16Type(); - } - -private: - ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, - unsigned functionCallConv) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, - unsigned functionCallConv) const; - ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, - uint64_t Members) const; - ABIArgInfo coerceIllegalVector(QualType Ty) const; - bool isIllegalVectorType(QualType Ty) const; - bool containsAnyFP16Vectors(QualType Ty) const; - - bool isHomogeneousAggregateBaseType(QualType Ty) const override; - bool isHomogeneousAggregateSmallEnough(const Type *Ty, - uint64_t Members) const override; - bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; - - bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; - - void computeInfo(CGFunctionInfo &FI) const override; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - llvm::CallingConv::ID getLLVMDefaultCC() const; - llvm::CallingConv::ID getABIDefaultCC() const; - void setCCs(); -}; - -class ARMSwiftABIInfo : public SwiftABIInfo { -public: - explicit ARMSwiftABIInfo(CodeGenTypes &CGT) - : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} - - bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, - unsigned NumElts) const override; -}; - -class ARMTargetCodeGenInfo : public TargetCodeGenInfo { -public: - ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) - : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) { - SwiftInfo = std::make_unique<ARMSwiftABIInfo>(CGT); - } - - const ARMABIInfo &getABIInfo() const { - return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo()); - } - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - return 13; - } - - StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; - } - - 
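The environment-based predicates above can be exercised without LLVM; a standalone sketch of the triple suffixes that isEABIHF() treats as hard-float (hypothetical helper):

#include <cassert>
#include <string>

// Sketch: environment suffixes considered hard-float EABI.
bool looksHardFloatEABI(const std::string &env) {
  return env == "eabihf" || env == "gnueabihf" || env == "musleabihf";
}

int main() {
  assert(looksHardFloatEABI("gnueabihf")); // e.g. armv7-unknown-linux-gnueabihf
  assert(!looksHardFloatEABI("gnueabi"));  // soft-float-by-default EABI
}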
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); - - // 0-15 are the 16 integer registers. - AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); - return false; - } - - unsigned getSizeOfUnwindException() const override { - if (getABIInfo().isEABI()) return 88; - return TargetCodeGenInfo::getSizeOfUnwindException(); - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - if (GV->isDeclaration()) - return; - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) - return; - auto *Fn = cast<llvm::Function>(GV); - - if (const auto *TA = FD->getAttr<TargetAttr>()) { - ParsedTargetAttr Attr = - CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); - if (!Attr.BranchProtection.empty()) { - TargetInfo::BranchProtectionInfo BPI; - StringRef DiagMsg; - StringRef Arch = - Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU; - if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection, - Arch, BPI, DiagMsg)) { - CGM.getDiags().Report( - D->getLocation(), - diag::warn_target_unsupported_branch_protection_attribute) - << Arch; - } else { - static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; - assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 && - "Unexpected SignReturnAddressScopeKind"); - Fn->addFnAttr( - "sign-return-address", - SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); - - Fn->addFnAttr("branch-target-enforcement", - BPI.BranchTargetEnforcement ? "true" : "false"); - } - } else if (CGM.getLangOpts().BranchTargetEnforcement || - CGM.getLangOpts().hasSignReturnAddress()) { - // If the Branch Protection attribute is missing, validate the target - // Architecture attribute against Branch Protection command line - // settings. - if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.CPU)) - CGM.getDiags().Report( - D->getLocation(), - diag::warn_target_unsupported_branch_protection_attribute) - << Attr.CPU; - } - } - - const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>(); - if (!Attr) - return; - - const char *Kind; - switch (Attr->getInterrupt()) { - case ARMInterruptAttr::Generic: Kind = ""; break; - case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; - case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; - case ARMInterruptAttr::SWI: Kind = "SWI"; break; - case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; - case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; - } - - Fn->addFnAttr("interrupt", Kind); - - ARMABIInfo::ABIKind ABI = cast<ARMABIInfo>(getABIInfo()).getABIKind(); - if (ABI == ARMABIInfo::APCS) - return; - - // AAPCS guarantees that sp will be 8-byte aligned on any public interface, - // however this is not necessarily true on taking any interrupt. Instruct - // the backend to perform a realignment as part of the function prologue. 
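In source terms, this path is reached through the ARM interrupt attribute; a hypothetical handler (meaningful only when compiled for an ARM target):

// Hypothetical handler: on AAPCS variants Clang attaches "interrupt"="IRQ"
// and an 8-byte stack realignment to the emitted function, since an
// interrupt may arrive with sp only 4-byte aligned.
__attribute__((interrupt("IRQ"))) void irq_handler(void) {}

int main() {}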
- llvm::AttrBuilder B(Fn->getContext()); - B.addStackAlignmentAttr(8); - Fn->addFnAttrs(B); - } -}; - -class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { -public: - WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) - : ARMTargetCodeGenInfo(CGT, K) {} - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; - - void getDependentLibraryOption(llvm::StringRef Lib, - llvm::SmallString<24> &Opt) const override { - Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); - } - - void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, - llvm::SmallString<32> &Opt) const override { - Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; - } -}; - -void WindowsARMTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - if (GV->isDeclaration()) - return; - addStackProbeTargetAttributes(D, GV, CGM); -} -} - -void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!::classifyReturnType(getCXXABI(), FI, *this)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), - FI.getCallingConvention()); - - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, FI.isVariadic(), - FI.getCallingConvention()); - - - // Always honor user-specified calling convention. - if (FI.getCallingConvention() != llvm::CallingConv::C) - return; - - llvm::CallingConv::ID cc = getRuntimeCC(); - if (cc != llvm::CallingConv::C) - FI.setEffectiveCallingConvention(cc); -} - -/// Return the default calling convention that LLVM will use. -llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { - // The default calling convention that LLVM will infer. - if (isEABIHF() || getTarget().getTriple().isWatchABI()) - return llvm::CallingConv::ARM_AAPCS_VFP; - else if (isEABI()) - return llvm::CallingConv::ARM_AAPCS; - else - return llvm::CallingConv::ARM_APCS; -} - -/// Return the calling convention that our ABI would like us to use -/// as the C calling convention. -llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { - switch (getABIKind()) { - case APCS: return llvm::CallingConv::ARM_APCS; - case AAPCS: return llvm::CallingConv::ARM_AAPCS; - case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; - case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; - } - llvm_unreachable("bad ABI kind"); -} - -void ARMABIInfo::setCCs() { - assert(getRuntimeCC() == llvm::CallingConv::C); - - // Don't muddy up the IR with a ton of explicit annotations if - // they'd just match what LLVM will infer from the triple. - llvm::CallingConv::ID abiCC = getABIDefaultCC(); - if (abiCC != getLLVMDefaultCC()) - RuntimeCC = abiCC; -} - -ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 32) { - llvm::Type *ResType = - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 64 || Size == 128) { - auto *ResType = llvm::FixedVectorType::get( - llvm::Type::getInt32Ty(getVMContext()), Size / 32); - return ABIArgInfo::getDirect(ResType); - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); -} - -ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, - const Type *Base, - uint64_t Members) const { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Base can be a floating-point or a vector. 
- if (const VectorType *VT = Base->getAs<VectorType>()) { - // FP16 vectors should be converted to integer vectors - if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) { - uint64_t Size = getContext().getTypeSize(VT); - auto *NewVecTy = llvm::FixedVectorType::get( - llvm::Type::getInt32Ty(getVMContext()), Size / 32); - llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); - return ABIArgInfo::getDirect(Ty, 0, nullptr, false); - } - } - unsigned Align = 0; - if (getABIKind() == ARMABIInfo::AAPCS || - getABIKind() == ARMABIInfo::AAPCS_VFP) { - // For alignment adjusted HFAs, cap the argument alignment to 8, leave it - // default otherwise. - Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); - unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); - Align = (Align > BaseAlign && Align >= 8) ? 8 : 0; - } - return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align); -} - -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, - unsigned functionCallConv) const { - // 6.1.2.1 The following argument types are VFP CPRCs: - // A single-precision floating-point type (including promoted - // half-precision types); A double-precision floating-point type; - // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate - // with a Base Type of a single- or double-precision floating-point type, - // 64-bit containerized vectors or 128-bit containerized vectors with one - // to four Elements. - // Variadic functions should always marshal to the base standard. - bool IsAAPCS_VFP = - !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false); - - Ty = useFirstFieldIfTransparentUnion(Ty); - - // Handle illegal vector types here. - if (isIllegalVectorType(Ty)) - return coerceIllegalVector(Ty); - - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) { - Ty = EnumTy->getDecl()->getIntegerType(); - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > 64) - return getNaturalAlignIndirect(Ty, /*ByVal=*/true); - - return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); - } - - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - } - - // Ignore empty records. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - if (IsAAPCS_VFP) { - // Homogeneous Aggregates need to be expanded when we can fit the aggregate - // into VFP registers. - const Type *Base = nullptr; - uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, Members)) - return classifyHomogeneousAggregate(Ty, Base, Members); - } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { - // WatchOS does have homogeneous aggregates. Note that we intentionally use - // this convention even for a variadic function: the backend will use GPRs - // if needed. 
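Before the aggregate is materialized below, note that the alignment capping used by classifyHomogeneousAggregate above reduces to one expression (hypothetical helper, byte units):

#include <cassert>

// Sketch: alignment-adjusted HFAs get their argument alignment capped at
// 8 bytes; 0 means "use the default for the coerced type".
unsigned hfaArgAlign(unsigned align, unsigned baseAlign) {
  return (align > baseAlign && align >= 8) ? 8 : 0;
}

int main() {
  assert(hfaArgAlign(16, 4) == 8); // over-aligned float HFA: cap at 8
  assert(hfaArgAlign(4, 4) == 0);  // naturally aligned: leave the default
}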
- const Type *Base = nullptr; - uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, Members)) { - assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); - llvm::Type *Ty = - llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); - return ABIArgInfo::getDirect(Ty, 0, nullptr, false); - } - } - - if (getABIKind() == ARMABIInfo::AAPCS16_VFP && - getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { - // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're - // bigger than 128-bits, they get placed in space allocated by the caller, - // and a pointer is passed. - return ABIArgInfo::getIndirect( - CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); - } - - // Support byval for ARM. - // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at - // most 8-byte. We realign the indirect argument if type alignment is bigger - // than ABI alignment. - uint64_t ABIAlign = 4; - uint64_t TyAlign; - if (getABIKind() == ARMABIInfo::AAPCS_VFP || - getABIKind() == ARMABIInfo::AAPCS) { - TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); - ABIAlign = std::clamp(TyAlign, (uint64_t)4, (uint64_t)8); - } else { - TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); - } - if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { - assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval"); - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), - /*ByVal=*/true, - /*Realign=*/TyAlign > ABIAlign); - } - - // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of - // same size and alignment. - if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(Ty, getContext(), getVMContext()); - } - - // Otherwise, pass by coercing to a structure of the appropriate size. - llvm::Type* ElemTy; - unsigned SizeRegs; - // FIXME: Try to match the types of the arguments more accurately where - // we can. - if (TyAlign <= 4) { - ElemTy = llvm::Type::getInt32Ty(getVMContext()); - SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; - } else { - ElemTy = llvm::Type::getInt64Ty(getVMContext()); - SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; - } - - return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); -} - -static bool isIntegerLikeType(QualType Ty, ASTContext &Context, - llvm::LLVMContext &VMContext) { - // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure - // is called integer-like if its size is less than or equal to one word, and - // the offset of each of its addressable sub-fields is zero. - - uint64_t Size = Context.getTypeSize(Ty); - - // Check that the type fits in a word. - if (Size > 32) - return false; - - // FIXME: Handle vector types! - if (Ty->isVectorType()) - return false; - - // Float types are never treated as "integer like". - if (Ty->isRealFloatingType()) - return false; - - // If this is a builtin or pointer type then it is ok. - if (Ty->getAs<BuiltinType>() || Ty->isPointerType()) - return true; - - // Small complex integer types are "integer like". - if (const ComplexType *CT = Ty->getAs<ComplexType>()) - return isIntegerLikeType(CT->getElementType(), Context, VMContext); - - // Single element and zero sized arrays should be allowed, by the definition - // above, but they are not. - - // Otherwise, it must be a record type. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) return false; - - // Ignore records with flexible arrays. 
- const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return false; - - // Check that all sub-fields are at offset 0, and are themselves "integer - // like". - const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); - - bool HadField = false; - unsigned idx = 0; - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i, ++idx) { - const FieldDecl *FD = *i; - - // Bit-fields are not addressable, we only need to verify they are "integer - // like". We still have to disallow a subsequent non-bitfield, for example: - // struct { int : 0; int x } - // is non-integer like according to gcc. - if (FD->isBitField()) { - if (!RD->isUnion()) - HadField = true; - - if (!isIntegerLikeType(FD->getType(), Context, VMContext)) - return false; - - continue; - } - - // Check if this field is at offset 0. - if (Layout.getFieldOffset(idx) != 0) - return false; - - if (!isIntegerLikeType(FD->getType(), Context, VMContext)) - return false; - - // Only allow at most one field in a structure. This doesn't match the - // wording above, but follows gcc in situations with a field following an - // empty structure. - if (!RD->isUnion()) { - if (HadField) - return false; - - HadField = true; - } - } - - return true; -} - -ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, - unsigned functionCallConv) const { - - // Variadic functions should always marshal to the base standard. - bool IsAAPCS_VFP = - !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true); - - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (const VectorType *VT = RetTy->getAs<VectorType>()) { - // Large vector types should be returned via memory. - if (getContext().getTypeSize(RetTy) > 128) - return getNaturalAlignIndirect(RetTy); - // TODO: FP16/BF16 vectors should be converted to integer vectors - // This check is similar to isIllegalVectorType - refactor? - if ((!getTarget().hasLegalHalfType() && - (VT->getElementType()->isFloat16Type() || - VT->getElementType()->isHalfType())) || - (IsFloatABISoftFP && - VT->getElementType()->isBFloat16Type())) - return coerceIllegalVector(RetTy); - } - - if (!isAggregateTypeForABI(RetTy)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > 64) - return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); - - return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect(); - } - - // Are we following APCS? - if (getABIKind() == APCS) { - if (isEmptyRecord(getContext(), RetTy, false)) - return ABIArgInfo::getIgnore(); - - // Complex types are all returned as packed integers. - // - // FIXME: Consider using 2 x vector types if the back end handles them - // correctly. - if (RetTy->isAnyComplexType()) - return ABIArgInfo::getDirect(llvm::IntegerType::get( - getVMContext(), getContext().getTypeSize(RetTy))); - - // Integer like structures are returned in r0. - if (isIntegerLikeType(RetTy, getContext(), getVMContext())) { - // Return in the smallest viable integer type. 
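A standalone sketch of that size bucketing (bit units; the 32-bit ceiling follows from the integer-like check above, which rejects anything wider than a word):

#include <cassert>
#include <cstdint>

// Sketch: integer-like APCS returns use the narrowest integer type that
// holds the value.
unsigned returnIntBits(uint64_t sizeBits) {
  if (sizeBits <= 8) return 8;
  if (sizeBits <= 16) return 16;
  return 32;
}

int main() {
  assert(returnIntBits(8) == 8);   // struct { char c; }    -> i8
  assert(returnIntBits(16) == 16); // struct { short s; }   -> i16
  assert(returnIntBits(24) == 32); // struct { char c[3]; } -> i32
}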
- uint64_t Size = getContext().getTypeSize(RetTy);
- if (Size <= 8)
- return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
- }
-
- // Otherwise return in memory.
- return getNaturalAlignIndirect(RetTy);
- }
-
- // Otherwise this is an AAPCS variant.
-
- if (isEmptyRecord(getContext(), RetTy, true))
- return ABIArgInfo::getIgnore();
-
- // Check for homogeneous aggregates with AAPCS-VFP.
- if (IsAAPCS_VFP) {
- const Type *Base = nullptr;
- uint64_t Members = 0;
- if (isHomogeneousAggregate(RetTy, Base, Members))
- return classifyHomogeneousAggregate(RetTy, Base, Members);
- }
-
- // Aggregates <= 4 bytes are returned in r0; other aggregates
- // are returned indirectly.
- uint64_t Size = getContext().getTypeSize(RetTy);
- if (Size <= 32) {
- // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of
- // same size and alignment.
- if (getTarget().isRenderScriptTarget()) {
- return coerceToIntArray(RetTy, getContext(), getVMContext());
- }
- if (getDataLayout().isBigEndian())
- // Return in 32-bit integer type (as if loaded by LDR, AAPCS 5.4)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
- // Return in the smallest viable integer type.
- if (Size <= 8)
- return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
- } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) {
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
- llvm::Type *CoerceTy =
- llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
- return ABIArgInfo::getDirect(CoerceTy);
- }
-
- return getNaturalAlignIndirect(RetTy);
- }
-
- /// isIllegalVectorType - check whether Ty is an illegal vector type.
- bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
- if (const VectorType *VT = Ty->getAs<VectorType>()) {
- // On targets that don't support half, fp16 or bfloat, they are expanded
- // into float, and we don't want the ABI to depend on whether or not they
- // are supported in hardware. Thus return true to coerce vectors of these
- // types into integer vectors.
- // We do not depend on hasLegalHalfType for bfloat as it is a
- // separate IR type.
- if ((!getTarget().hasLegalHalfType() &&
- (VT->getElementType()->isFloat16Type() ||
- VT->getElementType()->isHalfType())) ||
- (IsFloatABISoftFP &&
- VT->getElementType()->isBFloat16Type()))
- return true;
- if (isAndroid()) {
- // Android shipped using Clang 3.1, which supported a slightly different
- // vector ABI. The primary differences were that 3-element vector types
- // were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path
- // accepts that legacy behavior for Android only.
- // Check whether VT is legal.
- unsigned NumElements = VT->getNumElements();
- // NumElements should be power of 2 or equal to 3.
- if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
- return true;
- } else {
- // Check whether VT is legal.
- unsigned NumElements = VT->getNumElements();
- uint64_t Size = getContext().getTypeSize(VT);
- // NumElements should be power of 2.
- if (!llvm::isPowerOf2_32(NumElements))
- return true;
- // Size should be greater than 32 bits.
- return Size <= 32; - } - } - return false; -} - -/// Return true if a type contains any 16-bit floating point vectors -bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { - if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { - uint64_t NElements = AT->getSize().getZExtValue(); - if (NElements == 0) - return false; - return containsAnyFP16Vectors(AT->getElementType()); - } else if (const RecordType *RT = Ty->getAs<RecordType>()) { - const RecordDecl *RD = RT->getDecl(); - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) { - return containsAnyFP16Vectors(B.getType()); - })) - return true; - - if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) { - return FD && containsAnyFP16Vectors(FD->getType()); - })) - return true; - - return false; - } else { - if (const VectorType *VT = Ty->getAs<VectorType>()) - return (VT->getElementType()->isFloat16Type() || - VT->getElementType()->isBFloat16Type() || - VT->getElementType()->isHalfType()); - return false; - } -} - -bool ARMSwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, - unsigned NumElts) const { - if (!llvm::isPowerOf2_32(NumElts)) - return false; - unsigned size = CGT.getDataLayout().getTypeStoreSizeInBits(EltTy); - if (size > 64) - return false; - if (VectorSize.getQuantity() != 8 && - (VectorSize.getQuantity() != 16 || NumElts == 1)) - return false; - return true; -} - -bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { - // Homogeneous aggregates for AAPCS-VFP must have base types of float, - // double, or 64-bit or 128-bit vectors. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->getKind() == BuiltinType::Float || - BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) - return true; - } else if (const VectorType *VT = Ty->getAs<VectorType>()) { - unsigned VecSize = getContext().getTypeSize(VT); - if (VecSize == 64 || VecSize == 128) - return true; - } - return false; -} - -bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, - uint64_t Members) const { - return Members <= 4; -} - -bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const { - // AAPCS32 says that the rule for whether something is a homogeneous - // aggregate is applied to the output of the data layout decision. So - // anything that doesn't affect the data layout also does not affect - // homogeneity. In particular, zero-length bitfields don't stop a struct - // being homogeneous. - return true; -} - -bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention, - bool acceptHalf) const { - // Give precedence to user-specified calling conventions. - if (callConvention != llvm::CallingConv::C) - return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP); - else - return (getABIKind() == AAPCS_VFP) || - (acceptHalf && (getABIKind() == AAPCS16_VFP)); -} - -Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - CharUnits SlotSize = CharUnits::fromQuantity(4); - - // Empty records are ignored for parameter passing purposes. 
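// Sketch of the AAPCS-VFP homogeneous-aggregate rules above, as hypothetical
// free functions: the base type must be float, double, long double, or a
// 64/128-bit vector, and at most four members are allowed.
#include <cstdint>
static bool armIsHABaseType(bool IsFloatOrDouble, bool IsVector,
                            uint64_t VecSizeBits) {
  if (IsFloatOrDouble)
    return true;
  return IsVector && (VecSizeBits == 64 || VecSizeBits == 128);
}
static bool armIsHASmallEnough(uint64_t Members) { return Members <= 4; }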
- if (isEmptyRecord(getContext(), Ty, true)) { - VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); - auto *Load = CGF.Builder.CreateLoad(VAListAddr); - Address Addr = Address(Load, CGF.Int8Ty, SlotSize); - return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - } - - CharUnits TySize = getContext().getTypeSizeInChars(Ty); - CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty); - - // Use indirect if size of the illegal vector is bigger than 16 bytes. - bool IsIndirect = false; - const Type *Base = nullptr; - uint64_t Members = 0; - if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) { - IsIndirect = true; - - // ARMv7k passes structs bigger than 16 bytes indirectly, in space - // allocated by the caller. - } else if (TySize > CharUnits::fromQuantity(16) && - getABIKind() == ARMABIInfo::AAPCS16_VFP && - !isHomogeneousAggregate(Ty, Base, Members)) { - IsIndirect = true; - - // Otherwise, bound the type's ABI alignment. - // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for - // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. - // Our callers should be prepared to handle an under-aligned address. - } else if (getABIKind() == ARMABIInfo::AAPCS_VFP || - getABIKind() == ARMABIInfo::AAPCS) { - TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); - TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8)); - } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { - // ARMv7k allows type alignment up to 16 bytes. - TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); - TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16)); - } else { - TyAlignForABI = CharUnits::fromQuantity(4); - } - - TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None); - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, - SlotSize, /*AllowHigherAlign*/ true); -} - -//===----------------------------------------------------------------------===// -// NVPTX ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class NVPTXTargetCodeGenInfo; - -class NVPTXABIInfo : public ABIInfo { - NVPTXTargetCodeGenInfo &CGInfo; - -public: - NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) - : ABIInfo(CGT), CGInfo(Info) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty) const; - - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - bool isUnsupportedType(QualType T) const; - ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; -}; - -class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { -public: - NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {} - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; - bool shouldEmitStaticExternCAliases() const override; - - llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { - // On the device side, surface reference is represented as an object handle - // in 64-bit integer. - return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); - } - - llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { - // On the device side, texture reference is represented as an object handle - // in 64-bit integer. 
- return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); - } - - bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, - LValue Src) const override { - emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); - return true; - } - - bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, - LValue Src) const override { - emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); - return true; - } - -private: - // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the - // resulting MDNode to the nvvm.annotations MDNode. - static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand); - - static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, - LValue Src) { - llvm::Value *Handle = nullptr; - llvm::Constant *C = - llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer()); - // Lookup `addrspacecast` through the constant pointer if any. - if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) - C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); - if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { - // Load the handle from the specific global variable using - // `nvvm.texsurf.handle.internal` intrinsic. - Handle = CGF.EmitRuntimeCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, - {GV->getType()}), - {GV}, "texsurf_handle"); - } else - Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); - CGF.EmitStoreOfScalar(Handle, Dst); - } -}; - -/// Checks if the type is unsupported directly by the current target. -bool NVPTXABIInfo::isUnsupportedType(QualType T) const { - ASTContext &Context = getContext(); - if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) - return true; - if (!Context.getTargetInfo().hasFloat128Type() && - (T->isFloat128Type() || - (T->isRealFloatingType() && Context.getTypeSize(T) == 128))) - return true; - if (const auto *EIT = T->getAs<BitIntType>()) - return EIT->getNumBits() > - (Context.getTargetInfo().hasInt128Type() ? 128U : 64U); - if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && - Context.getTypeSize(T) > 64U) - return true; - if (const auto *AT = T->getAsArrayTypeUnsafe()) - return isUnsupportedType(AT->getElementType()); - const auto *RT = T->getAs<RecordType>(); - if (!RT) - return false; - const RecordDecl *RD = RT->getDecl(); - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (const CXXBaseSpecifier &I : CXXRD->bases()) - if (isUnsupportedType(I.getType())) - return true; - - for (const FieldDecl *I : RD->fields()) - if (isUnsupportedType(I->getType())) - return true; - return false; -} - -/// Coerce the given type into an array with maximum allowed size of elements. -ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty, - unsigned MaxSize) const { - // Alignment and Size are measured in bits. 
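// Sketch of the capability checks in isUnsupportedType above; TargetCaps is
// a hypothetical stand-in for the TargetInfo queries.
#include <cstdint>
struct TargetCaps { bool HasFloat16; bool HasFloat128; bool HasInt128; };
static bool nvptxScalarUnsupported(const TargetCaps &C, bool IsFloat16,
                                   bool Is128BitFloat, unsigned IntBits) {
  if (IsFloat16 && !C.HasFloat16)
    return true;
  if (Is128BitFloat && !C.HasFloat128)
    return true;
  // Wide integers (including _BitInt) are capped at 128 or 64 bits.
  return IntBits > (C.HasInt128 ? 128u : 64u);
}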
- const uint64_t Size = getContext().getTypeSize(Ty); - const uint64_t Alignment = getContext().getTypeAlign(Ty); - const unsigned Div = std::min<unsigned>(MaxSize, Alignment); - llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div); - const uint64_t NumElements = (Size + Div - 1) / Div; - return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); -} - -ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (getContext().getLangOpts().OpenMP && - getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy)) - return coerceToIntArrayWithLimit(RetTy, 64); - - // Note: this is different from the default ABI. - if (!RetTy->isScalarType()) - return ABIArgInfo::getDirect(); - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // Return aggregate types indirectly, by value. - if (isAggregateTypeForABI(Ty)) { - // Under CUDA device compilation, tex/surf builtin types are replaced with - // object types and passed directly. - if (getContext().getLangOpts().CUDAIsDevice) { - if (Ty->isCUDADeviceBuiltinSurfaceType()) - return ABIArgInfo::getDirect( - CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); - if (Ty->isCUDADeviceBuiltinTextureType()) - return ABIArgInfo::getDirect( - CGInfo.getCUDADeviceBuiltinTextureDeviceType()); - } - return getNaturalAlignIndirect(Ty, /* byval */ true); - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if ((EIT->getNumBits() > 128) || - (!getContext().getTargetInfo().hasInt128Type() && - EIT->getNumBits() > 64)) - return getNaturalAlignIndirect(Ty, /* byval */ true); - } - - return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); -} - -void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - - // Always honor user-specified calling convention.
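// Sketch of the arithmetic in coerceToIntArrayWithLimit above: a Size-bit
// value becomes an [N x iDiv] array, where Div = min(MaxSize, Alignment)
// and N is the element count rounded up (all quantities in bits).
#include <algorithm>
#include <cstdint>
struct IntArrayShape { unsigned ElemBits; uint64_t NumElems; };
static IntArrayShape nvptxCoerceShape(uint64_t SizeBits, uint64_t AlignBits,
                                      unsigned MaxSizeBits) {
  const unsigned Div =
      static_cast<unsigned>(std::min<uint64_t>(MaxSizeBits, AlignBits));
  return {Div, (SizeBits + Div - 1) / Div};
}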
- if (FI.getCallingConvention() != llvm::CallingConv::C) - return; - - FI.setEffectiveCallingConvention(getRuntimeCC()); -} - -Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - llvm_unreachable("NVPTX does not support varargs"); -} - -void NVPTXTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (GV->isDeclaration()) - return; - const VarDecl *VD = dyn_cast_or_null<VarDecl>(D); - if (VD) { - if (M.getLangOpts().CUDA) { - if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) - addNVVMMetadata(GV, "surface", 1); - else if (VD->getType()->isCUDADeviceBuiltinTextureType()) - addNVVMMetadata(GV, "texture", 1); - return; - } - } - - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) return; - - llvm::Function *F = cast<llvm::Function>(GV); - - // Perform special handling in OpenCL mode - if (M.getLangOpts().OpenCL) { - // Use OpenCL function attributes to check for kernel functions - // By default, all functions are device functions - if (FD->hasAttr<OpenCLKernelAttr>()) { - // OpenCL __kernel functions get kernel metadata - // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1); - // And kernel functions are not subject to inlining - F->addFnAttr(llvm::Attribute::NoInline); - } - } - - // Perform special handling in CUDA mode. - if (M.getLangOpts().CUDA) { - // CUDA __global__ functions get a kernel metadata entry. Since - // __global__ functions cannot be called from the device, we do not - // need to set the noinline attribute. - if (FD->hasAttr<CUDAGlobalAttr>()) { - // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1); - } - if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) { - // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node - llvm::APSInt MaxThreads(32); - MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); - if (MaxThreads > 0) - addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); - - // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was - // not specified in __launch_bounds__ or if the user specified a 0 value, - // we don't have to add a PTX directive. 
- if (Attr->getMinBlocks()) { - llvm::APSInt MinBlocks(32); - MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); - if (MinBlocks > 0) - // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node - addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); - } - } - } -} - -void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, - StringRef Name, int Operand) { - llvm::Module *M = GV->getParent(); - llvm::LLVMContext &Ctx = M->getContext(); - - // Get "nvvm.annotations" metadata node - llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); - - llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; - // Append metadata to nvvm.annotations - MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); -} - -bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { - return false; -} -} - -//===----------------------------------------------------------------------===// -// SystemZ ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class SystemZABIInfo : public ABIInfo { - bool HasVector; - bool IsSoftFloatABI; - -public: - SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF) - : ABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {} - - bool isPromotableIntegerTypeForABI(QualType Ty) const; - bool isCompoundType(QualType Ty) const; - bool isVectorArgumentType(QualType Ty) const; - bool isFPArgumentType(QualType Ty) const; - QualType GetSingleElementType(QualType Ty) const; - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType ArgTy) const; - - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { - // These are used for speeding up the search for a visible vector ABI. - mutable bool HasVisibleVecABIFlag = false; - mutable std::set<const Type *> SeenTypes; - - // Returns true (the first time) if Ty is or found to make use of a vector - // type (e.g. as a function argument). - bool isVectorTypeBased(const Type *Ty) const; - -public: - SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI) - : TargetCodeGenInfo( - std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) { - SwiftInfo = - std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); - } - - // The vector ABI is different when the vector facility is present and when - // a module e.g. defines an externally visible vector variable, a flag - // indicating a visible vector ABI is added. Eventually this will result in - // a GNU attribute indicating the vector ABI of the module. Ty is the type - // of a variable or function parameter that is globally visible. - void handleExternallyVisibleObjABI(const Type *Ty, - CodeGen::CodeGenModule &M) const { - if (!HasVisibleVecABIFlag && isVectorTypeBased(Ty)) { - M.getModule().addModuleFlag(llvm::Module::Warning, - "s390x-visible-vector-ABI", 1); - HasVisibleVecABIFlag = true; - } - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override { - if (!D) - return; - - // Check if the vector ABI becomes visible by an externally visible - // variable or function. 
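// Sketch of the launch-bounds lowering above (hypothetical helper): a CUDA
// __launch_bounds__(MaxThreads, MinBlocks) attribute turns into a
// "maxntidx" annotation and, when MinBlocks is present and nonzero, a
// "minctasm" one.
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
static std::vector<std::pair<std::string, int64_t>>
nvptxLaunchBoundsAnnotations(int64_t MaxThreads, int64_t MinBlocks) {
  std::vector<std::pair<std::string, int64_t>> Anns;
  if (MaxThreads > 0)
    Anns.emplace_back("maxntidx", MaxThreads);
  if (MinBlocks > 0)
    Anns.emplace_back("minctasm", MinBlocks);
  return Anns;
}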
- if (const auto *VD = dyn_cast<VarDecl>(D)) { - if (VD->isExternallyVisible()) - handleExternallyVisibleObjABI(VD->getType().getTypePtr(), M); - } - else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { - if (FD->isExternallyVisible()) - handleExternallyVisibleObjABI(FD->getType().getTypePtr(), M); - } - } - - llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID, - CGBuilderTy &Builder, - CodeGenModule &CGM) const override { - assert(V->getType()->isFloatingPointTy() && "V should have an FP type."); - // Only use TDC in constrained FP mode. - if (!Builder.getIsFPConstrained()) - return nullptr; - - llvm::Type *Ty = V->getType(); - if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) { - llvm::Module &M = CGM.getModule(); - auto &Ctx = M.getContext(); - llvm::Function *TDCFunc = - llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::s390_tdc, Ty); - unsigned TDCBits = 0; - switch (BuiltinID) { - case Builtin::BI__builtin_isnan: - TDCBits = 0xf; - break; - case Builtin::BIfinite: - case Builtin::BI__finite: - case Builtin::BIfinitef: - case Builtin::BI__finitef: - case Builtin::BIfinitel: - case Builtin::BI__finitel: - case Builtin::BI__builtin_isfinite: - TDCBits = 0xfc0; - break; - case Builtin::BI__builtin_isinf: - TDCBits = 0x30; - break; - default: - break; - } - if (TDCBits) - return Builder.CreateCall( - TDCFunc, - {V, llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), TDCBits)}); - } - return nullptr; - } -}; -} - -bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // Promotable integer types are required to be promoted by the ABI. - if (ABIInfo::isPromotableIntegerTypeForABI(Ty)) - return true; - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() < 64) - return true; - - // 32-bit values must also be promoted. - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) - switch (BT->getKind()) { - case BuiltinType::Int: - case BuiltinType::UInt: - return true; - default: - return false; - } - return false; -} - -bool SystemZABIInfo::isCompoundType(QualType Ty) const { - return (Ty->isAnyComplexType() || - Ty->isVectorType() || - isAggregateTypeForABI(Ty)); -} - -bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { - return (HasVector && - Ty->isVectorType() && - getContext().getTypeSize(Ty) <= 128); -} - -bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { - if (IsSoftFloatABI) - return false; - - if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) - switch (BT->getKind()) { - case BuiltinType::Float: - case BuiltinType::Double: - return true; - default: - return false; - } - - return false; -} - -QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { - const RecordType *RT = Ty->getAs<RecordType>(); - - if (RT && RT->isStructureOrClassType()) { - const RecordDecl *RD = RT->getDecl(); - QualType Found; - - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - for (const auto &I : CXXRD->bases()) { - QualType Base = I.getType(); - - // Empty bases don't affect things either way. - if (isEmptyRecord(getContext(), Base, true)) - continue; - - if (!Found.isNull()) - return Ty; - Found = GetSingleElementType(Base); - } - - // Check the fields. - for (const auto *FD : RD->fields()) { - // Unlike isSingleElementStruct(), empty structure and array fields - // do count. 
So do anonymous bitfields that aren't zero-sized. - - // Like isSingleElementStruct(), ignore C++20 empty data members. - if (FD->hasAttr<NoUniqueAddressAttr>() && - isEmptyRecord(getContext(), FD->getType(), true)) - continue; - - // Unlike isSingleElementStruct(), arrays do not count. - // Nested structures still do though. - if (!Found.isNull()) - return Ty; - Found = GetSingleElementType(FD->getType()); - } - - // Unlike isSingleElementStruct(), trailing padding is allowed. - // An 8-byte aligned struct s { float f; } is passed as a double. - if (!Found.isNull()) - return Found; - } - - return Ty; -} - -Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - // Assume that va_list type is correct; should be pointer to LLVM type: - // struct { - // i64 __gpr; - // i64 __fpr; - // i8 *__overflow_arg_area; - // i8 *__reg_save_area; - // }; - - // Every non-vector argument occupies 8 bytes and is passed by preference - // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are - // always passed on the stack. - const SystemZTargetCodeGenInfo &SZCGI = - static_cast<const SystemZTargetCodeGenInfo &>( - CGT.getCGM().getTargetCodeGenInfo()); - Ty = getContext().getCanonicalType(Ty); - auto TyInfo = getContext().getTypeInfoInChars(Ty); - llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); - llvm::Type *DirectTy = ArgTy; - ABIArgInfo AI = classifyArgumentType(Ty); - bool IsIndirect = AI.isIndirect(); - bool InFPRs = false; - bool IsVector = false; - CharUnits UnpaddedSize; - CharUnits DirectAlign; - SZCGI.handleExternallyVisibleObjABI(Ty.getTypePtr(), CGT.getCGM()); - if (IsIndirect) { - DirectTy = llvm::PointerType::getUnqual(DirectTy); - UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); - } else { - if (AI.getCoerceToType()) - ArgTy = AI.getCoerceToType(); - InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy())); - IsVector = ArgTy->isVectorTy(); - UnpaddedSize = TyInfo.Width; - DirectAlign = TyInfo.Align; - } - CharUnits PaddedSize = CharUnits::fromQuantity(8); - if (IsVector && UnpaddedSize > PaddedSize) - PaddedSize = CharUnits::fromQuantity(16); - assert((UnpaddedSize <= PaddedSize) && "Invalid argument size."); - - CharUnits Padding = (PaddedSize - UnpaddedSize); - - llvm::Type *IndexTy = CGF.Int64Ty; - llvm::Value *PaddedSizeV = - llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity()); - - if (IsVector) { - // Work out the address of a vector argument on the stack. - // Vector arguments are always passed in the high bits of a - // single (8 byte) or double (16 byte) stack slot. 
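// Sketch of the slot accounting described above (hypothetical helper):
// every non-vector argument occupies one 8-byte slot; a vector longer than
// 8 bytes takes a 16-byte slot, and anything shorter is padded in its slot.
#include <cstdint>
static uint64_t systemzPaddedSlotBytes(uint64_t UnpaddedBytes, bool IsVector) {
  uint64_t Padded = 8;
  if (IsVector && UnpaddedBytes > Padded)
    Padded = 16;
  return Padded;
}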
- Address OverflowArgAreaPtr = - CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); - Address OverflowArgArea = - Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), - CGF.Int8Ty, TyInfo.Align); - Address MemAddr = - CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr"); - - // Update overflow_arg_area_ptr pointer - llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP( - OverflowArgArea.getElementType(), OverflowArgArea.getPointer(), - PaddedSizeV, "overflow_arg_area"); - CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); - - return MemAddr; - } - - assert(PaddedSize.getQuantity() == 8); - - unsigned MaxRegs, RegCountField, RegSaveIndex; - CharUnits RegPadding; - if (InFPRs) { - MaxRegs = 4; // Maximum of 4 FPR arguments - RegCountField = 1; // __fpr - RegSaveIndex = 16; // save offset for f0 - RegPadding = CharUnits(); // floats are passed in the high bits of an FPR - } else { - MaxRegs = 5; // Maximum of 5 GPR arguments - RegCountField = 0; // __gpr - RegSaveIndex = 2; // save offset for r2 - RegPadding = Padding; // values are passed in the low bits of a GPR - } - - Address RegCountPtr = - CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr"); - llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); - llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); - llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, - "fits_in_regs"); - - llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); - llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); - llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); - CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); - - // Emit code to load the value if it was passed in registers. - CGF.EmitBlock(InRegBlock); - - // Work out the address of an argument register. - llvm::Value *ScaledRegCount = - CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count"); - llvm::Value *RegBase = - llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity() - + RegPadding.getQuantity()); - llvm::Value *RegOffset = - CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); - Address RegSaveAreaPtr = - CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr"); - llvm::Value *RegSaveArea = - CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); - Address RawRegAddr( - CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, RegOffset, "raw_reg_addr"), - CGF.Int8Ty, PaddedSize); - Address RegAddr = - CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr"); - - // Update the register count - llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1); - llvm::Value *NewRegCount = - CGF.Builder.CreateAdd(RegCount, One, "reg_count"); - CGF.Builder.CreateStore(NewRegCount, RegCountPtr); - CGF.EmitBranch(ContBlock); - - // Emit code to load the value if it was passed in memory. - CGF.EmitBlock(InMemBlock); - - // Work out the address of a stack argument. 
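// Sketch of the register-save-area addressing just above (hypothetical
// helper): the next argument of a given class lives at
// reg_save_area + RegSaveIndex * 8 + RegPadding + RegCount * 8.
#include <cstdint>
static uint64_t systemzRegSaveOffset(bool InFPRs, uint64_t RegCount,
                                     uint64_t PaddingBytes) {
  const uint64_t SlotSize = 8;
  const uint64_t RegSaveIndex = InFPRs ? 16 : 2; // save offset for f0 vs. r2
  const uint64_t RegPadding = InFPRs ? 0 : PaddingBytes;
  return RegSaveIndex * SlotSize + RegPadding + RegCount * SlotSize;
}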
- Address OverflowArgAreaPtr = - CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); - Address OverflowArgArea = - Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), - CGF.Int8Ty, PaddedSize); - Address RawMemAddr = - CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr"); - Address MemAddr = - CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr"); - - // Update overflow_arg_area_ptr pointer - llvm::Value *NewOverflowArgArea = - CGF.Builder.CreateGEP(OverflowArgArea.getElementType(), - OverflowArgArea.getPointer(), PaddedSizeV, - "overflow_arg_area"); - CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); - CGF.EmitBranch(ContBlock); - - // Return the appropriate result. - CGF.EmitBlock(ContBlock); - Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, - "va_arg.addr"); - - if (IsIndirect) - ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), ArgTy, - TyInfo.Align); - - return ResAddr; -} - -ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - if (isVectorArgumentType(RetTy)) - return ABIArgInfo::getDirect(); - if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) - return getNaturalAlignIndirect(RetTy); - return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); -} - -ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { - // Handle the generic C++ ABI. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - // Integers and enums are extended to full register width. - if (isPromotableIntegerTypeForABI(Ty)) - return ABIArgInfo::getExtend(Ty); - - // Handle vector types and vector-like structure types. Note that - // as opposed to float-like structure types, we do not allow any - // padding for vector-like structures, so verify the sizes match. - uint64_t Size = getContext().getTypeSize(Ty); - QualType SingleElementTy = GetSingleElementType(Ty); - if (isVectorArgumentType(SingleElementTy) && - getContext().getTypeSize(SingleElementTy) == Size) - return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy)); - - // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly. - if (Size != 8 && Size != 16 && Size != 32 && Size != 64) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - - // Handle small structures. - if (const RecordType *RT = Ty->getAs<RecordType>()) { - // Structures with flexible arrays have variable length, so really - // fail the size test above. - const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - - // The structure is passed as an unextended integer, a float, or a double. - llvm::Type *PassTy; - if (isFPArgumentType(SingleElementTy)) { - assert(Size == 32 || Size == 64); - if (Size == 32) - PassTy = llvm::Type::getFloatTy(getVMContext()); - else - PassTy = llvm::Type::getDoubleTy(getVMContext()); - } else - PassTy = llvm::IntegerType::get(getVMContext(), Size); - return ABIArgInfo::getDirect(PassTy); - } - - // Non-structure compounds are passed indirectly. 
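// Sketch of the size filter above (hypothetical helper): only values of
// exactly 1, 2, 4, or 8 bytes can travel in a single SystemZ register;
// everything else is passed indirectly.
#include <cstdint>
static bool systemzFitsInReg(uint64_t SizeBits) {
  return SizeBits == 8 || SizeBits == 16 || SizeBits == 32 || SizeBits == 64;
}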
- if (isCompoundType(Ty)) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - - return ABIArgInfo::getDirect(nullptr); -} - -void SystemZABIInfo::computeInfo(CGFunctionInfo &FI) const { - const SystemZTargetCodeGenInfo &SZCGI = - static_cast<const SystemZTargetCodeGenInfo &>( - CGT.getCGM().getTargetCodeGenInfo()); - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - unsigned Idx = 0; - for (auto &I : FI.arguments()) { - I.info = classifyArgumentType(I.type); - if (FI.isVariadic() && Idx++ >= FI.getNumRequiredArgs()) - // Check if a vararg vector argument is passed, in which case the - // vector ABI becomes visible as the va_list could be passed on to - // other functions. - SZCGI.handleExternallyVisibleObjABI(I.type.getTypePtr(), CGT.getCGM()); - } -} - -bool SystemZTargetCodeGenInfo::isVectorTypeBased(const Type *Ty) const { - while (Ty->isPointerType() || Ty->isArrayType()) - Ty = Ty->getPointeeOrArrayElementType(); - if (!SeenTypes.insert(Ty).second) - return false; - if (Ty->isVectorType()) - return true; - if (const auto *RecordTy = Ty->getAs<RecordType>()) { - const RecordDecl *RD = RecordTy->getDecl(); - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - if (CXXRD->hasDefinition()) - for (const auto &I : CXXRD->bases()) - if (isVectorTypeBased(I.getType().getTypePtr())) - return true; - for (const auto *FD : RD->fields()) - if (isVectorTypeBased(FD->getType().getTypePtr())) - return true; - } - if (const auto *FT = Ty->getAs<FunctionType>()) - if (isVectorTypeBased(FT->getReturnType().getTypePtr())) - return true; - if (const FunctionProtoType *Proto = Ty->getAs<FunctionProtoType>()) - for (auto ParamType : Proto->getParamTypes()) - if (isVectorTypeBased(ParamType.getTypePtr())) - return true; - return false; -} - -//===----------------------------------------------------------------------===// -// MSP430 ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class MSP430ABIInfo : public DefaultABIInfo { - static ABIArgInfo complexArgInfo() { - ABIArgInfo Info = ABIArgInfo::getDirect(); - Info.setCanBeFlattened(false); - return Info; - } - -public: - MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const { - if (RetTy->isAnyComplexType()) - return complexArgInfo(); - - return DefaultABIInfo::classifyReturnType(RetTy); - } - - ABIArgInfo classifyArgumentType(QualType RetTy) const { - if (RetTy->isAnyComplexType()) - return complexArgInfo(); - - return DefaultABIInfo::classifyArgumentType(RetTy); - } - - // Just copy the original implementations because - // DefaultABIInfo::classify{Return,Argument}Type() are not virtual - void computeInfo(CGFunctionInfo &FI) const override { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - } - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override { - return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); - } -}; - -class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { -public: - MSP430TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {} - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; -}; - -} - -void MSP430TargetCodeGenInfo::setTargetAttributes( - const Decl *D, 
llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (GV->isDeclaration()) - return; - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>(); - if (!InterruptAttr) - return; - - // Handle 'interrupt' attribute: - llvm::Function *F = cast<llvm::Function>(GV); - - // Step 1: Set ISR calling convention. - F->setCallingConv(llvm::CallingConv::MSP430_INTR); - - // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attribute::NoInline); - F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber())); - } -} - -//===----------------------------------------------------------------------===// -// MIPS ABI Implementation. This works for both little-endian and -// big-endian variants. -//===----------------------------------------------------------------------===// - -namespace { -class MipsABIInfo : public ABIInfo { - bool IsO32; - const unsigned MinABIStackAlignInBytes, StackAlignInBytes; - void CoerceToIntArgs(uint64_t TySize, - SmallVectorImpl<llvm::Type *> &ArgList) const; - llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; - llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; - llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; -public: - MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : - ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), - StackAlignInBytes(IsO32 ? 8 : 16) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - ABIArgInfo extendType(QualType Ty) const; -}; - -class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { - unsigned SizeOfUnwindException; -public: - MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) - : TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)), - SizeOfUnwindException(IsO32 ? 24 : 32) {} - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { - return 29; - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) return; - llvm::Function *Fn = cast<llvm::Function>(GV); - - if (FD->hasAttr<MipsLongCallAttr>()) - Fn->addFnAttr("long-call"); - else if (FD->hasAttr<MipsShortCallAttr>()) - Fn->addFnAttr("short-call"); - - // Other attributes do not have a meaning for declarations. 
- if (GV->isDeclaration()) - return; - - if (FD->hasAttr<Mips16Attr>()) { - Fn->addFnAttr("mips16"); - } - else if (FD->hasAttr<NoMips16Attr>()) { - Fn->addFnAttr("nomips16"); - } - - if (FD->hasAttr<MicroMipsAttr>()) - Fn->addFnAttr("micromips"); - else if (FD->hasAttr<NoMicroMipsAttr>()) - Fn->addFnAttr("nomicromips"); - - const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); - if (!Attr) - return; - - const char *Kind; - switch (Attr->getInterrupt()) { - case MipsInterruptAttr::eic: Kind = "eic"; break; - case MipsInterruptAttr::sw0: Kind = "sw0"; break; - case MipsInterruptAttr::sw1: Kind = "sw1"; break; - case MipsInterruptAttr::hw0: Kind = "hw0"; break; - case MipsInterruptAttr::hw1: Kind = "hw1"; break; - case MipsInterruptAttr::hw2: Kind = "hw2"; break; - case MipsInterruptAttr::hw3: Kind = "hw3"; break; - case MipsInterruptAttr::hw4: Kind = "hw4"; break; - case MipsInterruptAttr::hw5: Kind = "hw5"; break; - } - - Fn->addFnAttr("interrupt", Kind); - - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; - - unsigned getSizeOfUnwindException() const override { - return SizeOfUnwindException; - } -}; -} - -void MipsABIInfo::CoerceToIntArgs( - uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const { - llvm::IntegerType *IntTy = - llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); - - // Add (TySize / MinABIStackAlignInBytes) args of IntTy. - for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) - ArgList.push_back(IntTy); - - // If necessary, add one more integer type to ArgList. - unsigned R = TySize % (MinABIStackAlignInBytes * 8); - - if (R) - ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); -} - -// In N32/64, an aligned double precision floating point field is passed in -// a register. -llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { - SmallVector<llvm::Type*, 8> ArgList, IntArgList; - - if (IsO32) { - CoerceToIntArgs(TySize, ArgList); - return llvm::StructType::get(getVMContext(), ArgList); - } - - if (Ty->isComplexType()) - return CGT.ConvertType(Ty); - - const RecordType *RT = Ty->getAs<RecordType>(); - - // Unions/vectors are passed in integer registers. - if (!RT || !RT->isStructureOrClassType()) { - CoerceToIntArgs(TySize, ArgList); - return llvm::StructType::get(getVMContext(), ArgList); - } - - const RecordDecl *RD = RT->getDecl(); - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); - assert(!(TySize % 8) && "Size of structure must be multiple of 8."); - - uint64_t LastOffset = 0; - unsigned idx = 0; - llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); - - // Iterate over fields in the struct/class and check if there are any aligned - // double fields. - for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); - i != e; ++i, ++idx) { - const QualType Ty = i->getType(); - const BuiltinType *BT = Ty->getAs<BuiltinType>(); - - if (!BT || BT->getKind() != BuiltinType::Double) - continue; - - uint64_t Offset = Layout.getFieldOffset(idx); - if (Offset % 64) // Ignore doubles that are not aligned. - continue; - - // Add ((Offset - LastOffset) / 64) args of type i64. - for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) - ArgList.push_back(I64); - - // Add double type. 
- ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); - LastOffset = Offset + 64; - } - - CoerceToIntArgs(TySize - LastOffset, IntArgList); - ArgList.append(IntArgList.begin(), IntArgList.end()); - - return llvm::StructType::get(getVMContext(), ArgList); -} - -llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, - uint64_t Offset) const { - if (OrigOffset + MinABIStackAlignInBytes > Offset) - return nullptr; - - return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); -} - -ABIArgInfo -MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - uint64_t OrigOffset = Offset; - uint64_t TySize = getContext().getTypeSize(Ty); - uint64_t Align = getContext().getTypeAlign(Ty) / 8; - - Align = std::clamp(Align, (uint64_t)MinABIStackAlignInBytes, - (uint64_t)StackAlignInBytes); - unsigned CurrOffset = llvm::alignTo(Offset, Align); - Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; - - if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { - // Ignore empty aggregates. - if (TySize == 0) - return ABIArgInfo::getIgnore(); - - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - Offset = OrigOffset + MinABIStackAlignInBytes; - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - } - - // If we have reached here, aggregates are passed directly by coercing to - // another structure type. Padding is inserted if the offset of the - // aggregate is unaligned. - ABIArgInfo ArgInfo = - ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, - getPaddingType(OrigOffset, CurrOffset)); - ArgInfo.setInReg(true); - return ArgInfo; - } - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // Make sure we pass indirectly things that are too large. - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > 128 || - (EIT->getNumBits() > 64 && - !getContext().getTargetInfo().hasInt128Type())) - return getNaturalAlignIndirect(Ty); - - // All integral types are promoted to the GPR width. - if (Ty->isIntegralOrEnumerationType()) - return extendType(Ty); - - return ABIArgInfo::getDirect( - nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); -} - -llvm::Type* -MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { - const RecordType *RT = RetTy->getAs<RecordType>(); - SmallVector<llvm::Type*, 8> RTList; - - if (RT && RT->isStructureOrClassType()) { - const RecordDecl *RD = RT->getDecl(); - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); - unsigned FieldCnt = Layout.getFieldCount(); - - // N32/64 returns struct/classes in floating point registers if the - // following conditions are met: - // 1. The size of the struct/class is no larger than 128-bit. - // 2. The struct/class has one or two fields all of which are floating - // point types. - // 3. The offset of the first field is zero (this follows what gcc does). - // - // Any other composite results are returned in integer registers. 
- // - if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { - RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); - for (; b != e; ++b) { - const BuiltinType *BT = b->getType()->getAs<BuiltinType>(); - - if (!BT || !BT->isFloatingPoint()) - break; - - RTList.push_back(CGT.ConvertType(b->getType())); - } - - if (b == e) - return llvm::StructType::get(getVMContext(), RTList, - RD->hasAttr<PackedAttr>()); - - RTList.clear(); - } - } - - CoerceToIntArgs(Size, RTList); - return llvm::StructType::get(getVMContext(), RTList); -} - -ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { - uint64_t Size = getContext().getTypeSize(RetTy); - - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - // O32 doesn't treat zero-sized structs differently from other structs. - // However, N32/N64 ignores zero sized return values. - if (!IsO32 && Size == 0) - return ABIArgInfo::getIgnore(); - - if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { - if (Size <= 128) { - if (RetTy->isAnyComplexType()) - return ABIArgInfo::getDirect(); - - // O32 returns integer vectors in registers and N32/N64 returns all small - // aggregates in registers. - if (!IsO32 || - (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { - ABIArgInfo ArgInfo = - ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); - ArgInfo.setInReg(true); - return ArgInfo; - } - } - - return getNaturalAlignIndirect(RetTy); - } - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - // Make sure we pass indirectly things that are too large. - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > 128 || - (EIT->getNumBits() > 64 && - !getContext().getTargetInfo().hasInt128Type())) - return getNaturalAlignIndirect(RetTy); - - if (isPromotableIntegerTypeForABI(RetTy)) - return ABIArgInfo::getExtend(RetTy); - - if ((RetTy->isUnsignedIntegerOrEnumerationType() || - RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32) - return ABIArgInfo::getSignExtend(RetTy); - - return ABIArgInfo::getDirect(); -} - -void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { - ABIArgInfo &RetInfo = FI.getReturnInfo(); - if (!getCXXABI().classifyReturnType(FI)) - RetInfo = classifyReturnType(FI.getReturnType()); - - // Check if a pointer to an aggregate is passed as a hidden argument. - uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; - - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, Offset); -} - -Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType OrigTy) const { - QualType Ty = OrigTy; - - // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64. - // Pointers are also promoted in the same way but this only matters for N32. - unsigned SlotSizeInBits = IsO32 ? 32 : 64; - unsigned PtrWidth = getTarget().getPointerWidth(LangAS::Default); - bool DidPromote = false; - if ((Ty->isIntegerType() && - getContext().getIntWidth(Ty) < SlotSizeInBits) || - (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) { - DidPromote = true; - Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits, - Ty->isSignedIntegerType()); - } - - auto TyInfo = getContext().getTypeInfoInChars(Ty); - - // The alignment of things in the argument area is never larger than - // StackAlignInBytes. 
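// Sketch of the offset bookkeeping in classifyArgumentType above
// (hypothetical helper; Offset in bytes, TySize in bits): each argument
// starts at the next multiple of its clamped alignment and advances the
// running offset by its padded size.
#include <algorithm>
#include <cstdint>
static uint64_t mipsAdvanceOffset(uint64_t &OffsetBytes, uint64_t TySizeBits,
                                  uint64_t AlignBytes, uint64_t MinAlign,
                                  uint64_t MaxAlign) {
  AlignBytes = std::clamp(AlignBytes, MinAlign, MaxAlign);
  const uint64_t AlignBits = AlignBytes * 8;
  const uint64_t CurrBytes =
      (OffsetBytes + AlignBytes - 1) / AlignBytes * AlignBytes;
  OffsetBytes =
      CurrBytes + (TySizeBits + AlignBits - 1) / AlignBits * AlignBytes;
  return CurrBytes; // where this argument begins
}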
- TyInfo.Align = - std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes)); - - // MinABIStackAlignInBytes is the size of argument slots on the stack. - CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes); - - Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, - TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true); - - - // If there was a promotion, "unpromote" into a temporary. - // TODO: can we just use a pointer into a subset of the original slot? - if (DidPromote) { - Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp"); - llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr); - - // Truncate down to the right width. - llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType() - : CGF.IntPtrTy); - llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy); - if (OrigTy->isPointerType()) - V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType()); - - CGF.Builder.CreateStore(V, Temp); - Addr = Temp; - } - - return Addr; -} - -ABIArgInfo MipsABIInfo::extendType(QualType Ty) const { - int TySize = getContext().getTypeSize(Ty); - - // MIPS64 ABI requires unsigned 32 bit integers to be sign extended. - if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) - return ABIArgInfo::getSignExtend(Ty); - - return ABIArgInfo::getExtend(Ty); -} - -bool -MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - // This information comes from gcc's implementation, which seems to - // be as canonical as it gets. - - // Everything on MIPS is 4 bytes. Double-precision FP registers - // are aliased to pairs of single-precision FP registers. - llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); - - // 0-31 are the general purpose registers, $0 - $31. - // 32-63 are the floating-point registers, $f0 - $f31. - // 64 and 65 are the multiply/divide registers, $hi and $lo. - // 66 is the (notional, I think) register for signal-handler return. - AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65); - - // 67-74 are the floating-point status registers, $fcc0 - $fcc7. - // They are one bit wide and ignored here. - - // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31. - // (coprocessor 1 is the FP unit) - // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31. - // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31. - // 176-181 are the DSP accumulator registers. - AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181); - return false; -} - -//===----------------------------------------------------------------------===// -// M68k ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class M68kTargetCodeGenInfo : public TargetCodeGenInfo { -public: - M68kTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; -}; - -} // namespace - -void M68kTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (const auto *attr = FD->getAttr<M68kInterruptAttr>()) { - // Handle 'interrupt' attribute: - llvm::Function *F = cast<llvm::Function>(GV); - - // Step 1: Set ISR calling convention. - F->setCallingConv(llvm::CallingConv::M68k_INTR); - - // Step 2: Add attributes goodness.
- F->addFnAttr(llvm::Attribute::NoInline); - - // Step 3: Emit ISR vector alias. - unsigned Num = attr->getNumber() / 2; - llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, - "__isr_" + Twine(Num), F); - } - } -} - -//===----------------------------------------------------------------------===// -// AVR ABI Implementation. Documented at -// https://gcc.gnu.org/wiki/avr-gcc#Calling_Convention -// https://gcc.gnu.org/wiki/avr-gcc#Reduced_Tiny -//===----------------------------------------------------------------------===// - -namespace { -class AVRABIInfo : public DefaultABIInfo { -private: - // The total number of registers that can be used to pass parameters. It is - // 18 on AVR, or 6 on AVRTiny. - const unsigned ParamRegs; - // The total number of registers that can be used to pass the return value. - // It is 8 on AVR, or 4 on AVRTiny. - const unsigned RetRegs; - -public: - AVRABIInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR) - : DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {} - - ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const { - // On AVR, a return struct with a size less than or equal to 8 bytes is - // returned directly via registers R18-R25. On AVRTiny, a return struct - // with a size less than or equal to 4 bytes is returned directly via - // registers R22-R25. - if (isAggregateTypeForABI(Ty) && - getContext().getTypeSize(Ty) <= RetRegs * 8) - return ABIArgInfo::getDirect(); - // A return value (struct or scalar) with larger size is returned via a - // stack slot, along with a pointer as the function's implicit argument. - if (getContext().getTypeSize(Ty) > RetRegs * 8) { - LargeRet = true; - return getNaturalAlignIndirect(Ty); - } - // An i8 return value should not be extended to i16, since AVR has 8-bit - // registers. - if (Ty->isIntegralOrEnumerationType() && getContext().getTypeSize(Ty) <= 8) - return ABIArgInfo::getDirect(); - // Otherwise we follow the default way, which is compatible. - return DefaultABIInfo::classifyReturnType(Ty); - } - - ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegs) const { - unsigned TySize = getContext().getTypeSize(Ty); - - // An int8 type argument always costs two registers like an int16. - if (TySize == 8 && NumRegs >= 2) { - NumRegs -= 2; - return ABIArgInfo::getExtend(Ty); - } - - // If the argument size is an odd number of bytes, round up the size - // to the next even number. - TySize = llvm::alignTo(TySize, 16); - - // Any type including an array/struct type can be passed in registers, - // if there are enough registers left. - if (TySize <= NumRegs * 8) { - NumRegs -= TySize / 8; - return ABIArgInfo::getDirect(); - } - - // An argument is passed either completely in registers or completely in - // memory. Since there are not enough registers left, the current argument - // and all other unprocessed arguments should be passed in memory. - // However, we still need to return `ABIArgInfo::getDirect()` rather than - // `ABIInfo::getNaturalAlignIndirect(Ty)`, otherwise an extra stack slot - // will be allocated, so the stack frame layout will be incompatible with - // avr-gcc. - NumRegs = 0; - return ABIArgInfo::getDirect(); - } - - void computeInfo(CGFunctionInfo &FI) const override { - // Decide the return type. - bool LargeRet = false; - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), LargeRet); - - // Decide each argument type. The total number of registers that can be - // used for arguments depends on several factors: - // 1.
Arguments of varargs functions are passed on the stack. This applies - // even to the named arguments. So no register can be used. - // 2. In total, 18 registers can be used on avr and 6 on avrtiny. - // 3. If the return type is a struct that is too large, two registers - // (out of 18/6) will be consumed by an implicit pointer argument. - unsigned NumRegs = ParamRegs; - if (FI.isVariadic()) - NumRegs = 0; - else if (LargeRet) - NumRegs -= 2; - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, NumRegs); - } -}; - -class AVRTargetCodeGenInfo : public TargetCodeGenInfo { -public: - AVRTargetCodeGenInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR) - : TargetCodeGenInfo(std::make_unique<AVRABIInfo>(CGT, NPR, NRR)) {} - - LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const override { - // Check if global/static variable is defined in address space - // 1~6 (__flash, __flash1, __flash2, __flash3, __flash4, __flash5) - // but not constant. - if (D) { - LangAS AS = D->getType().getAddressSpace(); - if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) && - toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified()) - CGM.getDiags().Report(D->getLocation(), - diag::err_verify_nonconst_addrspace) - << "__flash*"; - } - return TargetCodeGenInfo::getGlobalVarAddressSpace(CGM, D); - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - if (GV->isDeclaration()) - return; - const auto *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) return; - auto *Fn = cast<llvm::Function>(GV); - - if (FD->getAttr<AVRInterruptAttr>()) - Fn->addFnAttr("interrupt"); - - if (FD->getAttr<AVRSignalAttr>()) - Fn->addFnAttr("signal"); - } -}; -} - -//===----------------------------------------------------------------------===// -// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. -// Currently subclassed only to implement custom OpenCL C function attribute -// handling. -//===----------------------------------------------------------------------===// - -namespace { - -class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo { -public: - TCETargetCodeGenInfo(CodeGenTypes &CGT) - : DefaultTargetCodeGenInfo(CGT) {} - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; -}; - -void TCETargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (GV->isDeclaration()) - return; - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) return; - - llvm::Function *F = cast<llvm::Function>(GV); - - if (M.getLangOpts().OpenCL) { - if (FD->hasAttr<OpenCLKernelAttr>()) { - // OpenCL C Kernel functions are not subject to inlining - F->addFnAttr(llvm::Attribute::NoInline); - const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); - if (Attr) { - // Convert the reqd_work_group_size() attributes to metadata.
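// Sketch of the register accounting in AVRABIInfo::computeInfo above
// (hypothetical helper): varargs functions get no parameter registers at
// all, and a large return value costs two of them for the implicit pointer.
static unsigned avrInitialParamRegs(unsigned ParamRegs, bool IsVariadic,
                                    bool LargeRet) {
  if (IsVariadic)
    return 0;
  return LargeRet ? ParamRegs - 2 : ParamRegs;
}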
- llvm::LLVMContext &Context = F->getContext(); - llvm::NamedMDNode *OpenCLMetadata = - M.getModule().getOrInsertNamedMetadata( - "opencl.kernel_wg_size_info"); - - SmallVector<llvm::Metadata *, 5> Operands; - Operands.push_back(llvm::ConstantAsMetadata::get(F)); - - Operands.push_back( - llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( - M.Int32Ty, llvm::APInt(32, Attr->getXDim())))); - Operands.push_back( - llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( - M.Int32Ty, llvm::APInt(32, Attr->getYDim())))); - Operands.push_back( - llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( - M.Int32Ty, llvm::APInt(32, Attr->getZDim())))); - - // Add a boolean constant operand for "required" (true) or "hint" - // (false) for implementing the work_group_size_hint attr later. - // Currently always true as the hint is not yet implemented. - Operands.push_back( - llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context))); - OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands)); - } - } - } -} - -} - -//===----------------------------------------------------------------------===// -// Hexagon ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class HexagonABIInfo : public DefaultABIInfo { -public: - HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - -private: - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const; - - void computeInfo(CGFunctionInfo &FI) const override; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr, - QualType Ty) const; - Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr, - QualType Ty) const; - Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr, - QualType Ty) const; -}; - -class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { -public: - HexagonTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {} - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - return 29; - } - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &GCM) const override { - if (GV->isDeclaration()) - return; - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) - return; - } -}; - -} // namespace - -void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const { - unsigned RegsLeft = 6; - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, &RegsLeft); -} - -static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) { - assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits" - " through registers"); - - if (*RegsLeft == 0) - return false; - - if (Size <= 32) { - (*RegsLeft)--; - return true; - } - - if (2 <= (*RegsLeft & (~1U))) { - *RegsLeft = (*RegsLeft & (~1U)) - 2; - return true; - } - - // Next available register was r5 but candidate was greater than 32-bits so it - // has to go on the stack. 
However we still consume r5 - if (*RegsLeft == 1) - *RegsLeft = 0; - - return false; -} - -ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty, - unsigned *RegsLeft) const { - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 64) - HexagonAdjustRegsLeft(Size, RegsLeft); - - if (Size > 64 && Ty->isBitIntType()) - return getNaturalAlignIndirect(Ty, /*ByVal=*/true); - - return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect(); - } - - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - // Ignore empty records. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - uint64_t Size = getContext().getTypeSize(Ty); - unsigned Align = getContext().getTypeAlign(Ty); - - if (Size > 64) - return getNaturalAlignIndirect(Ty, /*ByVal=*/true); - - if (HexagonAdjustRegsLeft(Size, RegsLeft)) - Align = Size <= 32 ? 32 : 64; - if (Size <= Align) { - // Pass in the smallest viable integer type. - if (!llvm::isPowerOf2_64(Size)) - Size = llvm::NextPowerOf2(Size); - return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); - } - return DefaultABIInfo::classifyArgumentType(Ty); -} - -ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - const TargetInfo &T = CGT.getTarget(); - uint64_t Size = getContext().getTypeSize(RetTy); - - if (RetTy->getAs<VectorType>()) { - // HVX vectors are returned in vector registers or register pairs. - if (T.hasFeature("hvx")) { - assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b")); - uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8; - if (Size == VecSize || Size == 2*VecSize) - return ABIArgInfo::getDirectInReg(); - } - // Large vector types should be returned via memory. - if (Size > 64) - return getNaturalAlignIndirect(RetTy); - } - - if (!isAggregateTypeForABI(RetTy)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - if (Size > 64 && RetTy->isBitIntType()) - return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); - - return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect(); - } - - if (isEmptyRecord(getContext(), RetTy, true)) - return ABIArgInfo::getIgnore(); - - // Aggregates <= 8 bytes are returned in registers, other aggregates - // are returned indirectly. - if (Size <= 64) { - // Return in the smallest viable integer type. - if (!llvm::isPowerOf2_64(Size)) - Size = llvm::NextPowerOf2(Size); - return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); - } - return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); -} - -Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF, - Address VAListAddr, - QualType Ty) const { - // Load the overflow area pointer. - Address __overflow_area_pointer_p = - CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); - llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( - __overflow_area_pointer_p, "__overflow_area_pointer"); - - uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; - if (Align > 4) { - // Alignment should be a power of 2. 
- assert((Align & (Align - 1)) == 0 && "Alignment is not a power of 2!"); - - // overflow_arg_area = (overflow_arg_area + align - 1) & -align; - llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1); - - // Add offset to the current pointer to access the argument. - __overflow_area_pointer = - CGF.Builder.CreateGEP(CGF.Int8Ty, __overflow_area_pointer, Offset); - llvm::Value *AsInt = - CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); - - // Create the mask to be ANDed with (overflow_arg_area + align - 1). - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align); - __overflow_area_pointer = CGF.Builder.CreateIntToPtr( - CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(), - "__overflow_area_pointer.align"); - } - - // Get the type of the argument from memory and bitcast the - // overflow area pointer to the argument type. - llvm::Type *PTy = CGF.ConvertTypeForMem(Ty); - Address AddrTyped = CGF.Builder.CreateElementBitCast( - Address(__overflow_area_pointer, CGF.Int8Ty, - CharUnits::fromQuantity(Align)), - PTy); - - // Round up to the minimum stack alignment for varargs, which is 4 bytes. - uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4); - - __overflow_area_pointer = CGF.Builder.CreateGEP( - CGF.Int8Ty, __overflow_area_pointer, - llvm::ConstantInt::get(CGF.Int32Ty, Offset), - "__overflow_area_pointer.next"); - CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p); - - return AddrTyped; -} - -Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF, - Address VAListAddr, - QualType Ty) const { - // FIXME: Need to handle alignment - llvm::Type *BP = CGF.Int8PtrTy; - CGBuilderTy &Builder = CGF.Builder; - Address VAListAddrAsBPP = Builder.CreateElementBitCast(VAListAddr, BP, "ap"); - llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); - // Handle address alignment for type alignment > 32 bits - uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8; - if (TyAlign > 4) { - assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not a power of 2!"); - llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty); - AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1)); - AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1))); - Addr = Builder.CreateIntToPtr(AddrAsInt, BP); - } - Address AddrTyped = Builder.CreateElementBitCast( - Address(Addr, CGF.Int8Ty, CharUnits::fromQuantity(TyAlign)), - CGF.ConvertType(Ty)); - - uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4); - llvm::Value *NextAddr = Builder.CreateGEP( - CGF.Int8Ty, Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next"); - Builder.CreateStore(NextAddr, VAListAddrAsBPP); - - return AddrTyped; -} - -Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF, - Address VAListAddr, - QualType Ty) const { - int ArgSize = CGF.getContext().getTypeSize(Ty) / 8; - - if (ArgSize > 8) - return EmitVAArgFromMemory(CGF, VAListAddr, Ty); - - // Here we have to check whether the argument is in the register area or - // in the overflow area: if the saved register area pointer + argsize - // rounded up to alignment > saved register area end pointer, the argument - // is in the overflow area.
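// Illustrative sketch, not from the original file: the va_list manipulated
// below is assumed to be a three-pointer struct, matching the StructGEP
// indices 0/1/2 used in this function:
//   struct __va_list_tag {
//     void *__current_saved_reg_area_pointer; // index 0
//     void *__saved_reg_area_end_pointer;     // index 1
//     void *__overflow_area_pointer;          // index 2
//   };
// The register-vs-overflow decision then reduces to a single bounds check:
//   OnStack = (Current + RoundedArgSize) > SavedRegAreaEnd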
- unsigned RegsLeft = 6; - Ty = CGF.getContext().getCanonicalType(Ty); - (void)classifyArgumentType(Ty, &RegsLeft); - - llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); - llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); - llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); - llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); - - // Get the rounded size of the argument. GCC does not allow varargs of - // size < 4 bytes; we follow the same logic here. - ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8; - int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8; - - // Argument may be in the saved register area. - CGF.EmitBlock(MaybeRegBlock); - - // Load the current saved register area pointer. - Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP( - VAListAddr, 0, "__current_saved_reg_area_pointer_p"); - llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad( - __current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer"); - - // Load the saved register area end pointer. - Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP( - VAListAddr, 1, "__saved_reg_area_end_pointer_p"); - llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad( - __saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer"); - - // If the size of the argument is > 4 bytes, check whether the stack - // location is aligned to 8 bytes. - if (ArgAlign > 4) { - - llvm::Value *__current_saved_reg_area_pointer_int = - CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer, - CGF.Int32Ty); - - __current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd( - __current_saved_reg_area_pointer_int, - llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)), - "align_current_saved_reg_area_pointer"); - - __current_saved_reg_area_pointer_int = - CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int, - llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign), - "align_current_saved_reg_area_pointer"); - - __current_saved_reg_area_pointer = - CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int, - __current_saved_reg_area_pointer->getType(), - "align_current_saved_reg_area_pointer"); - } - - llvm::Value *__new_saved_reg_area_pointer = - CGF.Builder.CreateGEP(CGF.Int8Ty, __current_saved_reg_area_pointer, - llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), - "__new_saved_reg_area_pointer"); - - llvm::Value *UsingStack = nullptr; - UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer, - __saved_reg_area_end_pointer); - - CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock); - - // Argument in saved register area - // Implement the block where the argument is in the register saved area. - CGF.EmitBlock(InRegBlock); - - llvm::Type *PTy = CGF.ConvertType(Ty); - llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast( - __current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy)); - - CGF.Builder.CreateStore(__new_saved_reg_area_pointer, - __current_saved_reg_area_pointer_p); - - CGF.EmitBranch(ContBlock); - - // Argument in overflow area - // Implement the block where the argument is in the overflow area.
- CGF.EmitBlock(OnStackBlock); - - // Load the overflow area pointer - Address __overflow_area_pointer_p = - CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); - llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( - __overflow_area_pointer_p, "__overflow_area_pointer"); - - // Align the overflow area pointer according to the alignment of the argument - if (ArgAlign > 4) { - llvm::Value *__overflow_area_pointer_int = - CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); - - __overflow_area_pointer_int = - CGF.Builder.CreateAdd(__overflow_area_pointer_int, - llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1), - "align_overflow_area_pointer"); - - __overflow_area_pointer_int = - CGF.Builder.CreateAnd(__overflow_area_pointer_int, - llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign), - "align_overflow_area_pointer"); - - __overflow_area_pointer = CGF.Builder.CreateIntToPtr( - __overflow_area_pointer_int, __overflow_area_pointer->getType(), - "align_overflow_area_pointer"); - } - - // Get the pointer for next argument in overflow area and store it - // to overflow area pointer. - llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP( - CGF.Int8Ty, __overflow_area_pointer, - llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), - "__overflow_area_pointer.next"); - - CGF.Builder.CreateStore(__new_overflow_area_pointer, - __overflow_area_pointer_p); - - CGF.Builder.CreateStore(__new_overflow_area_pointer, - __current_saved_reg_area_pointer_p); - - // Bitcast the overflow area pointer to the type of argument. - llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty); - llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast( - __overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy)); - - CGF.EmitBranch(ContBlock); - - // Get the correct pointer to load the variable argument - // Implement the ContBlock - CGF.EmitBlock(ContBlock); - - llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty); - llvm::Type *MemPTy = llvm::PointerType::getUnqual(MemTy); - llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr"); - ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock); - ArgAddr->addIncoming(__overflow_area_p, OnStackBlock); - - return Address(ArgAddr, MemTy, CharUnits::fromQuantity(ArgAlign)); -} - -Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - - if (getTarget().getTriple().isMusl()) - return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty); - - return EmitVAArgForHexagon(CGF, VAListAddr, Ty); -} - -//===----------------------------------------------------------------------===// -// Lanai ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { -class LanaiABIInfo : public DefaultABIInfo { -public: - LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - - bool shouldUseInReg(QualType Ty, CCState &State) const; - - void computeInfo(CGFunctionInfo &FI) const override { - CCState State(FI); - // Lanai uses 4 registers to pass arguments unless the function has the - // regparm attribute set. 
- if (FI.getHasRegParm()) { - State.FreeRegs = FI.getRegParm(); - } else { - State.FreeRegs = 4; - } - - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, State); - } - - ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; - ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; -}; -} // end anonymous namespace - -bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { - unsigned Size = getContext().getTypeSize(Ty); - unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; - - if (SizeInRegs == 0) - return false; - - if (SizeInRegs > State.FreeRegs) { - State.FreeRegs = 0; - return false; - } - - State.FreeRegs -= SizeInRegs; - - return true; -} - -ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, - CCState &State) const { - if (!ByVal) { - if (State.FreeRegs) { - --State.FreeRegs; // Non-byval indirects just use one pointer. - return getNaturalAlignIndirectInReg(Ty); - } - return getNaturalAlignIndirect(Ty, false); - } - - // Compute the byval alignment. - const unsigned MinABIStackAlignInBytes = 4; - unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, - /*Realign=*/TypeAlign > - MinABIStackAlignInBytes); -} - -ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, - CCState &State) const { - // Check with the C++ ABI first. - const RecordType *RT = Ty->getAs<RecordType>(); - if (RT) { - CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); - if (RAA == CGCXXABI::RAA_Indirect) { - return getIndirectResult(Ty, /*ByVal=*/false, State); - } else if (RAA == CGCXXABI::RAA_DirectInMemory) { - return getNaturalAlignIndirect(Ty, /*ByVal=*/true); - } - } - - if (isAggregateTypeForABI(Ty)) { - // Structures with flexible arrays are always indirect. - if (RT && RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, /*ByVal=*/true, State); - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - llvm::LLVMContext &LLVMContext = getVMContext(); - unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; - if (SizeInRegs <= State.FreeRegs) { - llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); - llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); - State.FreeRegs -= SizeInRegs; - return ABIArgInfo::getDirectInReg(Result); - } else { - State.FreeRegs = 0; - } - return getIndirectResult(Ty, true, State); - } - - // Treat an enum type as its underlying type. - if (const auto *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - bool InReg = shouldUseInReg(Ty, State); - - // Don't pass >64 bit integers in registers. 
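// Illustrative example, not from the original file: under the check below,
//   void f(_BitInt(128) x);
// passes x indirectly (byval on the stack), because 128 > 64 bits.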
- if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > 64) - return getIndirectResult(Ty, /*ByVal=*/true, State); - - if (isPromotableIntegerTypeForABI(Ty)) { - if (InReg) - return ABIArgInfo::getDirectInReg(); - return ABIArgInfo::getExtend(Ty); - } - if (InReg) - return ABIArgInfo::getDirectInReg(); - return ABIArgInfo::getDirect(); -} - -namespace { -class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { -public: - LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {} -}; -} - -//===----------------------------------------------------------------------===// -// AMDGPU ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class AMDGPUABIInfo final : public DefaultABIInfo { -private: - static const unsigned MaxNumRegsForArgsRet = 16; - - unsigned numRegsForType(QualType Ty) const; - - bool isHomogeneousAggregateBaseType(QualType Ty) const override; - bool isHomogeneousAggregateSmallEnough(const Type *Base, - uint64_t Members) const override; - - // Coerce HIP scalar pointer arguments from generic pointers to global ones. - llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, - unsigned ToAS) const { - // Single value types. - auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty); - if (PtrTy && PtrTy->getAddressSpace() == FromAS) - return llvm::PointerType::getWithSamePointeeType(PtrTy, ToAS); - return Ty; - } - -public: - explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : - DefaultABIInfo(CGT) {} - - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyKernelArgumentType(QualType Ty) const; - ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; - - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { - return true; -} - -bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( - const Type *Base, uint64_t Members) const { - uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; - - // Homogeneous Aggregates may occupy at most 16 registers. - return Members * NumRegs <= MaxNumRegsForArgsRet; -} - -/// Estimate number of registers the type will use when passed in registers. -unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { - unsigned NumRegs = 0; - - if (const VectorType *VT = Ty->getAs<VectorType>()) { - // Compute from the number of elements. The reported size is based on the - // in-memory size, which includes the padding 4th element for 3-vectors. - QualType EltTy = VT->getElementType(); - unsigned EltSize = getContext().getTypeSize(EltTy); - - // 16-bit element vectors should be passed as packed. 
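// Worked example, not from the original file: with the packing formula
// below, a <4 x half> takes (4 + 1) / 2 = 2 registers, and a <3 x i16>
// also rounds up to (3 + 1) / 2 = 2.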
- if (EltSize == 16) - return (VT->getNumElements() + 1) / 2; - - unsigned EltNumRegs = (EltSize + 31) / 32; - return EltNumRegs * VT->getNumElements(); - } - - if (const RecordType *RT = Ty->getAs<RecordType>()) { - const RecordDecl *RD = RT->getDecl(); - assert(!RD->hasFlexibleArrayMember()); - - for (const FieldDecl *Field : RD->fields()) { - QualType FieldTy = Field->getType(); - NumRegs += numRegsForType(FieldTy); - } - - return NumRegs; - } - - return (getContext().getTypeSize(Ty) + 31) / 32; -} - -void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { - llvm::CallingConv::ID CC = FI.getCallingConvention(); - - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - - unsigned NumRegsLeft = MaxNumRegsForArgsRet; - for (auto &Arg : FI.arguments()) { - if (CC == llvm::CallingConv::AMDGPU_KERNEL) { - Arg.info = classifyKernelArgumentType(Arg.type); - } else { - Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); - } - } -} - -Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - llvm_unreachable("AMDGPU does not support varargs"); -} - -ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { - if (isAggregateTypeForABI(RetTy)) { - // Records with non-trivial destructors/copy-constructors should not be - // returned by value. - if (!getRecordArgABI(RetTy, getCXXABI())) { - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), RetTy, true)) - return ABIArgInfo::getIgnore(); - - // Lower single-element structs to just return a regular value. - if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); - - if (const RecordType *RT = RetTy->getAs<RecordType>()) { - const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return DefaultABIInfo::classifyReturnType(RetTy); - } - - // Pack aggregates <= 4 bytes into single VGPR or pair. - uint64_t Size = getContext().getTypeSize(RetTy); - if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); - - if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); - - if (Size <= 64) { - llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); - } - - if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) - return ABIArgInfo::getDirect(); - } - } - - // Otherwise just do the default thing. - return DefaultABIInfo::classifyReturnType(RetTy); -} - -/// For kernels all parameters are really passed in a special buffer. It doesn't -/// make sense to pass anything byval, so everything must be direct. -ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - // TODO: Can we omit empty structs? - - if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) - Ty = QualType(SeltTy, 0); - - llvm::Type *OrigLTy = CGT.ConvertType(Ty); - llvm::Type *LTy = OrigLTy; - if (getContext().getLangOpts().HIP) { - LTy = coerceKernelArgumentType( - OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), - /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device)); - } - - // FIXME: Should also use this for OpenCL, but it requires addressing the - // problem of kernels being called. - // - // FIXME: This doesn't apply the optimization of coercing pointers in structs - // to global address space when using byref. 
This would require implementing a - // new kind of coercion of the in-memory type for indirect arguments. - if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy && - isAggregateTypeForABI(Ty)) { - return ABIArgInfo::getIndirectAliased( - getContext().getTypeAlignInChars(Ty), - getContext().getTargetAddressSpace(LangAS::opencl_constant), - false /*Realign*/, nullptr /*Padding*/); - } - - // If we set CanBeFlattened to true, CodeGen will expand the struct to its - // individual elements, which confuses the Clover OpenCL backend; therefore we - // have to set it to false here. Other args of getDirect() are just defaults. - return ABIArgInfo::getDirect(LTy, 0, nullptr, false); -} - -ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, - unsigned &NumRegsLeft) const { - assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); - - Ty = useFirstFieldIfTransparentUnion(Ty); - - if (isAggregateTypeForABI(Ty)) { - // Records with non-trivial destructors/copy-constructors should not be - // passed by value. - if (auto RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - // Lower single-element structs to just pass a regular value. TODO: We - // could do reasonable-size multiple-element structs too, using getExpand(), - // though watch out for things like bitfields. - if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); - - if (const RecordType *RT = Ty->getAs<RecordType>()) { - const RecordDecl *RD = RT->getDecl(); - if (RD->hasFlexibleArrayMember()) - return DefaultABIInfo::classifyArgumentType(Ty); - } - - // Pack aggregates <= 8 bytes into a single VGPR or a pair. - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 64) { - unsigned NumRegs = (Size + 31) / 32; - NumRegsLeft -= std::min(NumRegsLeft, NumRegs); - - if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); - - if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); - - // XXX: Should this be i64 instead, and should the limit increase? - llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); - } - - if (NumRegsLeft > 0) { - unsigned NumRegs = numRegsForType(Ty); - if (NumRegsLeft >= NumRegs) { - NumRegsLeft -= NumRegs; - return ABIArgInfo::getDirect(); - } - } - } - - // Otherwise just do the default thing.
- ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); - if (!ArgInfo.isIndirect()) { - unsigned NumRegs = numRegsForType(Ty); - NumRegsLeft -= std::min(NumRegs, NumRegsLeft); - } - - return ArgInfo; -} - -class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { -public: - AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {} - - void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, - CodeGenModule &CGM) const; - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; - unsigned getOpenCLKernelCallingConv() const override; - - llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, - llvm::PointerType *T, QualType QT) const override; - - LangAS getASTAllocaAddressSpace() const override { - return getLangASFromTargetAS( - getABIInfo().getDataLayout().getAllocaAddrSpace()); - } - LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const override; - llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, - SyncScope Scope, - llvm::AtomicOrdering Ordering, - llvm::LLVMContext &Ctx) const override; - llvm::Function * - createEnqueuedBlockKernel(CodeGenFunction &CGF, - llvm::Function *BlockInvokeFunc, - llvm::Type *BlockTy) const override; - bool shouldEmitStaticExternCAliases() const override; - void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; -}; -} - -static bool requiresAMDGPUProtectedVisibility(const Decl *D, - llvm::GlobalValue *GV) { - if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) - return false; - - return D->hasAttr<OpenCLKernelAttr>() || - (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || - (isa<VarDecl>(D) && - (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || - cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() || - cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())); -} - -void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( - const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const { - const auto *ReqdWGS = - M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; - const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>(); - const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>(); - - const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); - if (ReqdWGS || FlatWGS) { - unsigned Min = 0; - unsigned Max = 0; - if (FlatWGS) { - Min = FlatWGS->getMin() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue(); - Max = FlatWGS->getMax() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue(); - } - if (ReqdWGS && Min == 0 && Max == 0) - Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); - - if (Min != 0) { - assert(Min <= Max && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); - } else if (IsOpenCLKernel || IsHIPKernel) { - // By default, restrict the maximum size to a value specified by - // --gpu-max-threads-per-block=n or its default value for HIP. - const unsigned OpenCLDefaultMaxWorkGroupSize = 256; - const unsigned DefaultMaxWorkGroupSize = - IsOpenCLKernel ? 
OpenCLDefaultMaxWorkGroupSize - : M.getLangOpts().GPUMaxThreadsPerBlock; - std::string AttrVal = - std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize); - F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); - } - - if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { - unsigned Min = - Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); - unsigned Max = Attr->getMax() ? Attr->getMax() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue() - : 0; - - if (Min != 0) { - assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min); - if (Max != 0) - AttrVal = AttrVal + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-waves-per-eu", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); - } - - if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { - unsigned NumSGPR = Attr->getNumSGPR(); - - if (NumSGPR != 0) - F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR)); - } - - if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { - uint32_t NumVGPR = Attr->getNumVGPR(); - - if (NumVGPR != 0) - F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); - } -} - -void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (requiresAMDGPUProtectedVisibility(D, GV)) { - GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); - GV->setDSOLocal(true); - } - - if (GV->isDeclaration()) - return; - - llvm::Function *F = dyn_cast<llvm::Function>(GV); - if (!F) - return; - - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (FD) - setFunctionDeclAttributes(FD, F, M); - - const bool IsHIPKernel = - M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>(); - const bool IsOpenMPkernel = - M.getLangOpts().OpenMPIsDevice && - (F->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL); - - // TODO: This should be moved to language specific attributes instead. - if (IsHIPKernel || IsOpenMPkernel) - F->addFnAttr("uniform-work-group-size", "true"); - - if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) - F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); - - if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) - F->addFnAttr("amdgpu-ieee", "false"); -} - -unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { - return llvm::CallingConv::AMDGPU_KERNEL; -} - -// Currently LLVM assumes null pointers always have value 0, -// which results in incorrectly transformed IR. Therefore, instead of -// emitting null pointers in private and local address spaces, a null -// pointer in generic address space is emitted which is casted to a -// pointer in local or private address space. 
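// Illustrative IR, not from the original file (assumes AMDGPU's private
// address space 5, whose null value is nonzero): the constant produced by
// getNullPointer() below has the form
//   addrspacecast (ptr null to ptr addrspace(5))
// rather than a literal ptr addrspace(5) null.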
-llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( - const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT, - QualType QT) const { - if (CGM.getContext().getTargetNullPointerValue(QT) == 0) - return llvm::ConstantPointerNull::get(PT); - - auto &Ctx = CGM.getContext(); - auto NPT = llvm::PointerType::getWithSamePointeeType( - PT, Ctx.getTargetAddressSpace(LangAS::opencl_generic)); - return llvm::ConstantExpr::getAddrSpaceCast( - llvm::ConstantPointerNull::get(NPT), PT); -} - -LangAS -AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const { - assert(!CGM.getLangOpts().OpenCL && - !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && - "Address space agnostic languages only"); - LangAS DefaultGlobalAS = getLangASFromTargetAS( - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); - if (!D) - return DefaultGlobalAS; - - LangAS AddrSpace = D->getType().getAddressSpace(); - assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); - if (AddrSpace != LangAS::Default) - return AddrSpace; - - // Only promote to address space 4 if VarDecl has constant initialization. - if (CGM.isTypeConstant(D->getType(), false) && - D->hasConstantInitialization()) { - if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) - return *ConstAS; - } - return DefaultGlobalAS; -} - -llvm::SyncScope::ID -AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, - SyncScope Scope, - llvm::AtomicOrdering Ordering, - llvm::LLVMContext &Ctx) const { - std::string Name; - switch (Scope) { - case SyncScope::HIPSingleThread: - Name = "singlethread"; - break; - case SyncScope::HIPWavefront: - case SyncScope::OpenCLSubGroup: - Name = "wavefront"; - break; - case SyncScope::HIPWorkgroup: - case SyncScope::OpenCLWorkGroup: - Name = "workgroup"; - break; - case SyncScope::HIPAgent: - case SyncScope::OpenCLDevice: - Name = "agent"; - break; - case SyncScope::HIPSystem: - case SyncScope::OpenCLAllSVMDevices: - Name = ""; - break; - } - - if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { - if (!Name.empty()) - Name = Twine(Twine(Name) + Twine("-")).str(); - - Name = Twine(Twine(Name) + Twine("one-as")).str(); - } - - return Ctx.getOrInsertSyncScopeID(Name); -} - -bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { - return false; -} - -void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( - const FunctionType *&FT) const { - FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); -} - -//===----------------------------------------------------------------------===// -// SPARC v8 ABI Implementation. -// Based on the SPARC Compliance Definition version 2.4.1. -// -// Ensures that complex values are passed in registers. 
-// -namespace { -class SparcV8ABIInfo : public DefaultABIInfo { -public: - SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - -private: - ABIArgInfo classifyReturnType(QualType RetTy) const; - void computeInfo(CGFunctionInfo &FI) const override; -}; -} // end anonymous namespace - - -ABIArgInfo -SparcV8ABIInfo::classifyReturnType(QualType Ty) const { - if (Ty->isAnyComplexType()) { - return ABIArgInfo::getDirect(); - } - else { - return DefaultABIInfo::classifyReturnType(Ty); - } -} - -void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { - - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &Arg : FI.arguments()) - Arg.info = classifyArgumentType(Arg.type); -} - -namespace { -class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { -public: - SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {} - - llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - int Offset; - if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType())) - Offset = 12; - else - Offset = 8; - return CGF.Builder.CreateGEP(CGF.Int8Ty, Address, - llvm::ConstantInt::get(CGF.Int32Ty, Offset)); - } - - llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - int Offset; - if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType())) - Offset = -12; - else - Offset = -8; - return CGF.Builder.CreateGEP(CGF.Int8Ty, Address, - llvm::ConstantInt::get(CGF.Int32Ty, Offset)); - } -}; -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// SPARC v9 ABI Implementation. -// Based on the SPARC Compliance Definition version 2.4.1. -// -// Function arguments are mapped to a nominal "parameter array" and promoted to -// registers depending on their type. Each argument occupies 8 or 16 bytes in -// the array; structs larger than 16 bytes are passed indirectly. -// -// One case requires special care: -// -// struct mixed { -// int i; -// float f; -// }; -// -// When a struct mixed is passed by value, it only occupies 8 bytes in the -// parameter array, but the int is passed in an integer register, and the float -// is passed in a floating point register. This is represented as two arguments -// with the LLVM IR inreg attribute: -// -// declare void f(i32 inreg %i, float inreg %f) -// -// The code generator will only allocate 4 bytes from the parameter array for -// the inreg arguments. All other arguments are allocated a multiple of 8 -// bytes. -// -namespace { -class SparcV9ABIInfo : public ABIInfo { -public: - SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} - -private: - ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const; - void computeInfo(CGFunctionInfo &FI) const override; - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - // Coercion type builder for structs passed in registers. The coercion type - // serves two purposes: - // - // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned' - // in registers. - // 2. Expose aligned floating point elements as first-level elements, so the - // code generator knows to pass them in floating point registers. - // - // We also compute the InReg flag which indicates that the struct contains - // aligned 32-bit floats.
- // - struct CoerceBuilder { - llvm::LLVMContext &Context; - const llvm::DataLayout &DL; - SmallVector<llvm::Type*, 8> Elems; - uint64_t Size; - bool InReg; - - CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl) - : Context(c), DL(dl), Size(0), InReg(false) {} - - // Pad Elems with integers until Size is ToSize. - void pad(uint64_t ToSize) { - assert(ToSize >= Size && "Cannot remove elements"); - if (ToSize == Size) - return; - - // Finish the current 64-bit word. - uint64_t Aligned = llvm::alignTo(Size, 64); - if (Aligned > Size && Aligned <= ToSize) { - Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); - Size = Aligned; - } - - // Add whole 64-bit words. - while (Size + 64 <= ToSize) { - Elems.push_back(llvm::Type::getInt64Ty(Context)); - Size += 64; - } - - // Final in-word padding. - if (Size < ToSize) { - Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size)); - Size = ToSize; - } - } - - // Add a floating point element at Offset. - void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) { - // Unaligned floats are treated as integers. - if (Offset % Bits) - return; - // The InReg flag is only required if there are any floats < 64 bits. - if (Bits < 64) - InReg = true; - pad(Offset); - Elems.push_back(Ty); - Size = Offset + Bits; - } - - // Add a struct type to the coercion type, starting at Offset (in bits). - void addStruct(uint64_t Offset, llvm::StructType *StrTy) { - const llvm::StructLayout *Layout = DL.getStructLayout(StrTy); - for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) { - llvm::Type *ElemTy = StrTy->getElementType(i); - uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i); - switch (ElemTy->getTypeID()) { - case llvm::Type::StructTyID: - addStruct(ElemOffset, cast<llvm::StructType>(ElemTy)); - break; - case llvm::Type::FloatTyID: - addFloat(ElemOffset, ElemTy, 32); - break; - case llvm::Type::DoubleTyID: - addFloat(ElemOffset, ElemTy, 64); - break; - case llvm::Type::FP128TyID: - addFloat(ElemOffset, ElemTy, 128); - break; - case llvm::Type::PointerTyID: - if (ElemOffset % 64 == 0) { - pad(ElemOffset); - Elems.push_back(ElemTy); - Size += 64; - } - break; - default: - break; - } - } - } - - // Check if Ty is a usable substitute for the coercion type. - bool isUsableType(llvm::StructType *Ty) const { - return llvm::ArrayRef(Elems) == Ty->elements(); - } - - // Get the coercion type as a literal struct type. - llvm::Type *getType() const { - if (Elems.size() == 1) - return Elems.front(); - else - return llvm::StructType::get(Context, Elems); - } - }; -}; -} // end anonymous namespace - -ABIArgInfo -SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { - if (Ty->isVoidType()) - return ABIArgInfo::getIgnore(); - - uint64_t Size = getContext().getTypeSize(Ty); - - // Anything too big to fit in registers is passed with an explicit indirect - // pointer / sret pointer. - if (Size > SizeLimit) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // Integer types smaller than a register are extended. - if (Size < 64 && Ty->isIntegerType()) - return ABIArgInfo::getExtend(Ty); - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() < 64) - return ABIArgInfo::getExtend(Ty); - - // Other non-aggregates go in registers. 
- if (!isAggregateTypeForABI(Ty)) - return ABIArgInfo::getDirect(); - - // If a C++ object has either a non-trivial copy constructor or a non-trivial - // destructor, it is passed with an explicit indirect pointer / sret pointer. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); - - // This is a small aggregate type that should be passed in registers. - // Build a coercion type from the LLVM struct type. - llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); - if (!StrTy) - return ABIArgInfo::getDirect(); - - CoerceBuilder CB(getVMContext(), getDataLayout()); - CB.addStruct(0, StrTy); - CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); - - // Try to use the original type for coercion. - llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType(); - - if (CB.InReg) - return ABIArgInfo::getDirectInReg(CoerceTy); - else - return ABIArgInfo::getDirect(CoerceTy); -} - -Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - ABIArgInfo AI = classifyType(Ty, 16 * 8); - llvm::Type *ArgTy = CGT.ConvertType(Ty); - if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) - AI.setCoerceToType(ArgTy); - - CharUnits SlotSize = CharUnits::fromQuantity(8); - - CGBuilderTy &Builder = CGF.Builder; - Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"), - getVAListElementType(CGF), SlotSize); - llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); - - auto TypeInfo = getContext().getTypeInfoInChars(Ty); - - Address ArgAddr = Address::invalid(); - CharUnits Stride; - switch (AI.getKind()) { - case ABIArgInfo::Expand: - case ABIArgInfo::CoerceAndExpand: - case ABIArgInfo::InAlloca: - llvm_unreachable("Unsupported ABI kind for va_arg"); - - case ABIArgInfo::Extend: { - Stride = SlotSize; - CharUnits Offset = SlotSize - TypeInfo.Width; - ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend"); - break; - } - - case ABIArgInfo::Direct: { - auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); - Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); - ArgAddr = Addr; - break; - } - - case ABIArgInfo::Indirect: - case ABIArgInfo::IndirectAliased: - Stride = SlotSize; - ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect"); - ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy, - TypeInfo.Align); - break; - - case ABIArgInfo::Ignore: - return Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeInfo.Align); - } - - // Update VAList. 
- Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next"); - Builder.CreateStore(NextPtr.getPointer(), VAListAddr); - - return Builder.CreateElementBitCast(ArgAddr, ArgTy, "arg.addr"); -} - -void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); - for (auto &I : FI.arguments()) - I.info = classifyType(I.type, 16 * 8); -} - -namespace { -class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { -public: - SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {} - - int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { - return 14; - } - - bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override; - - llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - return CGF.Builder.CreateGEP(CGF.Int8Ty, Address, - llvm::ConstantInt::get(CGF.Int32Ty, 8)); - } - - llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const override { - return CGF.Builder.CreateGEP(CGF.Int8Ty, Address, - llvm::ConstantInt::get(CGF.Int32Ty, -8)); - } -}; -} // end anonymous namespace - -bool -SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - // This is calculated from the LLVM and GCC tables and verified - // against gcc output. AFAIK all ABIs use the same encoding. - - CodeGen::CGBuilderTy &Builder = CGF.Builder; - - llvm::IntegerType *i8 = CGF.Int8Ty; - llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); - llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); - - // 0-31: the 8-byte general-purpose registers - AssignToArrayRange(Builder, Address, Eight8, 0, 31); - - // 32-63: f0-31, the 4-byte floating-point registers - AssignToArrayRange(Builder, Address, Four8, 32, 63); - - // Y = 64 - // PSR = 65 - // WIM = 66 - // TBR = 67 - // PC = 68 - // NPC = 69 - // FSR = 70 - // CSR = 71 - AssignToArrayRange(Builder, Address, Eight8, 64, 71); - - // 72-87: d0-15, the 8-byte floating-point registers - AssignToArrayRange(Builder, Address, Eight8, 72, 87); - - return false; -} - -// ARC ABI implementation. -namespace { - -class ARCABIInfo : public DefaultABIInfo { -public: - using DefaultABIInfo::DefaultABIInfo; - -private: - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const { - if (!State.FreeRegs) - return; - if (Info.isIndirect() && Info.getInReg()) - State.FreeRegs--; - else if (Info.isDirect() && Info.getInReg()) { - unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32; - if (sz < State.FreeRegs) - State.FreeRegs -= sz; - else - State.FreeRegs = 0; - } - } - - void computeInfo(CGFunctionInfo &FI) const override { - CCState State(FI); - // ARC uses 8 registers to pass arguments. 
- State.FreeRegs = 8; - - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - updateState(FI.getReturnInfo(), FI.getReturnType(), State); - for (auto &I : FI.arguments()) { - I.info = classifyArgumentType(I.type, State.FreeRegs); - updateState(I.info, I.type, State); - } - } - - ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const; - ABIArgInfo getIndirectByValue(QualType Ty) const; - ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const; - ABIArgInfo classifyReturnType(QualType RetTy) const; -}; - -class ARCTargetCodeGenInfo : public TargetCodeGenInfo { -public: - ARCTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {} -}; - - -ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const { - return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) : - getNaturalAlignIndirect(Ty, false); -} - -ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { - // Compute the byval alignment. - const unsigned MinABIStackAlignInBytes = 4; - unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, - TypeAlign > MinABIStackAlignInBytes); -} - -Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, - getContext().getTypeInfoInChars(Ty), - CharUnits::fromQuantity(4), true); -} - -ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty, - uint8_t FreeRegs) const { - // Handle the generic C++ ABI. - const RecordType *RT = Ty->getAs<RecordType>(); - if (RT) { - CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); - if (RAA == CGCXXABI::RAA_Indirect) - return getIndirectByRef(Ty, FreeRegs > 0); - - if (RAA == CGCXXABI::RAA_DirectInMemory) - return getIndirectByValue(Ty); - } - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32; - - if (isAggregateTypeForABI(Ty)) { - // Structures with flexible arrays are always indirect. - if (RT && RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectByValue(Ty); - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - llvm::LLVMContext &LLVMContext = getVMContext(); - - llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); - llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); - - return FreeRegs >= SizeInRegs ? - ABIArgInfo::getDirectInReg(Result) : - ABIArgInfo::getDirect(Result, 0, nullptr, false); - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > 64) - return getIndirectByValue(Ty); - - return isPromotableIntegerTypeForABI(Ty) - ? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) - : ABIArgInfo::getExtend(Ty)) - : (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() - : ABIArgInfo::getDirect()); -} - -ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isAnyComplexType()) - return ABIArgInfo::getDirectInReg(); - - // Arguments of size > 4 registers are indirect. 
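// Worked example, not from the original file: RetSize below counts 32-bit
// registers, so a 20-byte struct yields alignTo(160, 32) / 32 = 5 > 4 and
// is therefore returned indirectly (by reference).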
- auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32; - if (RetSize > 4) - return getIndirectByRef(RetTy, /*HasFreeRegs*/ true); - - return DefaultABIInfo::classifyReturnType(RetTy); -} - -} // End anonymous namespace. - -//===----------------------------------------------------------------------===// -// XCore ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -/// A SmallStringEnc instance is used to build up the TypeString by passing -/// it by reference between functions that append to it. -typedef llvm::SmallString<128> SmallStringEnc; - -/// TypeStringCache caches the meta encodings of Types. -/// -/// The reason for caching TypeStrings is two fold: -/// 1. To cache a type's encoding for later uses; -/// 2. As a means to break recursive member type inclusion. -/// -/// A cache Entry can have a Status of: -/// NonRecursive: The type encoding is not recursive; -/// Recursive: The type encoding is recursive; -/// Incomplete: An incomplete TypeString; -/// IncompleteUsed: An incomplete TypeString that has been used in a -/// Recursive type encoding. -/// -/// A NonRecursive entry will have all of its sub-members expanded as fully -/// as possible. Whilst it may contain types which are recursive, the type -/// itself is not recursive and thus its encoding may be safely used whenever -/// the type is encountered. -/// -/// A Recursive entry will have all of its sub-members expanded as fully as -/// possible. The type itself is recursive and it may contain other types which -/// are recursive. The Recursive encoding must not be used during the expansion -/// of a recursive type's recursive branch. For simplicity the code uses -/// IncompleteCount to reject all usage of Recursive encodings for member types. -/// -/// An Incomplete entry is always a RecordType and only encodes its -/// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and -/// are placed into the cache during type expansion as a means to identify and -/// handle recursive inclusion of types as sub-members. If there is recursion -/// the entry becomes IncompleteUsed. -/// -/// During the expansion of a RecordType's members: -/// -/// If the cache contains a NonRecursive encoding for the member type, the -/// cached encoding is used; -/// -/// If the cache contains a Recursive encoding for the member type, the -/// cached encoding is 'Swapped' out, as it may be incorrect, and... -/// -/// If the member is a RecordType, an Incomplete encoding is placed into the -/// cache to break potential recursive inclusion of itself as a sub-member; -/// -/// Once a member RecordType has been expanded, its temporary incomplete -/// entry is removed from the cache. If a Recursive encoding was swapped out -/// it is swapped back in; -/// -/// If an incomplete entry is used to expand a sub-member, the incomplete -/// entry is marked as IncompleteUsed. The cache keeps count of how many -/// IncompleteUsed entries it currently contains in IncompleteUsedCount; -/// -/// If a member's encoding is found to be a NonRecursive or Recursive viz: -/// IncompleteUsedCount==0, the member's encoding is added to the cache. -/// Else the member is part of a recursive type and thus the recursion has -/// been exited too soon for the encoding to be correct for the member. -/// -class TypeStringCache { - enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed}; - struct Entry { - std::string Str; // The encoded TypeString for the type. 
- enum Status State; // Information about the encoding in 'Str'. - std::string Swapped; // A temporary placeholder for a Recursive encoding - // during the expansion of RecordType's members. - }; - std::map<const IdentifierInfo *, struct Entry> Map; - unsigned IncompleteCount; // Number of Incomplete entries in the Map. - unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map. -public: - TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {} - void addIncomplete(const IdentifierInfo *ID, std::string StubEnc); - bool removeIncomplete(const IdentifierInfo *ID); - void addIfComplete(const IdentifierInfo *ID, StringRef Str, - bool IsRecursive); - StringRef lookupStr(const IdentifierInfo *ID); -}; - -/// TypeString encodings for enum & union fields must be ordered. -/// FieldEncoding is a helper for this ordering process. -class FieldEncoding { - bool HasName; - std::string Enc; -public: - FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} - StringRef str() { return Enc; } - bool operator<(const FieldEncoding &rhs) const { - if (HasName != rhs.HasName) return HasName; - return Enc < rhs.Enc; - } -}; - -class XCoreABIInfo : public DefaultABIInfo { -public: - XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { - mutable TypeStringCache TSC; - void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - const CodeGen::CodeGenModule &M) const; - -public: - XCoreTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {} - void emitTargetMetadata(CodeGen::CodeGenModule &CGM, - const llvm::MapVector<GlobalDecl, StringRef> - &MangledDeclNames) const override; -}; - -} // End anonymous namespace. - -// TODO: this implementation is likely now redundant with the default -// EmitVAArg. -Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - CGBuilderTy &Builder = CGF.Builder; - - // Get the VAList. - CharUnits SlotSize = CharUnits::fromQuantity(4); - Address AP = Address(Builder.CreateLoad(VAListAddr), - getVAListElementType(CGF), SlotSize); - - // Handle the argument. - ABIArgInfo AI = classifyArgumentType(Ty); - CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty); - llvm::Type *ArgTy = CGT.ConvertType(Ty); - if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) - AI.setCoerceToType(ArgTy); - llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); - - Address Val = Address::invalid(); - CharUnits ArgSize = CharUnits::Zero(); - switch (AI.getKind()) { - case ABIArgInfo::Expand: - case ABIArgInfo::CoerceAndExpand: - case ABIArgInfo::InAlloca: - llvm_unreachable("Unsupported ABI kind for va_arg"); - case ABIArgInfo::Ignore: - Val = Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeAlign); - ArgSize = CharUnits::Zero(); - break; - case ABIArgInfo::Extend: - case ABIArgInfo::Direct: - Val = Builder.CreateElementBitCast(AP, ArgTy); - ArgSize = CharUnits::fromQuantity( - getDataLayout().getTypeAllocSize(AI.getCoerceToType())); - ArgSize = ArgSize.alignTo(SlotSize); - break; - case ABIArgInfo::Indirect: - case ABIArgInfo::IndirectAliased: - Val = Builder.CreateElementBitCast(AP, ArgPtrTy); - Val = Address(Builder.CreateLoad(Val), ArgTy, TypeAlign); - ArgSize = SlotSize; - break; - } - - // Increment the VAList.
- if (!ArgSize.isZero()) { - Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize); - Builder.CreateStore(APN.getPointer(), VAListAddr); - } - - return Val; -} - -/// During the expansion of a RecordType, an incomplete TypeString is placed -/// into the cache as a means to identify and break recursion. -/// If there is a Recursive encoding in the cache, it is swapped out and will -/// be reinserted by removeIncomplete(). -/// All other types of encoding should have been used rather than arriving here. -void TypeStringCache::addIncomplete(const IdentifierInfo *ID, - std::string StubEnc) { - if (!ID) - return; - Entry &E = Map[ID]; - assert( (E.Str.empty() || E.State == Recursive) && - "Incorrect use of addIncomplete"); - assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()"); - E.Swapped.swap(E.Str); // swap out the Recursive - E.Str.swap(StubEnc); - E.State = Incomplete; - ++IncompleteCount; -} - -/// Once the RecordType has been expanded, the temporary incomplete TypeString -/// must be removed from the cache. -/// If a Recursive was swapped out by addIncomplete(), it will be replaced. -/// Returns true if the RecordType was defined recursively. -bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) { - if (!ID) - return false; - auto I = Map.find(ID); - assert(I != Map.end() && "Entry not present"); - Entry &E = I->second; - assert( (E.State == Incomplete || - E.State == IncompleteUsed) && - "Entry must be an incomplete type"); - bool IsRecursive = false; - if (E.State == IncompleteUsed) { - // We made use of our Incomplete encoding, thus we are recursive. - IsRecursive = true; - --IncompleteUsedCount; - } - if (E.Swapped.empty()) - Map.erase(I); - else { - // Swap the Recursive back. - E.Swapped.swap(E.Str); - E.Swapped.clear(); - E.State = Recursive; - } - --IncompleteCount; - return IsRecursive; -} - -/// Add the encoded TypeString to the cache only if it is NonRecursive or -/// Recursive (viz: all sub-members were expanded as fully as possible). -void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str, - bool IsRecursive) { - if (!ID || IncompleteUsedCount) - return; // No key or it is an incomplete sub-type so don't add. - Entry &E = Map[ID]; - if (IsRecursive && !E.Str.empty()) { - assert(E.State==Recursive && E.Str.size() == Str.size() && - "This is not the same Recursive entry"); - // The parent container was not recursive after all, so we could have used - // this Recursive sub-member entry, but we assumed the worst when - // we started, viz: IncompleteCount != 0. - return; - } - assert(E.Str.empty() && "Entry already present"); - E.Str = Str.str(); - E.State = IsRecursive? Recursive : NonRecursive; -} - -/// Return a cached TypeString encoding for the ID. If there isn't one, or we -/// are recursively expanding a type (IncompleteCount != 0) and the cached -/// encoding is Recursive, return an empty StringRef. -StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { - if (!ID) - return StringRef(); // We have no key. - auto I = Map.find(ID); - if (I == Map.end()) - return StringRef(); // We have no encoding. - Entry &E = I->second; - if (E.State == Recursive && IncompleteCount) - return StringRef(); // We don't use Recursive encodings for member types. - - if (E.State == Incomplete) { - // The incomplete type is being used to break out of recursion.
- E.State = IncompleteUsed;
- ++IncompleteUsedCount;
- }
- return E.Str;
-}
-
-/// The XCore ABI includes a type information section that communicates symbol
-/// type information to the linker. The linker uses this information to verify
-/// safety/correctness of things such as array bounds and pointers, et al.
-/// The ABI only requires C (and XC) language modules to emit TypeStrings.
-/// This type information (TypeString) is emitted into metadata for all global
-/// symbols: definitions, declarations, functions & variables.
-///
-/// The TypeString carries type, qualifier, name, size & value details.
-/// Please see 'Tools Development Guide' section 2.16.2 for format details:
-/// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf
-/// The output is tested by test/CodeGen/xcore-stringtype.c.
-///
-static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
- const CodeGen::CodeGenModule &CGM,
- TypeStringCache &TSC);
-
-/// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
-void XCoreTargetCodeGenInfo::emitTargetMD(
- const Decl *D, llvm::GlobalValue *GV,
- const CodeGen::CodeGenModule &CGM) const {
- SmallStringEnc Enc;
- if (getTypeString(Enc, D, CGM, TSC)) {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV),
- llvm::MDString::get(Ctx, Enc.str())};
- llvm::NamedMDNode *MD =
- CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
- MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
- }
-}
-
-void XCoreTargetCodeGenInfo::emitTargetMetadata(
- CodeGen::CodeGenModule &CGM,
- const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {
- // Warning, new MangledDeclNames may be appended within this loop.
- // We rely on MapVector insertions adding new elements to the end
- // of the container.
- for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
- auto Val = *(MangledDeclNames.begin() + I);
- llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second);
- if (GV) {
- const Decl *D = Val.first.getDecl()->getMostRecentDecl();
- emitTargetMD(D, GV, CGM);
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Base ABI and target codegen info implementation common between SPIR and
-// SPIR-V.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class CommonSPIRABIInfo : public DefaultABIInfo {
-public:
- CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
-
-private:
- void setCCs();
-};
-
-class SPIRVABIInfo : public CommonSPIRABIInfo {
-public:
- SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
- void computeInfo(CGFunctionInfo &FI) const override;
-
-private:
- ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
-};
-} // end anonymous namespace
-namespace {
-class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
-public:
- CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
- CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
- : TargetCodeGenInfo(std::move(ABIInfo)) {}
-
- LangAS getASTAllocaAddressSpace() const override {
- return getLangASFromTargetAS(
- getABIInfo().getDataLayout().getAllocaAddrSpace());
- }
-
- unsigned getOpenCLKernelCallingConv() const override;
-};
-class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
-public:
- SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
- void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
-};
-} // End anonymous namespace.
-
-void CommonSPIRABIInfo::setCCs() {
- assert(getRuntimeCC() == llvm::CallingConv::C);
- RuntimeCC = llvm::CallingConv::SPIR_FUNC;
-}
-
-ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
- if (getContext().getLangOpts().CUDAIsDevice) {
- // Coerce pointer arguments with default address space to CrossWorkGroup
- // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
- // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
- llvm::Type *LTy = CGT.ConvertType(Ty);
- auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
- auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
- auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
- if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
- LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS);
- return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
- }
-
- // Force copying aggregate type in kernel arguments by value when
- // compiling CUDA targeting SPIR-V. This is required for the object
- // copied to be valid on the device.
- // This behavior follows the CUDA spec
- // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
- // and matches the NVPTX implementation.
- if (isAggregateTypeForABI(Ty))
- return getNaturalAlignIndirect(Ty, /* byval */ true);
- }
- return classifyArgumentType(Ty);
-}
-
-void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
- // The logic is the same as in DefaultABIInfo, with an exception for kernel
- // argument handling.
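Aside: the observable effect of the pointer remapping above is that a default-address-space kernel pointer parameter becomes a CrossWorkGroup pointer. A reduced model, assuming the usual SPIR-V numbering where the default address space is 0 and CrossWorkGroup is 1 (constants hard-coded here for illustration; the real code queries the target):

```cpp
#include <iostream>

constexpr unsigned DefaultAS = 0;        // LangAS::Default on the target (assumed)
constexpr unsigned CrossWorkGroupAS = 1; // SPIR-V CrossWorkGroup (assumed)

// Remap only pointers in the default address space, as the kernel-argument
// classification above does; all other pointers keep their address space.
unsigned kernelArgAddrSpace(unsigned PtrAS) {
  return PtrAS == DefaultAS ? CrossWorkGroupAS : PtrAS;
}

int main() {
  std::cout << kernelArgAddrSpace(0) << ' ' << kernelArgAddrSpace(3) << '\n'; // 1 3
}
```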
- llvm::CallingConv::ID CC = FI.getCallingConvention(); - - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - - for (auto &I : FI.arguments()) { - if (CC == llvm::CallingConv::SPIR_KERNEL) { - I.info = classifyKernelArgumentType(I.type); - } else { - I.info = classifyArgumentType(I.type); - } - } -} - -namespace clang { -namespace CodeGen { -void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { - if (CGM.getTarget().getTriple().isSPIRV()) - SPIRVABIInfo(CGM.getTypes()).computeInfo(FI); - else - CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI); -} -} -} - -unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { - return llvm::CallingConv::SPIR_KERNEL; -} - -void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( - const FunctionType *&FT) const { - // Convert HIP kernels to SPIR-V kernels. - if (getABIInfo().getContext().getLangOpts().HIP) { - FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); - return; - } -} - -static bool appendType(SmallStringEnc &Enc, QualType QType, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC); - -/// Helper function for appendRecordType(). -/// Builds a SmallVector containing the encoded field types in declaration -/// order. -static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE, - const RecordDecl *RD, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC) { - for (const auto *Field : RD->fields()) { - SmallStringEnc Enc; - Enc += "m("; - Enc += Field->getName(); - Enc += "){"; - if (Field->isBitField()) { - Enc += "b("; - llvm::raw_svector_ostream OS(Enc); - OS << Field->getBitWidthValue(CGM.getContext()); - Enc += ':'; - } - if (!appendType(Enc, Field->getType(), CGM, TSC)) - return false; - if (Field->isBitField()) - Enc += ')'; - Enc += '}'; - FE.emplace_back(!Field->getName().empty(), Enc); - } - return true; -} - -/// Appends structure and union types to Enc and adds encoding to cache. -/// Recursively calls appendType (via extractFieldType) for each field. -/// Union types have their fields ordered according to the ABI. -static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC, const IdentifierInfo *ID) { - // Append the cached TypeString if we have one. - StringRef TypeString = TSC.lookupStr(ID); - if (!TypeString.empty()) { - Enc += TypeString; - return true; - } - - // Start to emit an incomplete TypeString. - size_t Start = Enc.size(); - Enc += (RT->isUnionType()? 'u' : 's'); - Enc += '('; - if (ID) - Enc += ID->getName(); - Enc += "){"; - - // We collect all encoded fields and order as necessary. - bool IsRecursive = false; - const RecordDecl *RD = RT->getDecl()->getDefinition(); - if (RD && !RD->field_empty()) { - // An incomplete TypeString stub is placed in the cache for this RecordType - // so that recursive calls to this RecordType will use it whilst building a - // complete TypeString for this RecordType. - SmallVector<FieldEncoding, 16> FE; - std::string StubEnc(Enc.substr(Start).str()); - StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. - TSC.addIncomplete(ID, std::move(StubEnc)); - if (!extractFieldType(FE, RD, CGM, TSC)) { - (void) TSC.removeIncomplete(ID); - return false; - } - IsRecursive = TSC.removeIncomplete(ID); - // The ABI requires unions to be sorted but not structures. - // See FieldEncoding::operator< for sort algorithm. 
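Aside: the union-field ordering referenced above comes from FieldEncoding::operator<, shown earlier: named fields sort before unnamed ones, then encodings compare lexicographically. A standalone sketch with a simplified stand-in type:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for FieldEncoding (illustrative only).
struct Field {
  bool HasName;
  std::string Enc;
  bool operator<(const Field &RHS) const {
    if (HasName != RHS.HasName)
      return HasName; // named fields sort first
    return Enc < RHS.Enc;
  }
};

int main() {
  std::vector<Field> FE = {
      {false, "m(){si}"}, {true, "m(b){si}"}, {true, "m(a){si}"}};
  std::sort(FE.begin(), FE.end());
  for (const Field &F : FE)
    std::cout << F.Enc << '\n'; // m(a){si}, m(b){si}, m(){si}
}
```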
- if (RT->isUnionType()) - llvm::sort(FE); - // We can now complete the TypeString. - unsigned E = FE.size(); - for (unsigned I = 0; I != E; ++I) { - if (I) - Enc += ','; - Enc += FE[I].str(); - } - } - Enc += '}'; - TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); - return true; -} - -/// Appends enum types to Enc and adds the encoding to the cache. -static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, - TypeStringCache &TSC, - const IdentifierInfo *ID) { - // Append the cached TypeString if we have one. - StringRef TypeString = TSC.lookupStr(ID); - if (!TypeString.empty()) { - Enc += TypeString; - return true; - } - - size_t Start = Enc.size(); - Enc += "e("; - if (ID) - Enc += ID->getName(); - Enc += "){"; - - // We collect all encoded enumerations and order them alphanumerically. - if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { - SmallVector<FieldEncoding, 16> FE; - for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; - ++I) { - SmallStringEnc EnumEnc; - EnumEnc += "m("; - EnumEnc += I->getName(); - EnumEnc += "){"; - I->getInitVal().toString(EnumEnc); - EnumEnc += '}'; - FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); - } - llvm::sort(FE); - unsigned E = FE.size(); - for (unsigned I = 0; I != E; ++I) { - if (I) - Enc += ','; - Enc += FE[I].str(); - } - } - Enc += '}'; - TSC.addIfComplete(ID, Enc.substr(Start), false); - return true; -} - -/// Appends type's qualifier to Enc. -/// This is done prior to appending the type's encoding. -static void appendQualifier(SmallStringEnc &Enc, QualType QT) { - // Qualifiers are emitted in alphabetical order. - static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"}; - int Lookup = 0; - if (QT.isConstQualified()) - Lookup += 1<<0; - if (QT.isRestrictQualified()) - Lookup += 1<<1; - if (QT.isVolatileQualified()) - Lookup += 1<<2; - Enc += Table[Lookup]; -} - -/// Appends built-in types to Enc. -static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { - const char *EncType; - switch (BT->getKind()) { - case BuiltinType::Void: - EncType = "0"; - break; - case BuiltinType::Bool: - EncType = "b"; - break; - case BuiltinType::Char_U: - EncType = "uc"; - break; - case BuiltinType::UChar: - EncType = "uc"; - break; - case BuiltinType::SChar: - EncType = "sc"; - break; - case BuiltinType::UShort: - EncType = "us"; - break; - case BuiltinType::Short: - EncType = "ss"; - break; - case BuiltinType::UInt: - EncType = "ui"; - break; - case BuiltinType::Int: - EncType = "si"; - break; - case BuiltinType::ULong: - EncType = "ul"; - break; - case BuiltinType::Long: - EncType = "sl"; - break; - case BuiltinType::ULongLong: - EncType = "ull"; - break; - case BuiltinType::LongLong: - EncType = "sll"; - break; - case BuiltinType::Float: - EncType = "ft"; - break; - case BuiltinType::Double: - EncType = "d"; - break; - case BuiltinType::LongDouble: - EncType = "ld"; - break; - default: - return false; - } - Enc += EncType; - return true; -} - -/// Appends a pointer encoding to Enc before calling appendType for the pointee. -static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC) { - Enc += "p("; - if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) - return false; - Enc += ')'; - return true; -} - -/// Appends array encoding to Enc before calling appendType for the element. 
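Aside: appendQualifier() above packs const/restrict/volatile into a 3-bit index over an alphabetically ordered table. The same lookup in isolation; for example, "const volatile int" encodes with prefix "cv:" followed by the builtin code "si":

```cpp
#include <iostream>
#include <string>

static const char *const Table[] = {"",   "c:",  "r:",  "cr:",
                                    "v:", "cv:", "rv:", "crv:"};

// const -> bit 0, restrict -> bit 1, volatile -> bit 2, as above.
std::string qualPrefix(bool Const, bool Restrict, bool Volatile) {
  return Table[(Const ? 1 : 0) | (Restrict ? 2 : 0) | (Volatile ? 4 : 0)];
}

int main() {
  std::cout << qualPrefix(true, false, true) << "si" << '\n'; // cv:si
}
```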
-static bool appendArrayType(SmallStringEnc &Enc, QualType QT, - const ArrayType *AT, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC, StringRef NoSizeEnc) { - if (AT->getSizeModifier() != ArrayType::Normal) - return false; - Enc += "a("; - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) - CAT->getSize().toStringUnsigned(Enc); - else - Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". - Enc += ':'; - // The Qualifiers should be attached to the type rather than the array. - appendQualifier(Enc, QT); - if (!appendType(Enc, AT->getElementType(), CGM, TSC)) - return false; - Enc += ')'; - return true; -} - -/// Appends a function encoding to Enc, calling appendType for the return type -/// and the arguments. -static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC) { - Enc += "f{"; - if (!appendType(Enc, FT->getReturnType(), CGM, TSC)) - return false; - Enc += "}("; - if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) { - // N.B. we are only interested in the adjusted param types. - auto I = FPT->param_type_begin(); - auto E = FPT->param_type_end(); - if (I != E) { - do { - if (!appendType(Enc, *I, CGM, TSC)) - return false; - ++I; - if (I != E) - Enc += ','; - } while (I != E); - if (FPT->isVariadic()) - Enc += ",va"; - } else { - if (FPT->isVariadic()) - Enc += "va"; - else - Enc += '0'; - } - } - Enc += ')'; - return true; -} - -/// Handles the type's qualifier before dispatching a call to handle specific -/// type encodings. -static bool appendType(SmallStringEnc &Enc, QualType QType, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC) { - - QualType QT = QType.getCanonicalType(); - - if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) - // The Qualifiers should be attached to the type rather than the array. - // Thus we don't call appendQualifier() here. - return appendArrayType(Enc, QT, AT, CGM, TSC, ""); - - appendQualifier(Enc, QT); - - if (const BuiltinType *BT = QT->getAs<BuiltinType>()) - return appendBuiltinType(Enc, BT); - - if (const PointerType *PT = QT->getAs<PointerType>()) - return appendPointerType(Enc, PT, CGM, TSC); - - if (const EnumType *ET = QT->getAs<EnumType>()) - return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier()); - - if (const RecordType *RT = QT->getAsStructureType()) - return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); - - if (const RecordType *RT = QT->getAsUnionType()) - return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); - - if (const FunctionType *FT = QT->getAs<FunctionType>()) - return appendFunctionType(Enc, FT, CGM, TSC); - - return false; -} - -static bool getTypeString(SmallStringEnc &Enc, const Decl *D, - const CodeGen::CodeGenModule &CGM, - TypeStringCache &TSC) { - if (!D) - return false; - - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { - if (FD->getLanguageLinkage() != CLanguageLinkage) - return false; - return appendType(Enc, FD->getType(), CGM, TSC); - } - - if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { - if (VD->getLanguageLinkage() != CLanguageLinkage) - return false; - QualType QT = VD->getType().getCanonicalType(); - if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) { - // Global ArrayTypes are given a size of '*' if the size is unknown. - // The Qualifiers should be attached to the type rather than the array. - // Thus we don't call appendQualifier() here. 
- return appendArrayType(Enc, QT, AT, CGM, TSC, "*"); - } - return appendType(Enc, QT, CGM, TSC); - } - return false; -} - -//===----------------------------------------------------------------------===// -// RISCV ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { -class RISCVABIInfo : public DefaultABIInfo { -private: - // Size of the integer ('x') registers in bits. - unsigned XLen; - // Size of the floating point ('f') registers in bits. Note that the target - // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target - // with soft float ABI has FLen==0). - unsigned FLen; - static const int NumArgGPRs = 8; - static const int NumArgFPRs = 8; - bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, - llvm::Type *&Field1Ty, - CharUnits &Field1Off, - llvm::Type *&Field2Ty, - CharUnits &Field2Off) const; - -public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} - - // DefaultABIInfo's classifyReturnType and classifyArgumentType are - // non-virtual, but computeInfo is virtual, so we overload it. - void computeInfo(CGFunctionInfo &FI) const override; - - ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, - int &ArgFPRsLeft) const; - ABIArgInfo classifyReturnType(QualType RetTy) const; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - ABIArgInfo extendType(QualType Ty) const; - - bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, - CharUnits &Field1Off, llvm::Type *&Field2Ty, - CharUnits &Field2Off, int &NeededArgGPRs, - int &NeededArgFPRs) const; - ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, - CharUnits Field1Off, - llvm::Type *Field2Ty, - CharUnits Field2Off) const; -}; -} // end anonymous namespace - -void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { - QualType RetTy = FI.getReturnType(); - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(RetTy); - - // IsRetIndirect is true if classifyArgumentType indicated the value should - // be passed indirect, or if the type size is a scalar greater than 2*XLen - // and not a complex type with elements <= FLen. e.g. fp128 is passed direct - // in LLVM IR, relying on the backend lowering code to rewrite the argument - // list and pass indirectly on RV32. - bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; - if (!IsRetIndirect && RetTy->isScalarType() && - getContext().getTypeSize(RetTy) > (2 * XLen)) { - if (RetTy->isComplexType() && FLen) { - QualType EltTy = RetTy->castAs<ComplexType>()->getElementType(); - IsRetIndirect = getContext().getTypeSize(EltTy) > FLen; - } else { - // This is a normal scalar > 2*XLen, such as fp128 on RV32. - IsRetIndirect = true; - } - } - - int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; - int ArgFPRsLeft = FLen ? NumArgFPRs : 0; - int NumFixedArgs = FI.getNumRequiredArgs(); - - int ArgNum = 0; - for (auto &ArgInfo : FI.arguments()) { - bool IsFixed = ArgNum < NumFixedArgs; - ArgInfo.info = - classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); - ArgNum++; - } -} - -// Returns true if the struct is a potential candidate for the floating point -// calling convention. If this function returns true, the caller is -// responsible for checking that if there is only a single field then that -// field is a float. 
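Aside: before the helper's body in the next hunk, a few concrete cases may help. Under the rules just described, and assuming the RV64 LP64D ABI (XLen=64, FLen=64), these structs classify as follows (compile-only sketch; the real code also checks register availability later):

```cpp
struct A { double d; };           // eligible: flattens to a single fp value
struct B { float f; double d; };  // eligible: fp + fp
struct C { int i; double d; };    // eligible: int + fp (int fits in XLen)
struct D { int i; long j; };      // not eligible: int + int pairs are rejected
struct E { double d[3]; };        // not eligible: flattens to three fields
int main() {}
```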
-bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, - llvm::Type *&Field1Ty, - CharUnits &Field1Off, - llvm::Type *&Field2Ty, - CharUnits &Field2Off) const { - bool IsInt = Ty->isIntegralOrEnumerationType(); - bool IsFloat = Ty->isRealFloatingType(); - - if (IsInt || IsFloat) { - uint64_t Size = getContext().getTypeSize(Ty); - if (IsInt && Size > XLen) - return false; - // Can't be eligible if larger than the FP registers. Half precision isn't - // currently supported on RISC-V and the ABI hasn't been confirmed, so - // default to the integer ABI in that case. - if (IsFloat && (Size > FLen || Size < 32)) - return false; - // Can't be eligible if an integer type was already found (int+int pairs - // are not eligible). - if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) - return false; - if (!Field1Ty) { - Field1Ty = CGT.ConvertType(Ty); - Field1Off = CurOff; - return true; - } - if (!Field2Ty) { - Field2Ty = CGT.ConvertType(Ty); - Field2Off = CurOff; - return true; - } - return false; - } - - if (auto CTy = Ty->getAs<ComplexType>()) { - if (Field1Ty) - return false; - QualType EltTy = CTy->getElementType(); - if (getContext().getTypeSize(EltTy) > FLen) - return false; - Field1Ty = CGT.ConvertType(EltTy); - Field1Off = CurOff; - Field2Ty = Field1Ty; - Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); - return true; - } - - if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { - uint64_t ArraySize = ATy->getSize().getZExtValue(); - QualType EltTy = ATy->getElementType(); - CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); - for (uint64_t i = 0; i < ArraySize; ++i) { - bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, - Field1Off, Field2Ty, Field2Off); - if (!Ret) - return false; - CurOff += EltSize; - } - return true; - } - - if (const auto *RTy = Ty->getAs<RecordType>()) { - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are not eligible for the FP calling convention. - if (getRecordArgABI(Ty, CGT.getCXXABI())) - return false; - if (isEmptyRecord(getContext(), Ty, true)) - return true; - const RecordDecl *RD = RTy->getDecl(); - // Unions aren't eligible unless they're empty (which is caught above). - if (RD->isUnion()) - return false; - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (const CXXBaseSpecifier &B : CXXRD->bases()) { - const auto *BDecl = - cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl()); - CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); - bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, - Field1Ty, Field1Off, Field2Ty, - Field2Off); - if (!Ret) - return false; - } - } - int ZeroWidthBitFieldCount = 0; - for (const FieldDecl *FD : RD->fields()) { - uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); - QualType QTy = FD->getType(); - if (FD->isBitField()) { - unsigned BitWidth = FD->getBitWidthValue(getContext()); - // Allow a bitfield with a type greater than XLen as long as the - // bitwidth is XLen or less. 
- if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen) - QTy = getContext().getIntTypeForBitwidth(XLen, false); - if (BitWidth == 0) { - ZeroWidthBitFieldCount++; - continue; - } - } - - bool Ret = detectFPCCEligibleStructHelper( - QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), - Field1Ty, Field1Off, Field2Ty, Field2Off); - if (!Ret) - return false; - - // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp - // or int+fp structs, but are ignored for a struct with an fp field and - // any number of zero-width bitfields. - if (Field2Ty && ZeroWidthBitFieldCount > 0) - return false; - } - return Field1Ty != nullptr; - } - - return false; -} - -// Determine if a struct is eligible for passing according to the floating -// point calling convention (i.e., when flattened it contains a single fp -// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and -// NeededArgGPRs are incremented appropriately. -bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, - CharUnits &Field1Off, - llvm::Type *&Field2Ty, - CharUnits &Field2Off, - int &NeededArgGPRs, - int &NeededArgFPRs) const { - Field1Ty = nullptr; - Field2Ty = nullptr; - NeededArgGPRs = 0; - NeededArgFPRs = 0; - bool IsCandidate = detectFPCCEligibleStructHelper( - Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); - // Not really a candidate if we have a single int but no float. - if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) - return false; - if (!IsCandidate) - return false; - if (Field1Ty && Field1Ty->isFloatingPointTy()) - NeededArgFPRs++; - else if (Field1Ty) - NeededArgGPRs++; - if (Field2Ty && Field2Ty->isFloatingPointTy()) - NeededArgFPRs++; - else if (Field2Ty) - NeededArgGPRs++; - return true; -} - -// Call getCoerceAndExpand for the two-element flattened struct described by -// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an -// appropriate coerceToType and unpaddedCoerceToType. 
-ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( - llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, - CharUnits Field2Off) const { - SmallVector<llvm::Type *, 3> CoerceElts; - SmallVector<llvm::Type *, 2> UnpaddedCoerceElts; - if (!Field1Off.isZero()) - CoerceElts.push_back(llvm::ArrayType::get( - llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); - - CoerceElts.push_back(Field1Ty); - UnpaddedCoerceElts.push_back(Field1Ty); - - if (!Field2Ty) { - return ABIArgInfo::getCoerceAndExpand( - llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), - UnpaddedCoerceElts[0]); - } - - CharUnits Field2Align = - CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty)); - CharUnits Field1End = Field1Off + - CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); - CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); - - CharUnits Padding = CharUnits::Zero(); - if (Field2Off > Field2OffNoPadNoPack) - Padding = Field2Off - Field2OffNoPadNoPack; - else if (Field2Off != Field2Align && Field2Off > Field1End) - Padding = Field2Off - Field1End; - - bool IsPacked = !Field2Off.isMultipleOf(Field2Align); - - if (!Padding.isZero()) - CoerceElts.push_back(llvm::ArrayType::get( - llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); - - CoerceElts.push_back(Field2Ty); - UnpaddedCoerceElts.push_back(Field2Ty); - - auto CoerceToType = - llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); - auto UnpaddedCoerceToType = - llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); - - return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); -} - -ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - int &ArgGPRsLeft, - int &ArgFPRsLeft) const { - assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); - Ty = useFirstFieldIfTransparentUnion(Ty); - - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always passed indirectly. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - if (ArgGPRsLeft) - ArgGPRsLeft -= 1; - return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == - CGCXXABI::RAA_DirectInMemory); - } - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - uint64_t Size = getContext().getTypeSize(Ty); - - // Pass floating point values via FPRs if possible. - if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && - FLen >= Size && ArgFPRsLeft) { - ArgFPRsLeft--; - return ABIArgInfo::getDirect(); - } - - // Complex types for the hard float ABI must be passed direct rather than - // using CoerceAndExpand. 
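Aside: the padding logic above, stripped down to plain byte offsets: any gap before Field2 that natural alignment after Field1 would not already produce becomes an explicit i8 array. This sketch covers only the first (aligned) padding case; the offsets and sizes are illustrative:

```cpp
#include <cstdint>
#include <iostream>

uint64_t alignTo(uint64_t X, uint64_t A) { return (X + A - 1) / A * A; }

// Bytes of explicit padding needed before Field2 in the coerced struct.
uint64_t paddingBeforeField2(uint64_t Field1Off, uint64_t Field1Size,
                             uint64_t Field2Off, uint64_t Field2Align) {
  uint64_t Field1End = Field1Off + Field1Size;
  uint64_t NaturalOff = alignTo(Field1End, Field2Align);
  return Field2Off > NaturalOff ? Field2Off - NaturalOff : 0;
}

int main() {
  // struct { float f; double d; } on LP64D: f at 0 (4 bytes), d at 8.
  std::cout << paddingBeforeField2(0, 4, 8, 8) << '\n'; // 0: alignment suffices
  // struct { float f; alignas(16) double d; }: f at 0, d at 16, ABI align 8.
  std::cout << paddingBeforeField2(0, 4, 16, 8) << '\n'; // 8 bytes of padding
}
```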
- if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { - QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); - if (getContext().getTypeSize(EltTy) <= FLen) { - ArgFPRsLeft -= 2; - return ABIArgInfo::getDirect(); - } - } - - if (IsFixed && FLen && Ty->isStructureOrClassType()) { - llvm::Type *Field1Ty = nullptr; - llvm::Type *Field2Ty = nullptr; - CharUnits Field1Off = CharUnits::Zero(); - CharUnits Field2Off = CharUnits::Zero(); - int NeededArgGPRs = 0; - int NeededArgFPRs = 0; - bool IsCandidate = - detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, - NeededArgGPRs, NeededArgFPRs); - if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && - NeededArgFPRs <= ArgFPRsLeft) { - ArgGPRsLeft -= NeededArgGPRs; - ArgFPRsLeft -= NeededArgFPRs; - return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, - Field2Off); - } - } - - uint64_t NeededAlign = getContext().getTypeAlign(Ty); - // Determine the number of GPRs needed to pass the current argument - // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" - // register pairs, so may consume 3 registers. - int NeededArgGPRs = 1; - if (!IsFixed && NeededAlign == 2 * XLen) - NeededArgGPRs = 2 + (ArgGPRsLeft % 2); - else if (Size > XLen && Size <= 2 * XLen) - NeededArgGPRs = 2; - - if (NeededArgGPRs > ArgGPRsLeft) { - NeededArgGPRs = ArgGPRsLeft; - } - - ArgGPRsLeft -= NeededArgGPRs; - - if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // All integral types are promoted to XLen width - if (Size < XLen && Ty->isIntegralOrEnumerationType()) { - return extendType(Ty); - } - - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() < XLen) - return extendType(Ty); - if (EIT->getNumBits() > 128 || - (!getContext().getTargetInfo().hasInt128Type() && - EIT->getNumBits() > 64)) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - } - - return ABIArgInfo::getDirect(); - } - - // Aggregates which are <= 2*XLen will be passed in registers if possible, - // so coerce to integers. - if (Size <= 2 * XLen) { - unsigned Alignment = getContext().getTypeAlign(Ty); - - // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is - // required, and a 2-element XLen array if only XLen alignment is required. - if (Size <= XLen) { - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), XLen)); - } else if (Alignment == 2 * XLen) { - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), 2 * XLen)); - } else { - return ABIArgInfo::getDirect(llvm::ArrayType::get( - llvm::IntegerType::get(getVMContext(), XLen), 2)); - } - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); -} - -ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - int ArgGPRsLeft = 2; - int ArgFPRsLeft = FLen ? 2 : 0; - - // The rules for return and argument types are the same, so defer to - // classifyArgumentType. - return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, - ArgFPRsLeft); -} - -Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); - - // Empty records are ignored for parameter passing purposes. 
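Aside: the GPR accounting above can be exercised on its own. A sketch assuming RV32 (XLen=32), mirroring the "aligned" register-pair rule: a 2*XLen-aligned vararg may burn an extra register so that the pair starts on an even register:

```cpp
#include <iostream>

// Mirrors the NeededArgGPRs computation above, including the final clamp.
int neededGPRs(unsigned SizeBits, unsigned AlignBits, bool IsFixed,
               int GPRsLeft, unsigned XLen = 32) {
  int Needed = 1;
  if (!IsFixed && AlignBits == 2 * XLen)
    Needed = 2 + (GPRsLeft % 2); // skip one register to reach an aligned pair
  else if (SizeBits > XLen && SizeBits <= 2 * XLen)
    Needed = 2;
  return Needed > GPRsLeft ? GPRsLeft : Needed;
}

int main() {
  std::cout << neededGPRs(64, 64, /*IsFixed=*/false, 7) << '\n'; // vararg, odd GPR: 3
  std::cout << neededGPRs(64, 64, /*IsFixed=*/true, 8) << '\n';  // fixed int64: 2
}
```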
- if (isEmptyRecord(getContext(), Ty, true)) { - Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), - getVAListElementType(CGF), SlotSize); - Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - return Addr; - } - - auto TInfo = getContext().getTypeInfoInChars(Ty); - - // Arguments bigger than 2*Xlen bytes are passed indirectly. - bool IsIndirect = TInfo.Width > 2 * SlotSize; - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, - SlotSize, /*AllowHigherAlign=*/true); -} - -ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { - int TySize = getContext().getTypeSize(Ty); - // RV64 ABI requires unsigned 32 bit integers to be sign extended. - if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) - return ABIArgInfo::getSignExtend(Ty); - return ABIArgInfo::getExtend(Ty); -} - -namespace { -class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { -public: - RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, - unsigned FLen) - : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {} - - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { - const auto *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) return; - - const auto *Attr = FD->getAttr<RISCVInterruptAttr>(); - if (!Attr) - return; - - const char *Kind; - switch (Attr->getInterrupt()) { - case RISCVInterruptAttr::user: Kind = "user"; break; - case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break; - case RISCVInterruptAttr::machine: Kind = "machine"; break; - } - - auto *Fn = cast<llvm::Function>(GV); - - Fn->addFnAttr("interrupt", Kind); - } -}; -} // namespace - -//===----------------------------------------------------------------------===// -// VE ABI Implementation. -// -namespace { -class VEABIInfo : public DefaultABIInfo { -public: - VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - -private: - ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType RetTy) const; - void computeInfo(CGFunctionInfo &FI) const override; -}; -} // end anonymous namespace - -ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const { - if (Ty->isAnyComplexType()) - return ABIArgInfo::getDirect(); - uint64_t Size = getContext().getTypeSize(Ty); - if (Size < 64 && Ty->isIntegerType()) - return ABIArgInfo::getExtend(Ty); - return DefaultABIInfo::classifyReturnType(Ty); -} - -ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const { - if (Ty->isAnyComplexType()) - return ABIArgInfo::getDirect(); - uint64_t Size = getContext().getTypeSize(Ty); - if (Size < 64 && Ty->isIntegerType()) - return ABIArgInfo::getExtend(Ty); - return DefaultABIInfo::classifyArgumentType(Ty); -} - -void VEABIInfo::computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &Arg : FI.arguments()) - Arg.info = classifyArgumentType(Arg.type); -} - -namespace { -class VETargetCodeGenInfo : public TargetCodeGenInfo { -public: - VETargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {} - // VE ABI requires the arguments of variadic and prototype-less functions - // are passed in both registers and memory. 
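Aside: one subtlety from extendType() above: on RV64 an unsigned 32-bit scalar is sign-extended rather than zero-extended, matching the ISA's canonical sext.w form. The resulting bit pattern, observable in plain C++:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t U = 0x80000000u;
  int64_t Promoted = (int32_t)U; // sign-extend, as the RV64 ABI requires
  std::printf("%llx\n", (unsigned long long)Promoted); // ffffffff80000000
}
```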
- bool isNoProtoCallVariadic(const CallArgList &args, - const FunctionNoProtoType *fnType) const override { - return true; - } -}; -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// CSKY ABI Implementation -//===----------------------------------------------------------------------===// -namespace { -class CSKYABIInfo : public DefaultABIInfo { - static const int NumArgGPRs = 4; - static const int NumArgFPRs = 4; - - static const unsigned XLen = 32; - unsigned FLen; - -public: - CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen) - : DefaultABIInfo(CGT), FLen(FLen) {} - - void computeInfo(CGFunctionInfo &FI) const override; - ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft, - int &ArgFPRsLeft, - bool isReturnType = false) const; - ABIArgInfo classifyReturnType(QualType RetTy) const; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; -}; - -} // end anonymous namespace - -void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const { - QualType RetTy = FI.getReturnType(); - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(RetTy); - - bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; - - // We must track the number of GPRs used in order to conform to the CSKY - // ABI, as integer scalars passed in registers should have signext/zeroext - // when promoted. - int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; - int ArgFPRsLeft = FLen ? NumArgFPRs : 0; - - for (auto &ArgInfo : FI.arguments()) { - ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft); - } -} - -Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); - - // Empty records are ignored for parameter passing purposes. - if (isEmptyRecord(getContext(), Ty, true)) { - Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), - getVAListElementType(CGF), SlotSize); - Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - return Addr; - } - - auto TInfo = getContext().getTypeInfoInChars(Ty); - - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize, - /*AllowHigherAlign=*/true); -} - -ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft, - int &ArgFPRsLeft, - bool isReturnType) const { - assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); - Ty = useFirstFieldIfTransparentUnion(Ty); - - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always passed indirectly. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - if (ArgGPRsLeft) - ArgGPRsLeft -= 1; - return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == - CGCXXABI::RAA_DirectInMemory); - } - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - if (!Ty->getAsUnionType()) - if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) - return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); - - uint64_t Size = getContext().getTypeSize(Ty); - // Pass floating point values via FPRs if possible. - if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size && - ArgFPRsLeft) { - ArgFPRsLeft--; - return ABIArgInfo::getDirect(); - } - - // Complex types for the hard float ABI must be passed direct rather than - // using CoerceAndExpand. 
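Aside: the single-element-struct rule above means a one-scalar wrapper is passed exactly like the scalar it wraps. Concrete cases (compile-only sketch):

```cpp
struct Wrapper { float Value; }; // classified like a plain float on CSKY
struct Pair { float A, B; };     // two fields: stays an aggregate
struct Outer { Wrapper W; };     // still single-element after flattening
int main() {}
```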
- if (Ty->isComplexType() && FLen && !isReturnType) { - QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); - if (getContext().getTypeSize(EltTy) <= FLen) { - ArgFPRsLeft -= 2; - return ABIArgInfo::getDirect(); - } - } - - if (!isAggregateTypeForABI(Ty)) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // All integral types are promoted to XLen width, unless passed on the - // stack. - if (Size < XLen && Ty->isIntegralOrEnumerationType()) - return ABIArgInfo::getExtend(Ty); - - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() < XLen) - return ABIArgInfo::getExtend(Ty); - } - - return ABIArgInfo::getDirect(); - } - - // For argument type, the first 4*XLen parts of aggregate will be passed - // in registers, and the rest will be passed in stack. - // So we can coerce to integers directly and let backend handle it correctly. - // For return type, aggregate which <= 2*XLen will be returned in registers. - // Otherwise, aggregate will be returned indirectly. - if (!isReturnType || (isReturnType && Size <= 2 * XLen)) { - if (Size <= XLen) { - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), XLen)); - } else { - return ABIArgInfo::getDirect(llvm::ArrayType::get( - llvm::IntegerType::get(getVMContext(), XLen), (Size + 31) / XLen)); - } - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); -} - -ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - int ArgGPRsLeft = 2; - int ArgFPRsLeft = FLen ? 1 : 0; - - // The rules for return and argument types are the same, so defer to - // classifyArgumentType. - return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true); -} - -namespace { -class CSKYTargetCodeGenInfo : public TargetCodeGenInfo { -public: - CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen) - : TargetCodeGenInfo(std::make_unique<CSKYABIInfo>(CGT, FLen)) {} -}; -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// BPF ABI Implementation -//===----------------------------------------------------------------------===// - -namespace { - -class BPFABIInfo : public DefaultABIInfo { -public: - BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} - - ABIArgInfo classifyArgumentType(QualType Ty) const { - Ty = useFirstFieldIfTransparentUnion(Ty); - - if (isAggregateTypeForABI(Ty)) { - uint64_t Bits = getContext().getTypeSize(Ty); - if (Bits == 0) - return ABIArgInfo::getIgnore(); - - // If the aggregate needs 1 or 2 registers, do not use reference. - if (Bits <= 128) { - llvm::Type *CoerceTy; - if (Bits <= 64) { - CoerceTy = - llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); - } else { - llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64); - CoerceTy = llvm::ArrayType::get(RegTy, 2); - } - return ABIArgInfo::getDirect(CoerceTy); - } else { - return getNaturalAlignIndirect(Ty); - } - } - - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - ASTContext &Context = getContext(); - if (const auto *EIT = Ty->getAs<BitIntType>()) - if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) - return getNaturalAlignIndirect(Ty); - - return (isPromotableIntegerTypeForABI(Ty) ? 
ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); - } - - ABIArgInfo classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - - if (isAggregateTypeForABI(RetTy)) - return getNaturalAlignIndirect(RetTy); - - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) - RetTy = EnumTy->getDecl()->getIntegerType(); - - ASTContext &Context = getContext(); - if (const auto *EIT = RetTy->getAs<BitIntType>()) - if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) - return getNaturalAlignIndirect(RetTy); - - // Caller will do necessary sign/zero extension. - return ABIArgInfo::getDirect(); - } - - void computeInfo(CGFunctionInfo &FI) const override { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type); - } - -}; - -class BPFTargetCodeGenInfo : public TargetCodeGenInfo { -public: - BPFTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<BPFABIInfo>(CGT)) {} - - const BPFABIInfo &getABIInfo() const { - return static_cast<const BPFABIInfo&>(TargetCodeGenInfo::getABIInfo()); - } -}; - -} - -// LoongArch ABI Implementation. Documented at -// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html -// -//===----------------------------------------------------------------------===// - -namespace { -class LoongArchABIInfo : public DefaultABIInfo { -private: - // Size of the integer ('r') registers in bits. - unsigned GRLen; - // Size of the floating point ('f') registers in bits. - unsigned FRLen; - // Number of general-purpose argument registers. - static const int NumGARs = 8; - // Number of floating-point argument registers. - static const int NumFARs = 8; - bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff, - llvm::Type *&Field1Ty, - CharUnits &Field1Off, - llvm::Type *&Field2Ty, - CharUnits &Field2Off) const; - -public: - LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) - : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} - - void computeInfo(CGFunctionInfo &FI) const override; - - ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft, - int &FARsLeft) const; - ABIArgInfo classifyReturnType(QualType RetTy) const; - - Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; - - ABIArgInfo extendType(QualType Ty) const; - - bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, - CharUnits &Field1Off, llvm::Type *&Field2Ty, - CharUnits &Field2Off, int &NeededArgGPRs, - int &NeededArgFPRs) const; - ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty, - CharUnits Field1Off, - llvm::Type *Field2Ty, - CharUnits Field2Off) const; -}; -} // end anonymous namespace - -void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { - QualType RetTy = FI.getReturnType(); - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(RetTy); - - // IsRetIndirect is true if classifyArgumentType indicated the value should - // be passed indirect, or if the type size is a scalar greater than 2*GRLen - // and not a complex type with elements <= FRLen. e.g. fp128 is passed direct - // in LLVM IR, relying on the backend lowering code to rewrite the argument - // list and pass indirectly on LA32. 
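Aside: in the BPF classification above, aggregates that fit in one or two registers are coerced rather than passed by reference. The size-to-coercion mapping as a standalone function (LLVM type names rendered as strings purely for illustration):

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Mirror of the BPF aggregate rule: <= 64 bits -> one integer rounded up to a
// byte multiple, <= 128 bits -> [2 x i64], larger -> indirect.
std::string bpfAggregateCoercion(uint64_t Bits) {
  if (Bits == 0)
    return "ignore";
  if (Bits <= 64)
    return "i" + std::to_string((Bits + 7) / 8 * 8);
  if (Bits <= 128)
    return "[2 x i64]";
  return "indirect";
}

int main() {
  std::cout << bpfAggregateCoercion(20) << '\n';  // i24
  std::cout << bpfAggregateCoercion(96) << '\n';  // [2 x i64]
  std::cout << bpfAggregateCoercion(256) << '\n'; // indirect
}
```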
- bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; - if (!IsRetIndirect && RetTy->isScalarType() && - getContext().getTypeSize(RetTy) > (2 * GRLen)) { - if (RetTy->isComplexType() && FRLen) { - QualType EltTy = RetTy->castAs<ComplexType>()->getElementType(); - IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen; - } else { - // This is a normal scalar > 2*GRLen, such as fp128 on LA32. - IsRetIndirect = true; - } - } - - // We must track the number of GARs and FARs used in order to conform to the - // LoongArch ABI. As GAR usage is different for variadic arguments, we must - // also track whether we are examining a vararg or not. - int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs; - int FARsLeft = FRLen ? NumFARs : 0; - int NumFixedArgs = FI.getNumRequiredArgs(); - - int ArgNum = 0; - for (auto &ArgInfo : FI.arguments()) { - ArgInfo.info = classifyArgumentType( - ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft); - ArgNum++; - } -} - -// Returns true if the struct is a potential candidate to be passed in FARs (and -// GARs). If this function returns true, the caller is responsible for checking -// that if there is only a single field then that field is a float. -bool LoongArchABIInfo::detectFARsEligibleStructHelper( - QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, - llvm::Type *&Field2Ty, CharUnits &Field2Off) const { - bool IsInt = Ty->isIntegralOrEnumerationType(); - bool IsFloat = Ty->isRealFloatingType(); - - if (IsInt || IsFloat) { - uint64_t Size = getContext().getTypeSize(Ty); - if (IsInt && Size > GRLen) - return false; - // Can't be eligible if larger than the FP registers. Half precision isn't - // currently supported on LoongArch and the ABI hasn't been confirmed, so - // default to the integer ABI in that case. - if (IsFloat && (Size > FRLen || Size < 32)) - return false; - // Can't be eligible if an integer type was already found (int+int pairs - // are not eligible). - if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) - return false; - if (!Field1Ty) { - Field1Ty = CGT.ConvertType(Ty); - Field1Off = CurOff; - return true; - } - if (!Field2Ty) { - Field2Ty = CGT.ConvertType(Ty); - Field2Off = CurOff; - return true; - } - return false; - } - - if (auto CTy = Ty->getAs<ComplexType>()) { - if (Field1Ty) - return false; - QualType EltTy = CTy->getElementType(); - if (getContext().getTypeSize(EltTy) > FRLen) - return false; - Field1Ty = CGT.ConvertType(EltTy); - Field1Off = CurOff; - Field2Ty = Field1Ty; - Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); - return true; - } - - if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { - uint64_t ArraySize = ATy->getSize().getZExtValue(); - QualType EltTy = ATy->getElementType(); - CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); - for (uint64_t i = 0; i < ArraySize; ++i) { - if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, - Field2Ty, Field2Off)) - return false; - CurOff += EltSize; - } - return true; - } - - if (const auto *RTy = Ty->getAs<RecordType>()) { - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are not eligible for the FP calling convention. - if (getRecordArgABI(Ty, CGT.getCXXABI())) - return false; - if (isEmptyRecord(getContext(), Ty, true)) - return true; - const RecordDecl *RD = RTy->getDecl(); - // Unions aren't eligible unless they're empty (which is caught above). 
- if (RD->isUnion()) - return false; - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); - // If this is a C++ record, check the bases first. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - for (const CXXBaseSpecifier &B : CXXRD->bases()) { - const auto *BDecl = - cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl()); - if (!detectFARsEligibleStructHelper( - B.getType(), CurOff + Layout.getBaseClassOffset(BDecl), - Field1Ty, Field1Off, Field2Ty, Field2Off)) - return false; - } - } - for (const FieldDecl *FD : RD->fields()) { - QualType QTy = FD->getType(); - if (FD->isBitField()) { - unsigned BitWidth = FD->getBitWidthValue(getContext()); - // Zero-width bitfields are ignored. - if (BitWidth == 0) - continue; - // Allow a bitfield with a type greater than GRLen as long as the - // bitwidth is GRLen or less. - if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) { - QTy = getContext().getIntTypeForBitwidth(GRLen, false); - } - } - - if (!detectFARsEligibleStructHelper( - QTy, - CurOff + getContext().toCharUnitsFromBits( - Layout.getFieldOffset(FD->getFieldIndex())), - Field1Ty, Field1Off, Field2Ty, Field2Off)) - return false; - } - return Field1Ty != nullptr; - } - - return false; -} - -// Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when -// flattened it contains a single fp value, fp+fp, or int+fp of appropriate -// size). If so, NeededFARs and NeededGARs are incremented appropriately. -bool LoongArchABIInfo::detectFARsEligibleStruct( - QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, - llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs, - int &NeededFARs) const { - Field1Ty = nullptr; - Field2Ty = nullptr; - NeededGARs = 0; - NeededFARs = 0; - if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, - Field1Off, Field2Ty, Field2Off)) - return false; - // Not really a candidate if we have a single int but no float. - if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) - return false; - if (Field1Ty && Field1Ty->isFloatingPointTy()) - NeededFARs++; - else if (Field1Ty) - NeededGARs++; - if (Field2Ty && Field2Ty->isFloatingPointTy()) - NeededFARs++; - else if (Field2Ty) - NeededGARs++; - return true; -} - -// Call getCoerceAndExpand for the two-element flattened struct described by -// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an -// appropriate coerceToType and unpaddedCoerceToType. 
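Aside: one behavioral difference against the RISC-V helper earlier in this file: here zero-width bit-fields are skipped outright, whereas the RISC-V rules reject an fp+fp or int+fp candidate once a zero-width bit-field has been seen. Under these rules, a struct like the following stays FAR-eligible on LoongArch but not under the RISC-V FP calling convention (compile-only illustration):

```cpp
struct S {
  float A;
  int : 0; // zero-width bit-field: skipped by the LoongArch walk above
  float B;
};
int main() {}
```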
-ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct( - llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, - CharUnits Field2Off) const { - SmallVector<llvm::Type *, 3> CoerceElts; - SmallVector<llvm::Type *, 2> UnpaddedCoerceElts; - if (!Field1Off.isZero()) - CoerceElts.push_back(llvm::ArrayType::get( - llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); - - CoerceElts.push_back(Field1Ty); - UnpaddedCoerceElts.push_back(Field1Ty); - - if (!Field2Ty) { - return ABIArgInfo::getCoerceAndExpand( - llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), - UnpaddedCoerceElts[0]); - } - - CharUnits Field2Align = - CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty)); - CharUnits Field1End = - Field1Off + - CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); - CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); - - CharUnits Padding = CharUnits::Zero(); - if (Field2Off > Field2OffNoPadNoPack) - Padding = Field2Off - Field2OffNoPadNoPack; - else if (Field2Off != Field2Align && Field2Off > Field1End) - Padding = Field2Off - Field1End; - - bool IsPacked = !Field2Off.isMultipleOf(Field2Align); - - if (!Padding.isZero()) - CoerceElts.push_back(llvm::ArrayType::get( - llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); - - CoerceElts.push_back(Field2Ty); - UnpaddedCoerceElts.push_back(Field2Ty); - - return ABIArgInfo::getCoerceAndExpand( - llvm::StructType::get(getVMContext(), CoerceElts, IsPacked), - llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked)); -} - -ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - int &GARsLeft, - int &FARsLeft) const { - assert(GARsLeft <= NumGARs && "GAR tracking underflow"); - Ty = useFirstFieldIfTransparentUnion(Ty); - - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always passed indirectly. - if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { - if (GARsLeft) - GARsLeft -= 1; - return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == - CGCXXABI::RAA_DirectInMemory); - } - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - - uint64_t Size = getContext().getTypeSize(Ty); - - // Pass floating point values via FARs if possible. - if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && - FRLen >= Size && FARsLeft) { - FARsLeft--; - return ABIArgInfo::getDirect(); - } - - // Complex types for the *f or *d ABI must be passed directly rather than - // using CoerceAndExpand. 
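Aside: the FRLen used above (and FLen on RISC-V) is derived by the driver code at the end of this file from the ABI name's suffix. The convention in isolation, with made-up example ABI names (a sketch, not the driver itself):

```cpp
#include <iostream>
#include <string>

// Trailing 'd' -> 64-bit FP argument registers, trailing 'f' -> 32-bit,
// otherwise the soft-float convention with FRLen == 0.
unsigned frlenForABI(const std::string &ABI) {
  if (!ABI.empty() && ABI.back() == 'd')
    return 64;
  if (!ABI.empty() && ABI.back() == 'f')
    return 32;
  return 0;
}

int main() {
  std::cout << frlenForABI("lp64d") << ' ' << frlenForABI("ilp32f") << ' '
            << frlenForABI("lp64") << '\n'; // 64 32 0
}
```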
- if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { - QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); - if (getContext().getTypeSize(EltTy) <= FRLen) { - FARsLeft -= 2; - return ABIArgInfo::getDirect(); - } - } - - if (IsFixed && FRLen && Ty->isStructureOrClassType()) { - llvm::Type *Field1Ty = nullptr; - llvm::Type *Field2Ty = nullptr; - CharUnits Field1Off = CharUnits::Zero(); - CharUnits Field2Off = CharUnits::Zero(); - int NeededGARs = 0; - int NeededFARs = 0; - bool IsCandidate = detectFARsEligibleStruct( - Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs); - if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) { - GARsLeft -= NeededGARs; - FARsLeft -= NeededFARs; - return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty, - Field2Off); - } - } - - uint64_t NeededAlign = getContext().getTypeAlign(Ty); - // Determine the number of GARs needed to pass the current argument - // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" - // register pairs, so may consume 3 registers. - int NeededGARs = 1; - if (!IsFixed && NeededAlign == 2 * GRLen) - NeededGARs = 2 + (GARsLeft % 2); - else if (Size > GRLen && Size <= 2 * GRLen) - NeededGARs = 2; - - if (NeededGARs > GARsLeft) - NeededGARs = GARsLeft; - - GARsLeft -= NeededGARs; - - if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { - // Treat an enum type as its underlying type. - if (const EnumType *EnumTy = Ty->getAs<EnumType>()) - Ty = EnumTy->getDecl()->getIntegerType(); - - // All integral types are promoted to GRLen width. - if (Size < GRLen && Ty->isIntegralOrEnumerationType()) - return extendType(Ty); - - if (const auto *EIT = Ty->getAs<BitIntType>()) { - if (EIT->getNumBits() < GRLen) - return extendType(Ty); - if (EIT->getNumBits() > 128 || - (!getContext().getTargetInfo().hasInt128Type() && - EIT->getNumBits() > 64)) - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - } - - return ABIArgInfo::getDirect(); - } - - // Aggregates which are <= 2*GRLen will be passed in registers if possible, - // so coerce to integers. - if (Size <= 2 * GRLen) { - // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is - // required, and a 2-element GRLen array if only GRLen alignment is - // required. - if (Size <= GRLen) { - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), GRLen)); - } - if (getContext().getTypeAlign(Ty) == 2 * GRLen) { - return ABIArgInfo::getDirect( - llvm::IntegerType::get(getVMContext(), 2 * GRLen)); - } - return ABIArgInfo::getDirect( - llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2)); - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); -} - -ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { - if (RetTy->isVoidType()) - return ABIArgInfo::getIgnore(); - // The rules for return and argument types are the same, so defer to - // classifyArgumentType. - int GARsLeft = 2; - int FARsLeft = FRLen ? 2 : 0; - return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft); -} - -Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); - - // Empty records are ignored for parameter passing purposes. 
- if (isEmptyRecord(getContext(), Ty, true)) { - Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr), - getVAListElementType(CGF), SlotSize); - Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); - return Addr; - } - - auto TInfo = getContext().getTypeInfoInChars(Ty); - - // Arguments bigger than 2*GRLen bytes are passed indirectly. - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, - /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo, - SlotSize, - /*AllowHigherAlign=*/true); -} - -ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { - int TySize = getContext().getTypeSize(Ty); - // LA64 ABI requires unsigned 32 bit integers to be sign extended. - if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) - return ABIArgInfo::getSignExtend(Ty); - return ABIArgInfo::getExtend(Ty); -} - -namespace { -class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { -public: - LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, - unsigned FRLen) - : TargetCodeGenInfo( - std::make_unique<LoongArchABIInfo>(CGT, GRLen, FRLen)) {} -}; -} // namespace - -//===----------------------------------------------------------------------===// -// Driver code -//===----------------------------------------------------------------------===// - -bool CodeGenModule::supportsCOMDAT() const { - return getTriple().supportsCOMDAT(); -} - -const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { - if (TheTargetCodeGenInfo) - return *TheTargetCodeGenInfo; - - // Helper to set the unique_ptr while still keeping the return value. - auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { - this->TheTargetCodeGenInfo.reset(P); - return *P; - }; - - const llvm::Triple &Triple = getTarget().getTriple(); - switch (Triple.getArch()) { - default: - return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); - - case llvm::Triple::le32: - return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); - case llvm::Triple::m68k: - return SetCGInfo(new M68kTargetCodeGenInfo(Types)); - case llvm::Triple::mips: - case llvm::Triple::mipsel: - if (Triple.getOS() == llvm::Triple::NaCl) - return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); - return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); - - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); - - case llvm::Triple::avr: { - // For passing parameters, R8~R25 are used on avr, and R18~R25 are used - // on avrtiny. For passing return value, R18~R25 are used on avr, and - // R22~R25 are used on avrtiny. - unsigned NPR = getTarget().getABI() == "avrtiny" ? 6 : 18; - unsigned NRR = getTarget().getABI() == "avrtiny" ? 
4 : 8; - return SetCGInfo(new AVRTargetCodeGenInfo(Types, NPR, NRR)); - } - - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_32: - case llvm::Triple::aarch64_be: { - AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; - if (getTarget().getABI() == "darwinpcs") - Kind = AArch64ABIInfo::DarwinPCS; - else if (Triple.isOSWindows()) - return SetCGInfo( - new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64)); - - return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); - } - - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: { - WebAssemblyABIInfo::ABIKind Kind = WebAssemblyABIInfo::MVP; - if (getTarget().getABI() == "experimental-mv") - Kind = WebAssemblyABIInfo::ExperimentalMV; - return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind)); - } - - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: { - if (Triple.getOS() == llvm::Triple::Win32) { - return SetCGInfo( - new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); - } - - ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; - StringRef ABIStr = getTarget().getABI(); - if (ABIStr == "apcs-gnu") - Kind = ARMABIInfo::APCS; - else if (ABIStr == "aapcs16") - Kind = ARMABIInfo::AAPCS16_VFP; - else if (CodeGenOpts.FloatABI == "hard" || - (CodeGenOpts.FloatABI != "soft" && - (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || - Triple.getEnvironment() == llvm::Triple::MuslEABIHF || - Triple.getEnvironment() == llvm::Triple::EABIHF))) - Kind = ARMABIInfo::AAPCS_VFP; - - return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); - } - - case llvm::Triple::ppc: { - if (Triple.isOSAIX()) - return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ false)); - - bool IsSoftFloat = - CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe"); - bool RetSmallStructInRegABI = - PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); - return SetCGInfo( - new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI)); - } - case llvm::Triple::ppcle: { - bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - bool RetSmallStructInRegABI = - PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); - return SetCGInfo( - new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI)); - } - case llvm::Triple::ppc64: - if (Triple.isOSAIX()) - return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ true)); - - if (Triple.isOSBinFormatELF()) { - PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; - if (getTarget().getABI() == "elfv2") - Kind = PPC64_SVR4_ABIInfo::ELFv2; - bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - - return SetCGInfo( - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat)); - } - return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); - case llvm::Triple::ppc64le: { - assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); - PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; - if (getTarget().getABI() == "elfv1") - Kind = PPC64_SVR4_ABIInfo::ELFv1; - bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - - return SetCGInfo( - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat)); - } - - case llvm::Triple::nvptx: - case llvm::Triple::nvptx64: - return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); - - case llvm::Triple::msp430: - return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); - - case llvm::Triple::riscv32: - case llvm::Triple::riscv64: { - StringRef ABIStr = getTarget().getABI(); - unsigned XLen = 
getTarget().getPointerWidth(LangAS::Default); - unsigned ABIFLen = 0; - if (ABIStr.endswith("f")) - ABIFLen = 32; - else if (ABIStr.endswith("d")) - ABIFLen = 64; - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen)); - } - - case llvm::Triple::systemz: { - bool SoftFloat = CodeGenOpts.FloatABI == "soft"; - bool HasVector = !SoftFloat && getTarget().getABI() == "vector"; - return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat)); - } - - case llvm::Triple::tce: - case llvm::Triple::tcele: - return SetCGInfo(new TCETargetCodeGenInfo(Types)); - - case llvm::Triple::x86: { - bool IsDarwinVectorABI = Triple.isOSDarwin(); - bool RetSmallStructInRegABI = - X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); - bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); - - if (Triple.getOS() == llvm::Triple::Win32) { - return SetCGInfo(new WinX86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); - } else { - return SetCGInfo(new X86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, - CodeGenOpts.FloatABI == "soft")); - } - } - - case llvm::Triple::x86_64: { - StringRef ABI = getTarget().getABI(); - X86AVXABILevel AVXLevel = - (ABI == "avx512" - ? X86AVXABILevel::AVX512 - : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); - - switch (Triple.getOS()) { - case llvm::Triple::Win32: - return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); - default: - return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel)); - } - } - case llvm::Triple::hexagon: - return SetCGInfo(new HexagonTargetCodeGenInfo(Types)); - case llvm::Triple::lanai: - return SetCGInfo(new LanaiTargetCodeGenInfo(Types)); - case llvm::Triple::r600: - return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); - case llvm::Triple::amdgcn: - return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); - case llvm::Triple::sparc: - return SetCGInfo(new SparcV8TargetCodeGenInfo(Types)); - case llvm::Triple::sparcv9: - return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); - case llvm::Triple::xcore: - return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); - case llvm::Triple::arc: - return SetCGInfo(new ARCTargetCodeGenInfo(Types)); - case llvm::Triple::spir: - case llvm::Triple::spir64: - return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types)); - case llvm::Triple::spirv32: - case llvm::Triple::spirv64: - return SetCGInfo(new SPIRVTargetCodeGenInfo(Types)); - case llvm::Triple::ve: - return SetCGInfo(new VETargetCodeGenInfo(Types)); - case llvm::Triple::csky: { - bool IsSoftFloat = !getTarget().hasFeature("hard-float-abi"); - bool hasFP64 = getTarget().hasFeature("fpuv2_df") || - getTarget().hasFeature("fpuv3_df"); - return SetCGInfo(new CSKYTargetCodeGenInfo(Types, IsSoftFloat ? 0 - : hasFP64 ? 64 - : 32)); - } - case llvm::Triple::bpfeb: - case llvm::Triple::bpfel: - return SetCGInfo(new BPFTargetCodeGenInfo(Types)); - case llvm::Triple::loongarch32: - case llvm::Triple::loongarch64: { - StringRef ABIStr = getTarget().getABI(); - unsigned ABIFRLen = 0; - if (ABIStr.endswith("f")) - ABIFRLen = 32; - else if (ABIStr.endswith("d")) - ABIFRLen = 64; - return SetCGInfo(new LoongArchTargetCodeGenInfo( - Types, getTarget().getPointerWidth(LangAS::Default), ABIFRLen)); - } - } -} - /// Create an OpenCL kernel for an enqueued block. 
 ///
 /// The kernel has the same function type as the block invoke function. Its
 /// name is the name of the block invoke function postfixed with "_kernel".
 /// It simply calls the block invoke function then returns.
-llvm::Function *
-TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
-                                             llvm::Function *Invoke,
-                                             llvm::Type *BlockTy) const {
+llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
+    CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
   auto *InvokeFT = Invoke->getFunctionType();
   auto &C = CGF.getLLVMContext();
   std::string Name = Invoke->getName().str() + "_kernel";
@@ -12406,88 +183,38 @@ TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                        InvokeFT->params(), false);
   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
                                    &CGF.CGM.getModule());
+  llvm::CallingConv::ID KernelCC =
+      CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel);
+  F->setCallingConv(KernelCC);
+
+  llvm::AttrBuilder KernelAttrs(C);
+
+  // FIXME: This is missing setTargetAttributes
+  CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
+  F->addFnAttrs(KernelAttrs);
+
   auto IP = CGF.Builder.saveIP();
   auto *BB = llvm::BasicBlock::Create(C, "entry", F);
   auto &Builder = CGF.Builder;
   Builder.SetInsertPoint(BB);
   llvm::SmallVector<llvm::Value *, 2> Args(llvm::make_pointer_range(F->args()));
-  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
-  call->setCallingConv(Invoke->getCallingConv());
+  llvm::CallInst *Call = Builder.CreateCall(Invoke, Args);
+  Call->setCallingConv(Invoke->getCallingConv());
+
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
   return F;
 }
 
-/// Create an OpenCL kernel for an enqueued block.
-///
-/// The type of the first argument (the block literal) is the struct type
-/// of the block literal instead of a pointer type. The first argument
-/// (block literal) is passed directly by value to the kernel. The kernel
-/// allocates the same type of struct on stack and stores the block literal
-/// to it and passes its pointer to the block invoke function. The kernel
-/// has "enqueued-block" function attribute and kernel argument metadata.
-llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( - CodeGenFunction &CGF, llvm::Function *Invoke, - llvm::Type *BlockTy) const { - auto &Builder = CGF.Builder; - auto &C = CGF.getLLVMContext(); - - auto *InvokeFT = Invoke->getFunctionType(); - llvm::SmallVector<llvm::Type *, 2> ArgTys; - llvm::SmallVector<llvm::Metadata *, 8> AddressQuals; - llvm::SmallVector<llvm::Metadata *, 8> AccessQuals; - llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames; - llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames; - llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals; - llvm::SmallVector<llvm::Metadata *, 8> ArgNames; - - ArgTys.push_back(BlockTy); - ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); - AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); - ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); - ArgTypeQuals.push_back(llvm::MDString::get(C, "")); - AccessQuals.push_back(llvm::MDString::get(C, "none")); - ArgNames.push_back(llvm::MDString::get(C, "block_literal")); - for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { - ArgTys.push_back(InvokeFT->getParamType(I)); - ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); - AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); - AccessQuals.push_back(llvm::MDString::get(C, "none")); - ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); - ArgTypeQuals.push_back(llvm::MDString::get(C, "")); - ArgNames.push_back( - llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); - } - std::string Name = Invoke->getName().str() + "_kernel"; - auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); - auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, - &CGF.CGM.getModule()); - F->addFnAttr("enqueued-block"); - auto IP = CGF.Builder.saveIP(); - auto *BB = llvm::BasicBlock::Create(C, "entry", F); - Builder.SetInsertPoint(BB); - const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy); - auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); - BlockPtr->setAlignment(BlockAlign); - Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); - auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); - llvm::SmallVector<llvm::Value *, 2> Args; - Args.push_back(Cast); - for (llvm::Argument &A : llvm::drop_begin(F->args())) - Args.push_back(&A); - llvm::CallInst *call = Builder.CreateCall(Invoke, Args); - call->setCallingConv(Invoke->getCallingConv()); - Builder.CreateRetVoid(); - Builder.restoreIP(IP); - - F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); - F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); - F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); - F->setMetadata("kernel_arg_base_type", - llvm::MDNode::get(C, ArgBaseTypeNames)); - F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); - if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata) - F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); +namespace { +class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { +public: + DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} +}; +} // namespace - return F; +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createDefaultTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<DefaultTargetCodeGenInfo>(CGM.getTypes()); } diff --git 
a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index c7c1ec7fce7e..14ed5e5d2d2c 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -52,6 +52,11 @@ protected:
   // by returning true from TargetInfo::checkCallingConvention for them.
   std::unique_ptr<SwiftABIInfo> SwiftInfo;
 
+  // Returns the ABI info helper for the target. This is for use by derived classes.
+  template <typename T> const T &getABIInfo() const {
+    return static_cast<const T &>(*Info);
+  }
+
 public:
   TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info);
   virtual ~TargetCodeGenInfo();
@@ -199,9 +204,10 @@ public:
 
   /// Return a constant used by UBSan as a signature to identify functions
   /// possessing type information, or 0 if the platform is unsupported.
+  /// This magic number is an invalid instruction encoding on many targets.
   virtual llvm::Constant *
   getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const {
-    return nullptr;
+    return llvm::ConstantInt::get(CGM.Int32Ty, 0xc105cafe);
   }
 
   /// Determine whether a call to an unprototyped function under
@@ -339,7 +345,7 @@ public:
   /// convention and ABI as an OpenCL kernel. The wrapper function accepts
   /// block context and block arguments in a target-specific way and calls
   /// the original block invoke function.
-  virtual llvm::Function *
+  virtual llvm::Value *
   createEnqueuedBlockKernel(CodeGenFunction &CGF,
                             llvm::Function *BlockInvokeFunc,
                             llvm::Type *BlockTy) const;
@@ -349,6 +355,11 @@ public:
   /// as 'used', and having internal linkage.
   virtual bool shouldEmitStaticExternCAliases() const { return true; }
 
+  /// \return true if anonymous zero-sized bitfields should be emitted to
+  /// correctly distinguish between struct types whose memory layout is the
+  /// same, but whose layout may differ when used as an argument passed by
+  /// value.
+  virtual bool shouldEmitDWARFBitFieldSeparators() const { return false; }
+
   virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
 
   /// Return the device-side type for the CUDA device builtin surface type.
@@ -362,6 +373,12 @@ public:
     return nullptr;
   }
 
+  /// Return the WebAssembly externref reference type.
+  virtual llvm::Type *getWasmExternrefReferenceType() const { return nullptr; }
+
+  /// Return the WebAssembly funcref reference type.
+  virtual llvm::Type *getWasmFuncrefReferenceType() const { return nullptr; }
+
   /// Emit the device-side copy of the builtin surface type.
   virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
                                                       LValue Dst,
@@ -376,8 +393,163 @@ public:
     // DO NOTHING by default.
     return false;
   }
+
+  /// Return an LLVM type that corresponds to an OpenCL type.
+ virtual llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const { + return nullptr; + } + +protected: + static std::string qualifyWindowsLibrary(StringRef Lib); + + void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const; +}; + +std::unique_ptr<TargetCodeGenInfo> +createDefaultTargetCodeGenInfo(CodeGenModule &CGM); + +enum class AArch64ABIKind { + AAPCS = 0, + DarwinPCS, + Win64, +}; + +std::unique_ptr<TargetCodeGenInfo> +createAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind); + +std::unique_ptr<TargetCodeGenInfo> +createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K); + +std::unique_ptr<TargetCodeGenInfo> +createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createARCTargetCodeGenInfo(CodeGenModule &CGM); + +enum class ARMABIKind { + APCS = 0, + AAPCS = 1, + AAPCS_VFP = 2, + AAPCS16_VFP = 3, }; +std::unique_ptr<TargetCodeGenInfo> +createARMTargetCodeGenInfo(CodeGenModule &CGM, ARMABIKind Kind); + +std::unique_ptr<TargetCodeGenInfo> +createWindowsARMTargetCodeGenInfo(CodeGenModule &CGM, ARMABIKind K); + +std::unique_ptr<TargetCodeGenInfo> +createAVRTargetCodeGenInfo(CodeGenModule &CGM, unsigned NPR, unsigned NRR); + +std::unique_ptr<TargetCodeGenInfo> +createBPFTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createCSKYTargetCodeGenInfo(CodeGenModule &CGM, unsigned FLen); + +std::unique_ptr<TargetCodeGenInfo> +createHexagonTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createLanaiTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createLoongArchTargetCodeGenInfo(CodeGenModule &CGM, unsigned GRLen, + unsigned FLen); + +std::unique_ptr<TargetCodeGenInfo> +createM68kTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createMIPSTargetCodeGenInfo(CodeGenModule &CGM, bool IsOS32); + +std::unique_ptr<TargetCodeGenInfo> +createMSP430TargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createNVPTXTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createPNaClTargetCodeGenInfo(CodeGenModule &CGM); + +enum class PPC64_SVR4_ABIKind { + ELFv1 = 0, + ELFv2, +}; + +std::unique_ptr<TargetCodeGenInfo> +createAIXTargetCodeGenInfo(CodeGenModule &CGM, bool Is64Bit); + +std::unique_ptr<TargetCodeGenInfo> +createPPC32TargetCodeGenInfo(CodeGenModule &CGM, bool SoftFloatABI); + +std::unique_ptr<TargetCodeGenInfo> +createPPC64TargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createPPC64_SVR4_TargetCodeGenInfo(CodeGenModule &CGM, PPC64_SVR4_ABIKind Kind, + bool SoftFloatABI); + +std::unique_ptr<TargetCodeGenInfo> +createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, unsigned FLen); + +std::unique_ptr<TargetCodeGenInfo> +createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createSPIRVTargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createSparcV8TargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createSparcV9TargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createSystemZTargetCodeGenInfo(CodeGenModule &CGM, bool HasVector, + bool SoftFloatABI); + +std::unique_ptr<TargetCodeGenInfo> +createTCETargetCodeGenInfo(CodeGenModule &CGM); + +std::unique_ptr<TargetCodeGenInfo> +createVETargetCodeGenInfo(CodeGenModule &CGM); + +enum class WebAssemblyABIKind { + MVP 
= 0, + ExperimentalMV = 1, +}; + +std::unique_ptr<TargetCodeGenInfo> +createWebAssemblyTargetCodeGenInfo(CodeGenModule &CGM, WebAssemblyABIKind K); + +/// The AVX ABI level for X86 targets. +enum class X86AVXABILevel { + None, + AVX, + AVX512, +}; + +std::unique_ptr<TargetCodeGenInfo> createX86_32TargetCodeGenInfo( + CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI, + unsigned NumRegisterParameters, bool SoftFloatABI); + +std::unique_ptr<TargetCodeGenInfo> +createWinX86_32TargetCodeGenInfo(CodeGenModule &CGM, bool DarwinVectorABI, + bool Win32StructABI, + unsigned NumRegisterParameters); + +std::unique_ptr<TargetCodeGenInfo> +createX86_64TargetCodeGenInfo(CodeGenModule &CGM, X86AVXABILevel AVXLevel); + +std::unique_ptr<TargetCodeGenInfo> +createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM, X86AVXABILevel AVXLevel); + +std::unique_ptr<TargetCodeGenInfo> +createXCoreTargetCodeGenInfo(CodeGenModule &CGM); + } // namespace CodeGen } // namespace clang diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp new file mode 100644 index 000000000000..561110ff8c0d --- /dev/null +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -0,0 +1,824 @@ +//===- AArch64.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { + AArch64ABIKind Kind; + +public: + AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind) + : ABIInfo(CGT), Kind(Kind) {} + +private: + AArch64ABIKind getABIKind() const { return Kind; } + bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; } + + ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic, + unsigned CallingConvention) const; + ABIArgInfo coerceIllegalVector(QualType Ty) const; + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Ty, + uint64_t Members) const override; + bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; + + bool isIllegalVectorType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!::classifyReturnType(getCXXABI(), FI, *this)) + FI.getReturnInfo() = + classifyReturnType(FI.getReturnType(), FI.isVariadic()); + + for (auto &it : FI.arguments()) + it.info = classifyArgumentType(it.type, FI.isVariadic(), + FI.getCallingConvention()); + } + + Address EmitDarwinVAArg(Address VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; + + Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override { + llvm::Type *BaseTy = CGF.ConvertType(Ty); + if (isa<llvm::ScalableVectorType>(BaseTy)) + llvm::report_fatal_error("Passing SVE types to variadic functions is " + "currently not supported"); + + return Kind == AArch64ABIKind::Win64 ? 
EmitMSVAArg(CGF, VAListAddr, Ty) + : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) + : EmitAAPCSVAArg(VAListAddr, Ty, CGF); + } + + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + bool allowBFloatArgsAndRet() const override { + return getTarget().hasBFloat16Type(); + } +}; + +class AArch64SwiftABIInfo : public SwiftABIInfo { +public: + explicit AArch64SwiftABIInfo(CodeGenTypes &CGT) + : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} + + bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const override; +}; + +class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { +public: + AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind) + : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) { + SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT); + } + + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { + return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + return 31; + } + + bool doesReturnSlotInterfereWithArgs() const override { return false; } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) + return; + + const auto *TA = FD->getAttr<TargetAttr>(); + if (TA == nullptr) + return; + + ParsedTargetAttr Attr = + CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); + if (Attr.BranchProtection.empty()) + return; + + TargetInfo::BranchProtectionInfo BPI; + StringRef Error; + (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + Attr.CPU, BPI, Error); + assert(Error.empty()); + + auto *Fn = cast<llvm::Function>(GV); + static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; + Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); + + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { + Fn->addFnAttr("sign-return-address-key", + BPI.SignKey == LangOptions::SignReturnAddressKeyKind::AKey + ? "a_key" + : "b_key"); + } + + Fn->addFnAttr("branch-target-enforcement", + BPI.BranchTargetEnforcement ? 
"true" : "false"); + } + + bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF, + llvm::Type *Ty) const override { + if (CGF.getTarget().hasFeature("ls64")) { + auto *ST = dyn_cast<llvm::StructType>(Ty); + if (ST && ST->getNumElements() == 1) { + auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0)); + if (AT && AT->getNumElements() == 8 && + AT->getElementType()->isIntegerTy(64)) + return true; + } + } + return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty); + } +}; + +class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { +public: + WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K) + : AArch64TargetCodeGenInfo(CGT, K) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } +}; + +void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); +} +} + +ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { + assert(Ty->isVectorType() && "expected vector type!"); + + const auto *VT = Ty->castAs<VectorType>(); + if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); + assert(VT->getElementType()->castAs<BuiltinType>()->getKind() == + BuiltinType::UChar && + "unexpected builtin type for SVE predicate!"); + return ABIArgInfo::getDirect(llvm::ScalableVectorType::get( + llvm::Type::getInt1Ty(getVMContext()), 16)); + } + + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) { + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); + + const auto *BT = VT->getElementType()->castAs<BuiltinType>(); + llvm::ScalableVectorType *ResType = nullptr; + switch (BT->getKind()) { + default: + llvm_unreachable("unexpected builtin type for SVE vector!"); + case BuiltinType::SChar: + case BuiltinType::UChar: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt8Ty(getVMContext()), 16); + break; + case BuiltinType::Short: + case BuiltinType::UShort: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt16Ty(getVMContext()), 8); + break; + case BuiltinType::Int: + case BuiltinType::UInt: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt32Ty(getVMContext()), 4); + break; + case BuiltinType::Long: + case BuiltinType::ULong: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt64Ty(getVMContext()), 2); + break; + case BuiltinType::Half: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getHalfTy(getVMContext()), 8); + break; + case BuiltinType::Float: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getFloatTy(getVMContext()), 4); + break; + case BuiltinType::Double: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getDoubleTy(getVMContext()), 2); + break; + case BuiltinType::BFloat16: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getBFloatTy(getVMContext()), 8); + break; + } + return ABIArgInfo::getDirect(ResType); + } + 
+ uint64_t Size = getContext().getTypeSize(Ty); + // Android promotes <2 x i8> to i16, not i32 + if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) { + llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size <= 32) { + llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64) { + auto *ResType = + llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 128) { + auto *ResType = + llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); + return ABIArgInfo::getDirect(ResType); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo +AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, + unsigned CallingConvention) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Handle illegal vector types here. + if (isIllegalVectorType(Ty)) + return coerceIllegalVector(Ty); + + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(Ty); + + return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS() + ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); + } + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always indirect. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Empty records are always ignored on Darwin, but actually passed in C++ mode + // elsewhere for GNU compatibility. + uint64_t Size = getContext().getTypeSize(Ty); + bool IsEmpty = isEmptyRecord(getContext(), Ty, true); + if (IsEmpty || Size == 0) { + if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) + return ABIArgInfo::getIgnore(); + + // GNU C mode. The only argument that gets ignored is an empty one with size + // 0. + if (IsEmpty && Size == 0) + return ABIArgInfo::getIgnore(); + return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); + } + + // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. + const Type *Base = nullptr; + uint64_t Members = 0; + bool IsWin64 = Kind == AArch64ABIKind::Win64 || + CallingConvention == llvm::CallingConv::Win64; + bool IsWinVariadic = IsWin64 && IsVariadic; + // In variadic functions on Windows, all composite types are treated alike, + // no special handling of HFAs/HVAs. + if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) { + if (Kind != AArch64ABIKind::AAPCS) + return ABIArgInfo::getDirect( + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); + + // For alignment adjusted HFAs, cap the argument alignment to 16, leave it + // default otherwise. + unsigned Align = + getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); + unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); + Align = (Align > BaseAlign && Align >= 16) ? 16 : 0; + return ABIArgInfo::getDirect( + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0, + nullptr, true, Align); + } + + // Aggregates <= 16 bytes are passed directly in registers or on the stack. 
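For reference, the vectors that reach coerceIllegalVector above are exactly those rejected by isIllegalVectorType (defined further down in this file). Condensed into one predicate, ignoring the arm64_32 and fixed-length SVE special cases (my paraphrase):

```cpp
// My paraphrase of AArch64ABIInfo::isIllegalVectorType, ignoring the
// arm64_32 and fixed-length SVE special cases.
constexpr bool isLegalAArch64Vector(unsigned SizeBits, unsigned NumElts) {
  bool PowerOfTwo = NumElts != 0 && (NumElts & (NumElts - 1)) == 0;
  return PowerOfTwo && (SizeBits == 64 || (SizeBits == 128 && NumElts != 1));
}
static_assert(isLegalAArch64Vector(64, 2));   // <2 x float> passes directly
static_assert(!isLegalAArch64Vector(16, 2));  // <2 x i8> -> i16/i32 above
static_assert(!isLegalAArch64Vector(128, 3)); // <3 x float>: not a pow-2 count
```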
+ if (Size <= 128) { + // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(Ty, getContext(), getVMContext()); + } + unsigned Alignment; + if (Kind == AArch64ABIKind::AAPCS) { + Alignment = getContext().getTypeUnadjustedAlign(Ty); + Alignment = Alignment < 128 ? 64 : 128; + } else { + Alignment = + std::max(getContext().getTypeAlign(Ty), + (unsigned)getTarget().getPointerWidth(LangAS::Default)); + } + Size = llvm::alignTo(Size, Alignment); + + // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. + // For aggregates with 16-byte alignment, we use i128. + llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment); + return ABIArgInfo::getDirect( + Size == Alignment ? BaseTy + : llvm::ArrayType::get(BaseTy, Size / Alignment)); + } + + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, + bool IsVariadic) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (const auto *VT = RetTy->getAs<VectorType>()) { + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || + VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + return coerceIllegalVector(RetTy); + } + + // Large vector types should be returned via memory. + if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) + return getNaturalAlignIndirect(RetTy); + + if (!isAggregateTypeForABI(RetTy)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + if (const auto *EIT = RetTy->getAs<BitIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(RetTy); + + return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS() + ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); + } + + uint64_t Size = getContext().getTypeSize(RetTy); + if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) + return ABIArgInfo::getIgnore(); + + const Type *Base = nullptr; + uint64_t Members = 0; + if (isHomogeneousAggregate(RetTy, Base, Members) && + !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 && + IsVariadic)) + // Homogeneous Floating-point Aggregates (HFAs) are returned directly. + return ABIArgInfo::getDirect(); + + // Aggregates <= 16 bytes are returned directly in registers or on the stack. + if (Size <= 128) { + // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(RetTy, getContext(), getVMContext()); + } + + if (Size <= 64 && getDataLayout().isLittleEndian()) { + // Composite types are returned in lower bits of a 64-bit register for LE, + // and in higher bits for BE. However, integer types are always returned + // in lower bits for both LE and BE, and they are not rounded up to + // 64-bits. We can skip rounding up of composite types for LE, but not for + // BE, otherwise composite types will be indistinguishable from integer + // types. + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), Size)); + } + + unsigned Alignment = getContext().getTypeAlign(RetTy); + Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes + + // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. + // For aggregates with 16-byte alignment, we use i128. 
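Two worked instances of the i64-pair / i128 rule just stated, before the return-path code below (examples and sizes are mine, assuming the usual Itanium record layout):

```cpp
#include <cstdint>

struct S { uint32_t a, b, c; };       // sizeof == 12 -> Size = 96 bits
// AAPCS: Alignment caps to 64, Size rounds up to 128 -> passed as [2 x i64]

struct alignas(16) T { uint64_t a; }; // sizeof == 16, alignment = 128 bits
// Size == Alignment == 128 -> passed as a single i128
```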
+ if (Alignment < 128 && Size == 128) { + llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); + } + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); + } + + return getNaturalAlignIndirect(RetTy); +} + +/// isIllegalVectorType - check whether the vector type is legal for AArch64. +bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Check whether VT is a fixed-length SVE vector. These types are + // represented as scalable vectors in function args/return and must be + // coerced from fixed vectors. + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || + VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + return true; + + // Check whether VT is legal. + unsigned NumElements = VT->getNumElements(); + uint64_t Size = getContext().getTypeSize(VT); + // NumElements should be power of 2. + if (!llvm::isPowerOf2_32(NumElements)) + return true; + + // arm64_32 has to be compatible with the ARM logic here, which allows huge + // vectors for some reason. + llvm::Triple Triple = getTarget().getTriple(); + if (Triple.getArch() == llvm::Triple::aarch64_32 && + Triple.isOSBinFormatMachO()) + return Size <= 32; + + return Size != 64 && (Size != 128 || NumElements == 1); + } + return false; +} + +bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize, + llvm::Type *EltTy, + unsigned NumElts) const { + if (!llvm::isPowerOf2_32(NumElts)) + return false; + if (VectorSize.getQuantity() != 8 && + (VectorSize.getQuantity() != 16 || NumElts == 1)) + return false; + return true; +} + +bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for AAPCS64 must have base types of a floating + // point type or a short-vector type. This is the same as the 32-bit ABI, + // but with the difference that any floating-point type is allowed, + // including __fp16. + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->isFloatingPoint()) + return true; + } else if (const VectorType *VT = Ty->getAs<VectorType>()) { + unsigned VecSize = getContext().getTypeSize(VT); + if (VecSize == 64 || VecSize == 128) + return true; + } + return false; +} + +bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const { + return Members <= 4; +} + +bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() + const { + // AAPCS64 says that the rule for whether something is a homogeneous + // aggregate is applied to the output of the data layout decision. So + // anything that doesn't affect the data layout also does not affect + // homogeneity. In particular, zero-length bitfields don't stop a struct + // being homogeneous. + return true; +} + +Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true, + CGF.CurFnInfo->getCallingConvention()); + // Empty records are ignored for parameter passing purposes. 
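One subtlety before the ignore path that follows: even an ignored (empty) argument must yield a valid l-value, so the code just hands back the current __stack pointer without advancing any va_list field. A C-level illustration (mine; zero-size empty structs are a GNU C extension):

```cpp
struct Empty {};                 // zero-size GNU C extension
void consume(int n, ...) {
  __builtin_va_list ap;
  __builtin_va_start(ap, n);
  // Consumes no register and no stack space; the emitted code merely
  // reloads ap's current stack pointer to name the (empty) object.
  struct Empty e = __builtin_va_arg(ap, struct Empty);
  __builtin_va_end(ap);
  (void)e;
}
```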
+ if (AI.isIgnore()) { + uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8; + CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); + VAListAddr = VAListAddr.withElementType(CGF.Int8PtrTy); + auto *Load = CGF.Builder.CreateLoad(VAListAddr); + return Address(Load, CGF.ConvertTypeForMem(Ty), SlotSize); + } + + bool IsIndirect = AI.isIndirect(); + + llvm::Type *BaseTy = CGF.ConvertType(Ty); + if (IsIndirect) + BaseTy = llvm::PointerType::getUnqual(BaseTy); + else if (AI.getCoerceToType()) + BaseTy = AI.getCoerceToType(); + + unsigned NumRegs = 1; + if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) { + BaseTy = ArrTy->getElementType(); + NumRegs = ArrTy->getNumElements(); + } + bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); + + // The AArch64 va_list type and handling is specified in the Procedure Call + // Standard, section B.4: + // + // struct { + // void *__stack; + // void *__gr_top; + // void *__vr_top; + // int __gr_offs; + // int __vr_offs; + // }; + + llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); + llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); + llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); + + CharUnits TySize = getContext().getTypeSizeInChars(Ty); + CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty); + + Address reg_offs_p = Address::invalid(); + llvm::Value *reg_offs = nullptr; + int reg_top_index; + int RegSize = IsIndirect ? 8 : TySize.getQuantity(); + if (!IsFPR) { + // 3 is the field number of __gr_offs + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); + reg_top_index = 1; // field number for __gr_top + RegSize = llvm::alignTo(RegSize, 8); + } else { + // 4 is the field number of __vr_offs. + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); + reg_top_index = 2; // field number for __vr_top + RegSize = 16 * NumRegs; + } + + //======================================= + // Find out where argument was passed + //======================================= + + // If reg_offs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating reg_offs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). + llvm::Value *UsingStack = nullptr; + UsingStack = CGF.Builder.CreateICmpSGE( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); + + // Otherwise, at least some kind of argument could go in these registers, the + // question is whether this particular type is too big. + CGF.EmitBlock(MaybeRegBlock); + + // Integer arguments may need to correct register alignment (for example a + // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we + // align __gr_offs to calculate the potential address. + if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) { + int Align = TyAlign.getQuantity(); + + reg_offs = CGF.Builder.CreateAdd( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), + "align_regoffs"); + reg_offs = CGF.Builder.CreateAnd( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), + "aligned_regoffs"); + } + + // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. 
+ // The fact that this is done unconditionally reflects the fact that + // allocating an argument to the stack also uses up all the remaining + // registers of the appropriate kind. + llvm::Value *NewOffset = nullptr; + NewOffset = CGF.Builder.CreateAdd( + reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); + CGF.Builder.CreateStore(NewOffset, reg_offs_p); + + // Now we're in a position to decide whether this argument really was in + // registers or not. + llvm::Value *InRegs = nullptr; + InRegs = CGF.Builder.CreateICmpSLE( + NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); + + CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); + + //======================================= + // Argument was in registers + //======================================= + + // Now we emit the code for if the argument was originally passed in + // registers. First start the appropriate block: + CGF.EmitBlock(InRegBlock); + + llvm::Value *reg_top = nullptr; + Address reg_top_p = + CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); + reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); + Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs), + CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8)); + Address RegAddr = Address::invalid(); + llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy; + + if (IsIndirect) { + // If it's been passed indirectly (actually a struct), whatever we find from + // stored registers or on the stack will actually be a struct **. + MemTy = llvm::PointerType::getUnqual(MemTy); + } + + const Type *Base = nullptr; + uint64_t NumMembers = 0; + bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers); + if (IsHFA && NumMembers > 1) { + // Homogeneous aggregates passed in registers will have their elements split + // and stored 16-bytes apart regardless of size (they're notionally in qN, + // qN+1, ...). We reload and store into a temporary local variable + // contiguously. + assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); + auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0)); + llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); + llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); + Address Tmp = CGF.CreateTempAlloca(HFATy, + std::max(TyAlign, BaseTyInfo.Align)); + + // On big-endian platforms, the value will be right-aligned in its slot. + int Offset = 0; + if (CGF.CGM.getDataLayout().isBigEndian() && + BaseTyInfo.Width.getQuantity() < 16) + Offset = 16 - BaseTyInfo.Width.getQuantity(); + + for (unsigned i = 0; i < NumMembers; ++i) { + CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset); + Address LoadAddr = + CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); + LoadAddr = LoadAddr.withElementType(BaseTy); + + Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i); + + llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); + CGF.Builder.CreateStore(Elem, StoreAddr); + } + + RegAddr = Tmp.withElementType(MemTy); + } else { + // Otherwise the object is contiguous in memory. + + // It might be right-aligned in its slot. 
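Concretely, the right-alignment applied below is just the slack between the slot and the type (sketch and numbers are mine):

```cpp
#include "clang/AST/CharUnits.h"

// Sketch (mine) of the rule used below: on big-endian targets a small
// scalar sits in the high bytes of its slot, so the load address moves
// up by the slack in the slot.
static clang::CharUnits beSlack(clang::CharUnits SlotSize,
                                clang::CharUnits TySize) {
  return SlotSize - TySize; // float in an 8-byte GPR slot: 8 - 4 == 4;
                            // in a 16-byte SIMD slot: 16 - 4 == 12
}
```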
+ CharUnits SlotSize = BaseAddr.getAlignment(); + if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect && + (IsHFA || !isAggregateTypeForABI(Ty)) && + TySize < SlotSize) { + CharUnits Offset = SlotSize - TySize; + BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset); + } + + RegAddr = BaseAddr.withElementType(MemTy); + } + + CGF.EmitBranch(ContBlock); + + //======================================= + // Argument was on the stack + //======================================= + CGF.EmitBlock(OnStackBlock); + + Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); + llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); + + // Again, stack arguments may need realignment. In this case both integer and + // floating-point ones might be affected. + if (!IsIndirect && TyAlign.getQuantity() > 8) { + int Align = TyAlign.getQuantity(); + + OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty); + + OnStackPtr = CGF.Builder.CreateAdd( + OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), + "align_stack"); + OnStackPtr = CGF.Builder.CreateAnd( + OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), + "align_stack"); + + OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy); + } + Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty, + std::max(CharUnits::fromQuantity(8), TyAlign)); + + // All stack slots are multiples of 8 bytes. + CharUnits StackSlotSize = CharUnits::fromQuantity(8); + CharUnits StackSize; + if (IsIndirect) + StackSize = StackSlotSize; + else + StackSize = TySize.alignTo(StackSlotSize); + + llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); + llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP( + CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack"); + + // Write the new value of __stack for the next call to va_arg + CGF.Builder.CreateStore(NewStack, stack_p); + + if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && + TySize < StackSlotSize) { + CharUnits Offset = StackSlotSize - TySize; + OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset); + } + + OnStackAddr = OnStackAddr.withElementType(MemTy); + + CGF.EmitBranch(ContBlock); + + //======================================= + // Tidy up + //======================================= + CGF.EmitBlock(ContBlock); + + Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr, + OnStackBlock, "vaargs.addr"); + + if (IsIndirect) + return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy, + TyAlign); + + return ResAddr; +} + +Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + // The backend's lowering doesn't support va_arg for aggregates or + // illegal vector types. Lower VAArg here for these cases and use + // the LLVM va_arg instruction for everything else. + if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) + return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); + + uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8; + CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) + return Address(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"), + CGF.ConvertTypeForMem(Ty), SlotSize); + + // The size of the actual thing passed, which might end up just + // being a pointer for indirect types. 
+ auto TyInfo = getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 16 bytes which aren't homogeneous + // aggregates should be passed indirectly. + bool IsIndirect = false; + if (TyInfo.Width.getQuantity() > 16) { + const Type *Base = nullptr; + uint64_t Members = 0; + IsIndirect = !isHomogeneousAggregate(Ty, Base, Members); + } + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, + TyInfo, SlotSize, /*AllowHigherAlign*/ true); +} + +Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + bool IsIndirect = false; + + // Composites larger than 16 bytes are passed by reference. + if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128) + IsIndirect = true; + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, + CGF.getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(8), + /*allowHigherAlign*/ false); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM, + AArch64ABIKind Kind) { + return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, + AArch64ABIKind K) { + return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K); +} diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp new file mode 100644 index 000000000000..796a2be81a09 --- /dev/null +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -0,0 +1,601 @@ +//===- AMDGPU.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// AMDGPU ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AMDGPUABIInfo final : public DefaultABIInfo { +private: + static const unsigned MaxNumRegsForArgsRet = 16; + + unsigned numRegsForType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const override; + + // Coerce HIP scalar pointer arguments from generic pointers to global ones. + llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, + unsigned ToAS) const { + // Single value types. 
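What this coercion is after, sketched on a toy HIP kernel (the kernel and the lowered signature are my illustration; address-space numbers follow the usual amdgcn convention, 0 = generic, 1 = global):

```cpp
// HIP source (illustration mine):
//   __global__ void scale(float *p, float s) { *p *= s; }
//
// Uncoerced, the kernel entry would take a generic pointer; with the
// coercion the lowered entry point takes a global-AS pointer instead:
//   define amdgpu_kernel void @scale(ptr addrspace(1) %p, float %s)
```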
+ auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty); + if (PtrTy && PtrTy->getAddressSpace() == FromAS) + return llvm::PointerType::get(Ty->getContext(), ToAS); + return Ty; + } + +public: + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : + DefaultABIInfo(CGT) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyKernelArgumentType(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; + + void computeInfo(CGFunctionInfo &FI) const override; + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + return true; +} + +bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( + const Type *Base, uint64_t Members) const { + uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; + + // Homogeneous Aggregates may occupy at most 16 registers. + return Members * NumRegs <= MaxNumRegsForArgsRet; +} + +/// Estimate number of registers the type will use when passed in registers. +unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { + unsigned NumRegs = 0; + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Compute from the number of elements. The reported size is based on the + // in-memory size, which includes the padding 4th element for 3-vectors. + QualType EltTy = VT->getElementType(); + unsigned EltSize = getContext().getTypeSize(EltTy); + + // 16-bit element vectors should be passed as packed. + if (EltSize == 16) + return (VT->getNumElements() + 1) / 2; + + unsigned EltNumRegs = (EltSize + 31) / 32; + return EltNumRegs * VT->getNumElements(); + } + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + assert(!RD->hasFlexibleArrayMember()); + + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + NumRegs += numRegsForType(FieldTy); + } + + return NumRegs; + } + + return (getContext().getTypeSize(Ty) + 31) / 32; +} + +void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + llvm::CallingConv::ID CC = FI.getCallingConvention(); + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + unsigned NumRegsLeft = MaxNumRegsForArgsRet; + for (auto &Arg : FI.arguments()) { + if (CC == llvm::CallingConv::AMDGPU_KERNEL) { + Arg.info = classifyKernelArgumentType(Arg.type); + } else { + Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); + } + } +} + +Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + llvm_unreachable("AMDGPU does not support varargs"); +} + +ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { + if (isAggregateTypeForABI(RetTy)) { + // Records with non-trivial destructors/copy-constructors should not be + // returned by value. + if (!getRecordArgABI(RetTy, getCXXABI())) { + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just return a regular value. + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = RetTy->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + } + + // Pack aggregates <= 4 bytes into single VGPR or pair. 
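Sample outputs of numRegsForType above, before the packing code below (arithmetic is mine, registers are 32-bit):

```cpp
// numRegsForType(int)         == (32 + 31) / 32      == 1
// numRegsForType(double)      == (64 + 31) / 32      == 2
// numRegsForType(<3 x float>) == 1 reg/elt * 3 elts  == 3 (element count,
//                                not the padded in-memory size of 4)
// numRegsForType(<4 x half>)  == packed 16-bit: (4 + 1) / 2 == 2
struct RGB { float r, g, b; };  // record: field-by-field sum == 3 regs
```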
+ uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + if (Size <= 64) { + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) + return ABIArgInfo::getDirect(); + } + } + + // Otherwise just do the default thing. + return DefaultABIInfo::classifyReturnType(RetTy); +} + +/// For kernels all parameters are really passed in a special buffer. It doesn't +/// make sense to pass anything byval, so everything must be direct. +ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + Ty = QualType(SeltTy, 0); + + llvm::Type *OrigLTy = CGT.ConvertType(Ty); + llvm::Type *LTy = OrigLTy; + if (getContext().getLangOpts().HIP) { + LTy = coerceKernelArgumentType( + OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), + /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device)); + } + + // FIXME: Should also use this for OpenCL, but it requires addressing the + // problem of kernels being called. + // + // FIXME: This doesn't apply the optimization of coercing pointers in structs + // to global address space when using byref. This would require implementing a + // new kind of coercion of the in-memory type when for indirect arguments. + if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy && + isAggregateTypeForABI(Ty)) { + return ABIArgInfo::getIndirectAliased( + getContext().getTypeAlignInChars(Ty), + getContext().getTargetAddressSpace(LangAS::opencl_constant), + false /*Realign*/, nullptr /*Padding*/); + } + + // If we set CanBeFlattened to true, CodeGen will expand the struct to its + // individual elements, which confuses the Clover OpenCL backend; therefore we + // have to set it to false here. Other args of getDirect() are just defaults. + return ABIArgInfo::getDirect(LTy, 0, nullptr, false); +} + +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, + unsigned &NumRegsLeft) const { + assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Pack aggregates <= 8 bytes into single VGPR or pair. 
+ uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // XXX: Should this be i64 instead, and should the limit increase? + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (NumRegsLeft > 0) { + unsigned NumRegs = numRegsForType(Ty); + if (NumRegsLeft >= NumRegs) { + NumRegsLeft -= NumRegs; + return ABIArgInfo::getDirect(); + } + } + } + + // Otherwise just do the default thing. + ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); + if (!ArgInfo.isIndirect()) { + unsigned NumRegs = numRegsForType(Ty); + NumRegsLeft -= std::min(NumRegs, NumRegsLeft); + } + + return ArgInfo; +} + +class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { +public: + AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {} + + void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, + CodeGenModule &CGM) const; + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; + unsigned getOpenCLKernelCallingConv() const override; + + llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, + llvm::PointerType *T, QualType QT) const override; + + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const override; + llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Type *BlockTy) const override; + bool shouldEmitStaticExternCAliases() const override; + bool shouldEmitDWARFBitFieldSeparators() const override; + void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; +}; +} + +static bool requiresAMDGPUProtectedVisibility(const Decl *D, + llvm::GlobalValue *GV) { + if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) + return false; + + return D->hasAttr<OpenCLKernelAttr>() || + (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || + (isa<VarDecl>(D) && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() || + cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())); +} + +void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( + const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const { + const auto *ReqdWGS = + M.getLangOpts().OpenCL ? 
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+ const bool IsOpenCLKernel =
+ M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
+ const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
+
+ const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
+ if (ReqdWGS || FlatWGS) {
+ unsigned Min = 0;
+ unsigned Max = 0;
+ if (FlatWGS) {
+ Min = FlatWGS->getMin()
+ ->EvaluateKnownConstInt(M.getContext())
+ .getExtValue();
+ Max = FlatWGS->getMax()
+ ->EvaluateKnownConstInt(M.getContext())
+ .getExtValue();
+ }
+ if (ReqdWGS && Min == 0 && Max == 0)
+ Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
+
+ if (Min != 0) {
+ assert(Min <= Max && "Min must be less than or equal to Max");
+
+ std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
+ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+ } else
+ assert(Max == 0 && "Max must be zero");
+ } else if (IsOpenCLKernel || IsHIPKernel) {
+ // By default, restrict the maximum size to a value specified by
+ // --gpu-max-threads-per-block=n or its default value for HIP.
+ const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
+ const unsigned DefaultMaxWorkGroupSize =
+ IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
+ : M.getLangOpts().GPUMaxThreadsPerBlock;
+ std::string AttrVal =
+ std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
+ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+ }
+
+ if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
+ unsigned Min =
+ Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
+ unsigned Max = Attr->getMax() ? Attr->getMax()
+ ->EvaluateKnownConstInt(M.getContext())
+ .getExtValue()
+ : 0;
+
+ if (Min != 0) {
+ assert((Max == 0 || Min <= Max) && "Min must be less than or equal to Max");
+
+ std::string AttrVal = llvm::utostr(Min);
+ if (Max != 0)
+ AttrVal = AttrVal + "," + llvm::utostr(Max);
+ F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
+ } else
+ assert(Max == 0 && "Max must be zero");
+ }
+
+ if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
+ unsigned NumSGPR = Attr->getNumSGPR();
+
+ if (NumSGPR != 0)
+ F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
+ }
+
+ if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+ uint32_t NumVGPR = Attr->getNumVGPR();
+
+ if (NumVGPR != 0)
+ F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
+ }
+}
+
+void AMDGPUTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (requiresAMDGPUProtectedVisibility(D, GV)) {
+ GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+ GV->setDSOLocal(true);
+ }
+
+ if (GV->isDeclaration())
+ return;
+
+ llvm::Function *F = dyn_cast<llvm::Function>(GV);
+ if (!F)
+ return;
+
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (FD)
+ setFunctionDeclAttributes(FD, F, M);
+
+ const bool IsHIPKernel =
+ M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+
+ // TODO: This should be moved to language-specific attributes instead.
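+ // A HIP grid is launched as a whole number of blocks, so every work-group
+ // is full and the backend may assume a uniform work-group size.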
+ if (IsHIPKernel)
+ F->addFnAttr("uniform-work-group-size", "true");
+
+ if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
+ F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
+
+ if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
+ F->addFnAttr("amdgpu-ieee", "false");
+}
+
+unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+ return llvm::CallingConv::AMDGPU_KERNEL;
+}
+
+// Currently LLVM assumes null pointers always have value 0, which results in
+// incorrectly transformed IR. Therefore, instead of emitting null pointers in
+// the private and local address spaces, a null pointer in the generic address
+// space is emitted and then addrspacecast to a pointer in the local or
+// private address space.
+llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
+ const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
+ QualType QT) const {
+ if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
+ return llvm::ConstantPointerNull::get(PT);
+
+ auto &Ctx = CGM.getContext();
+ auto NPT = llvm::PointerType::get(
+ PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
+ return llvm::ConstantExpr::getAddrSpaceCast(
+ llvm::ConstantPointerNull::get(NPT), PT);
+}
+
+LangAS
+AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const {
+ assert(!CGM.getLangOpts().OpenCL &&
+ !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+ "Address space agnostic languages only");
+ LangAS DefaultGlobalAS = getLangASFromTargetAS(
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
+ if (!D)
+ return DefaultGlobalAS;
+
+ LangAS AddrSpace = D->getType().getAddressSpace();
+ assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
+ if (AddrSpace != LangAS::Default)
+ return AddrSpace;
+
+ // Only promote to address space 4 if VarDecl has constant initialization.
+ if (CGM.isTypeConstant(D->getType(), false, false) &&
+ D->hasConstantInitialization()) {
+ if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
+ return *ConstAS;
+ }
+ return DefaultGlobalAS;
+}
+
+llvm::SyncScope::ID
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const {
+ std::string Name;
+ switch (Scope) {
+ case SyncScope::HIPSingleThread:
+ Name = "singlethread";
+ break;
+ case SyncScope::HIPWavefront:
+ case SyncScope::OpenCLSubGroup:
+ Name = "wavefront";
+ break;
+ case SyncScope::HIPWorkgroup:
+ case SyncScope::OpenCLWorkGroup:
+ Name = "workgroup";
+ break;
+ case SyncScope::HIPAgent:
+ case SyncScope::OpenCLDevice:
+ Name = "agent";
+ break;
+ case SyncScope::HIPSystem:
+ case SyncScope::OpenCLAllSVMDevices:
+ Name = "";
+ break;
+ }
+
+ if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
+ if (!Name.empty())
+ Name = Twine(Twine(Name) + Twine("-")).str();
+
+ Name = Twine(Twine(Name) + Twine("one-as")).str();
+ }
+
+ return Ctx.getOrInsertSyncScopeID(Name);
+}
+
+bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
+ return false;
+}
+
+bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
+ return true;
+}
+
+void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
+ const FunctionType *&FT) const {
+ FT = getABIInfo().getContext().adjustFunctionType(
+ FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+}
+
+/// Create an OpenCL kernel for an enqueued block.
+/// +/// The type of the first argument (the block literal) is the struct type +/// of the block literal instead of a pointer type. The first argument +/// (block literal) is passed directly by value to the kernel. The kernel +/// allocates the same type of struct on stack and stores the block literal +/// to it and passes its pointer to the block invoke function. The kernel +/// has "enqueued-block" function attribute and kernel argument metadata. +llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( + CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const { + auto &Builder = CGF.Builder; + auto &C = CGF.getLLVMContext(); + + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + llvm::SmallVector<llvm::Metadata *, 8> AddressQuals; + llvm::SmallVector<llvm::Metadata *, 8> AccessQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgNames; + + ArgTys.push_back(BlockTy); + ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgNames.push_back(llvm::MDString::get(C, "block_literal")); + for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { + ArgTys.push_back(InvokeFT->getParamType(I)); + ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + ArgNames.push_back( + llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); + } + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + + llvm::AttrBuilder KernelAttrs(C); + // FIXME: The invoke isn't applying the right attributes either + // FIXME: This is missing setTargetAttributes + CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs); + KernelAttrs.addAttribute("enqueued-block"); + F->addFnAttrs(KernelAttrs); + + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + Builder.SetInsertPoint(BB); + const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy); + auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); + BlockPtr->setAlignment(BlockAlign); + Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); + auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); + llvm::SmallVector<llvm::Value *, 2> Args; + Args.push_back(Cast); + for (llvm::Argument &A : llvm::drop_begin(F->args())) + Args.push_back(&A); + llvm::CallInst *call = Builder.CreateCall(Invoke, Args); + call->setCallingConv(Invoke->getCallingConv()); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + + F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); + F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); + 
F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); + F->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(C, ArgBaseTypeNames)); + F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); + if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata) + F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); + + return F; +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/ARC.cpp b/clang/lib/CodeGen/Targets/ARC.cpp new file mode 100644 index 000000000000..550eb4068f25 --- /dev/null +++ b/clang/lib/CodeGen/Targets/ARC.cpp @@ -0,0 +1,158 @@ +//===- ARC.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +// ARC ABI implementation. +namespace { + +class ARCABIInfo : public DefaultABIInfo { + struct CCState { + unsigned FreeRegs; + }; + +public: + using DefaultABIInfo::DefaultABIInfo; + +private: + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const { + if (!State.FreeRegs) + return; + if (Info.isIndirect() && Info.getInReg()) + State.FreeRegs--; + else if (Info.isDirect() && Info.getInReg()) { + unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32; + if (sz < State.FreeRegs) + State.FreeRegs -= sz; + else + State.FreeRegs = 0; + } + } + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State; + // ARC uses 8 registers to pass arguments. + State.FreeRegs = 8; + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + updateState(FI.getReturnInfo(), FI.getReturnType(), State); + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State.FreeRegs); + updateState(I.info, I.type, State); + } + } + + ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const; + ABIArgInfo getIndirectByValue(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; +}; + +class ARCTargetCodeGenInfo : public TargetCodeGenInfo { +public: + ARCTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {} +}; + + +ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const { + return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) : + getNaturalAlignIndirect(Ty, false); +} + +ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { + // Compute the byval alignment. 
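+ // The value is passed byval with 4-byte alignment; Realign is requested
+ // only when the type's natural alignment exceeds that minimum.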
+ const unsigned MinABIStackAlignInBytes = 4;
+ unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+ return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
+ TypeAlign > MinABIStackAlignInBytes);
+}
+
+Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4), true);
+}
+
+ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
+ uint8_t FreeRegs) const {
+ // Handle the generic C++ ABI.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (RT) {
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect)
+ return getIndirectByRef(Ty, FreeRegs > 0);
+
+ if (RAA == CGCXXABI::RAA_DirectInMemory)
+ return getIndirectByValue(Ty);
+ }
+
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32;
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Structures with flexible arrays are always indirect.
+ if (RT && RT->getDecl()->hasFlexibleArrayMember())
+ return getIndirectByValue(Ty);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ llvm::LLVMContext &LLVMContext = getVMContext();
+
+ llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
+ SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
+ llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
+
+ return FreeRegs >= SizeInRegs ?
+ ABIArgInfo::getDirectInReg(Result) :
+ ABIArgInfo::getDirect(Result, 0, nullptr, false);
+ }
+
+ if (const auto *EIT = Ty->getAs<BitIntType>())
+ if (EIT->getNumBits() > 64)
+ return getIndirectByValue(Ty);
+
+ return isPromotableIntegerTypeForABI(Ty)
+ ? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty)
+ : ABIArgInfo::getExtend(Ty))
+ : (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg()
+ : ABIArgInfo::getDirect());
+}
+
+ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isAnyComplexType())
+ return ABIArgInfo::getDirectInReg();
+
+ // Return values needing more than 4 registers are returned indirectly.
+ auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32;
+ if (RetSize > 4)
+ return getIndirectByRef(RetTy, /*HasFreeRegs*/ true);
+
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
+} // End anonymous namespace.
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createARCTargetCodeGenInfo(CodeGenModule &CGM) {
+ return std::make_unique<ARCTargetCodeGenInfo>(CGM.getTypes());
+}
diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
new file mode 100644
index 000000000000..d7d175ff1724
--- /dev/null
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -0,0 +1,819 @@
+//===- ARM.cpp ------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// ARM ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class ARMABIInfo : public ABIInfo { + ARMABIKind Kind; + bool IsFloatABISoftFP; + +public: + ARMABIInfo(CodeGenTypes &CGT, ARMABIKind Kind) : ABIInfo(CGT), Kind(Kind) { + setCCs(); + IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" || + CGT.getCodeGenOpts().FloatABI == ""; // default + } + + bool isEABI() const { + switch (getTarget().getTriple().getEnvironment()) { + case llvm::Triple::Android: + case llvm::Triple::EABI: + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABI: + case llvm::Triple::MuslEABIHF: + return true; + default: + return getTarget().getTriple().isOHOSFamily(); + } + } + + bool isEABIHF() const { + switch (getTarget().getTriple().getEnvironment()) { + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABIHF: + return true; + default: + return false; + } + } + + ARMABIKind getABIKind() const { return Kind; } + + bool allowBFloatArgsAndRet() const override { + return !IsFloatABISoftFP && getTarget().hasBFloat16Type(); + } + +private: + ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const; + ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, + uint64_t Members) const; + ABIArgInfo coerceIllegalVector(QualType Ty) const; + bool isIllegalVectorType(QualType Ty) const; + bool containsAnyFP16Vectors(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Ty, + uint64_t Members) const override; + bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; + + bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; + + void computeInfo(CGFunctionInfo &FI) const override; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + llvm::CallingConv::ID getLLVMDefaultCC() const; + llvm::CallingConv::ID getABIDefaultCC() const; + void setCCs(); +}; + +class ARMSwiftABIInfo : public SwiftABIInfo { +public: + explicit ARMSwiftABIInfo(CodeGenTypes &CGT) + : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} + + bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const override; +}; + +class ARMTargetCodeGenInfo : public TargetCodeGenInfo { +public: + ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIKind K) + : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) { + SwiftInfo = std::make_unique<ARMSwiftABIInfo>(CGT); + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + return 13; + } + + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { + return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override { + llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); + + // 0-15 are the 16 integer 
registers. + AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); + return false; + } + + unsigned getSizeOfUnwindException() const override { + if (getABIInfo<ARMABIInfo>().isEABI()) + return 88; + return TargetCodeGenInfo::getSizeOfUnwindException(); + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) + return; + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) + return; + auto *Fn = cast<llvm::Function>(GV); + + if (const auto *TA = FD->getAttr<TargetAttr>()) { + ParsedTargetAttr Attr = + CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); + if (!Attr.BranchProtection.empty()) { + TargetInfo::BranchProtectionInfo BPI; + StringRef DiagMsg; + StringRef Arch = + Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU; + if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + Arch, BPI, DiagMsg)) { + CGM.getDiags().Report( + D->getLocation(), + diag::warn_target_unsupported_branch_protection_attribute) + << Arch; + } else { + static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; + assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 && + "Unexpected SignReturnAddressScopeKind"); + Fn->addFnAttr( + "sign-return-address", + SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); + + Fn->addFnAttr("branch-target-enforcement", + BPI.BranchTargetEnforcement ? "true" : "false"); + } + } else if (CGM.getLangOpts().BranchTargetEnforcement || + CGM.getLangOpts().hasSignReturnAddress()) { + // If the Branch Protection attribute is missing, validate the target + // Architecture attribute against Branch Protection command line + // settings. + if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.CPU)) + CGM.getDiags().Report( + D->getLocation(), + diag::warn_target_unsupported_branch_protection_attribute) + << Attr.CPU; + } + } + + const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>(); + if (!Attr) + return; + + const char *Kind; + switch (Attr->getInterrupt()) { + case ARMInterruptAttr::Generic: Kind = ""; break; + case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; + case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; + case ARMInterruptAttr::SWI: Kind = "SWI"; break; + case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; + case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; + } + + Fn->addFnAttr("interrupt", Kind); + + ARMABIKind ABI = getABIInfo<ARMABIInfo>().getABIKind(); + if (ABI == ARMABIKind::APCS) + return; + + // AAPCS guarantees that sp will be 8-byte aligned on any public interface, + // however this is not necessarily true on taking any interrupt. Instruct + // the backend to perform a realignment as part of the function prologue. 
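+ // Request an 8-byte stack alignment so the prologue realigns sp on entry
+ // to the handler.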
+ llvm::AttrBuilder B(Fn->getContext()); + B.addStackAlignmentAttr(8); + Fn->addFnAttrs(B); + } +}; + +class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { +public: + WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIKind K) + : ARMTargetCodeGenInfo(CGT, K) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } +}; + +void WindowsARMTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); +} +} + +void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!::classifyReturnType(getCXXABI(), FI, *this)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), + FI.getCallingConvention()); + + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, FI.isVariadic(), + FI.getCallingConvention()); + + + // Always honor user-specified calling convention. + if (FI.getCallingConvention() != llvm::CallingConv::C) + return; + + llvm::CallingConv::ID cc = getRuntimeCC(); + if (cc != llvm::CallingConv::C) + FI.setEffectiveCallingConvention(cc); +} + +/// Return the default calling convention that LLVM will use. +llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { + // The default calling convention that LLVM will infer. + if (isEABIHF() || getTarget().getTriple().isWatchABI()) + return llvm::CallingConv::ARM_AAPCS_VFP; + else if (isEABI()) + return llvm::CallingConv::ARM_AAPCS; + else + return llvm::CallingConv::ARM_APCS; +} + +/// Return the calling convention that our ABI would like us to use +/// as the C calling convention. +llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { + switch (getABIKind()) { + case ARMABIKind::APCS: + return llvm::CallingConv::ARM_APCS; + case ARMABIKind::AAPCS: + return llvm::CallingConv::ARM_AAPCS; + case ARMABIKind::AAPCS_VFP: + return llvm::CallingConv::ARM_AAPCS_VFP; + case ARMABIKind::AAPCS16_VFP: + return llvm::CallingConv::ARM_AAPCS_VFP; + } + llvm_unreachable("bad ABI kind"); +} + +void ARMABIInfo::setCCs() { + assert(getRuntimeCC() == llvm::CallingConv::C); + + // Don't muddy up the IR with a ton of explicit annotations if + // they'd just match what LLVM will infer from the triple. 
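+ // E.g. on an arm-*-gnueabihf triple LLVM already infers ARM_AAPCS_VFP, so
+ // RuntimeCC is left as the plain C convention there.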
+ llvm::CallingConv::ID abiCC = getABIDefaultCC(); + if (abiCC != getLLVMDefaultCC()) + RuntimeCC = abiCC; +} + +ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 32) { + llvm::Type *ResType = + llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64 || Size == 128) { + auto *ResType = llvm::FixedVectorType::get( + llvm::Type::getInt32Ty(getVMContext()), Size / 32); + return ABIArgInfo::getDirect(ResType); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, + const Type *Base, + uint64_t Members) const { + assert(Base && "Base class should be set for homogeneous aggregate"); + // Base can be a floating-point or a vector. + if (const VectorType *VT = Base->getAs<VectorType>()) { + // FP16 vectors should be converted to integer vectors + if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) { + uint64_t Size = getContext().getTypeSize(VT); + auto *NewVecTy = llvm::FixedVectorType::get( + llvm::Type::getInt32Ty(getVMContext()), Size / 32); + llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); + return ABIArgInfo::getDirect(Ty, 0, nullptr, false); + } + } + unsigned Align = 0; + if (getABIKind() == ARMABIKind::AAPCS || + getABIKind() == ARMABIKind::AAPCS_VFP) { + // For alignment adjusted HFAs, cap the argument alignment to 8, leave it + // default otherwise. + Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); + unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); + Align = (Align > BaseAlign && Align >= 8) ? 8 : 0; + } + return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align); +} + +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, + unsigned functionCallConv) const { + // 6.1.2.1 The following argument types are VFP CPRCs: + // A single-precision floating-point type (including promoted + // half-precision types); A double-precision floating-point type; + // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate + // with a Base Type of a single- or double-precision floating-point type, + // 64-bit containerized vectors or 128-bit containerized vectors with one + // to four Elements. + // Variadic functions should always marshal to the base standard. + bool IsAAPCS_VFP = + !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Handle illegal vector types here. + if (isIllegalVectorType(Ty)) + return coerceIllegalVector(Ty); + + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) { + Ty = EnumTy->getDecl()->getIntegerType(); + } + + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); + } + + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty records. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + if (IsAAPCS_VFP) { + // Homogeneous Aggregates need to be expanded when we can fit the aggregate + // into VFP registers. 
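+ // E.g. struct { float x, y, z; } is an HFA with Base = float and
+ // Members = 3, passed in three consecutive VFP registers.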
+ const Type *Base = nullptr; + uint64_t Members = 0; + if (isHomogeneousAggregate(Ty, Base, Members)) + return classifyHomogeneousAggregate(Ty, Base, Members); + } else if (getABIKind() == ARMABIKind::AAPCS16_VFP) { + // WatchOS does have homogeneous aggregates. Note that we intentionally use + // this convention even for a variadic function: the backend will use GPRs + // if needed. + const Type *Base = nullptr; + uint64_t Members = 0; + if (isHomogeneousAggregate(Ty, Base, Members)) { + assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); + llvm::Type *Ty = + llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); + return ABIArgInfo::getDirect(Ty, 0, nullptr, false); + } + } + + if (getABIKind() == ARMABIKind::AAPCS16_VFP && + getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { + // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're + // bigger than 128-bits, they get placed in space allocated by the caller, + // and a pointer is passed. + return ABIArgInfo::getIndirect( + CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); + } + + // Support byval for ARM. + // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at + // most 8-byte. We realign the indirect argument if type alignment is bigger + // than ABI alignment. + uint64_t ABIAlign = 4; + uint64_t TyAlign; + if (getABIKind() == ARMABIKind::AAPCS_VFP || + getABIKind() == ARMABIKind::AAPCS) { + TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); + ABIAlign = std::clamp(TyAlign, (uint64_t)4, (uint64_t)8); + } else { + TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); + } + if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { + assert(getABIKind() != ARMABIKind::AAPCS16_VFP && "unexpected byval"); + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), + /*ByVal=*/true, + /*Realign=*/TyAlign > ABIAlign); + } + + // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(Ty, getContext(), getVMContext()); + } + + // Otherwise, pass by coercing to a structure of the appropriate size. + llvm::Type* ElemTy; + unsigned SizeRegs; + // FIXME: Try to match the types of the arguments more accurately where + // we can. + if (TyAlign <= 4) { + ElemTy = llvm::Type::getInt32Ty(getVMContext()); + SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; + } else { + ElemTy = llvm::Type::getInt64Ty(getVMContext()); + SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; + } + + return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); +} + +static bool isIntegerLikeType(QualType Ty, ASTContext &Context, + llvm::LLVMContext &VMContext) { + // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure + // is called integer-like if its size is less than or equal to one word, and + // the offset of each of its addressable sub-fields is zero. + + uint64_t Size = Context.getTypeSize(Ty); + + // Check that the type fits in a word. + if (Size > 32) + return false; + + // FIXME: Handle vector types! + if (Ty->isVectorType()) + return false; + + // Float types are never treated as "integer like". + if (Ty->isRealFloatingType()) + return false; + + // If this is a builtin or pointer type then it is ok. + if (Ty->getAs<BuiltinType>() || Ty->isPointerType()) + return true; + + // Small complex integer types are "integer like". 
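+ // E.g. _Complex char is 16 bits wide and qualifies if its element type
+ // does.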
+ if (const ComplexType *CT = Ty->getAs<ComplexType>())
+ return isIntegerLikeType(CT->getElementType(), Context, VMContext);
+
+ // Single element and zero sized arrays should be allowed, by the definition
+ // above, but they are not.
+
+ // Otherwise, it must be a record type.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (!RT) return false;
+
+ // Ignore records with flexible arrays.
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return false;
+
+ // Check that all sub-fields are at offset 0, and are themselves "integer
+ // like".
+ const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
+
+ bool HadField = false;
+ unsigned idx = 0;
+ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+ i != e; ++i, ++idx) {
+ const FieldDecl *FD = *i;
+
+ // Bit-fields are not addressable; we only need to verify they are "integer
+ // like". We still have to disallow a subsequent non-bitfield, for example:
+ // struct { int : 0; int x; }
+ // is non-integer-like according to gcc.
+ if (FD->isBitField()) {
+ if (!RD->isUnion())
+ HadField = true;
+
+ if (!isIntegerLikeType(FD->getType(), Context, VMContext))
+ return false;
+
+ continue;
+ }
+
+ // Check if this field is at offset 0.
+ if (Layout.getFieldOffset(idx) != 0)
+ return false;
+
+ if (!isIntegerLikeType(FD->getType(), Context, VMContext))
+ return false;
+
+ // Only allow at most one field in a structure. This doesn't match the
+ // wording above, but follows gcc in situations with a field following an
+ // empty structure.
+ if (!RD->isUnion()) {
+ if (HadField)
+ return false;
+
+ HadField = true;
+ }
+ }
+
+ return true;
+}
+
+ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
+ unsigned functionCallConv) const {
+
+ // Variadic functions should always marshal to the base standard.
+ bool IsAAPCS_VFP =
+ !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true);
+
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ if (const VectorType *VT = RetTy->getAs<VectorType>()) {
+ // Large vector types should be returned via memory.
+ if (getContext().getTypeSize(RetTy) > 128)
+ return getNaturalAlignIndirect(RetTy);
+ // TODO: FP16/BF16 vectors should be converted to integer vectors
+ // This check is similar to isIllegalVectorType - refactor?
+ if ((!getTarget().hasLegalHalfType() &&
+ (VT->getElementType()->isFloat16Type() ||
+ VT->getElementType()->isHalfType())) ||
+ (IsFloatABISoftFP &&
+ VT->getElementType()->isBFloat16Type()))
+ return coerceIllegalVector(RetTy);
+ }
+
+ if (!isAggregateTypeForABI(RetTy)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+ RetTy = EnumTy->getDecl()->getIntegerType();
+
+ if (const auto *EIT = RetTy->getAs<BitIntType>())
+ if (EIT->getNumBits() > 64)
+ return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
+
+ return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect();
+ }
+
+ // Are we following APCS?
+ if (getABIKind() == ARMABIKind::APCS) {
+ if (isEmptyRecord(getContext(), RetTy, false))
+ return ABIArgInfo::getIgnore();
+
+ // Complex types are all returned as packed integers.
+ //
+ // FIXME: Consider using 2 x vector types if the back end handles them
+ // correctly.
+ if (RetTy->isAnyComplexType())
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(
+ getVMContext(), getContext().getTypeSize(RetTy)));
+
+ // Integer-like structures are returned in r0.
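+ // E.g. struct { char c; } comes back as an i8 in r0 rather than via an
+ // sret pointer.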
+ if (isIntegerLikeType(RetTy, getContext(), getVMContext())) {
+ // Return in the smallest viable integer type.
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 8)
+ return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ }
+
+ // Otherwise return in memory.
+ return getNaturalAlignIndirect(RetTy);
+ }
+
+ // Otherwise this is an AAPCS variant.
+
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
+ // Check for homogeneous aggregates with AAPCS-VFP.
+ if (IsAAPCS_VFP) {
+ const Type *Base = nullptr;
+ uint64_t Members = 0;
+ if (isHomogeneousAggregate(RetTy, Base, Members))
+ return classifyHomogeneousAggregate(RetTy, Base, Members);
+ }
+
+ // Aggregates <= 4 bytes are returned in r0; larger aggregates are
+ // returned indirectly (except on AAPCS16_VFP, which returns aggregates of
+ // up to 16 bytes in registers).
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 32) {
+ // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of
+ // same size and alignment.
+ if (getTarget().isRenderScriptTarget()) {
+ return coerceToIntArray(RetTy, getContext(), getVMContext());
+ }
+ if (getDataLayout().isBigEndian())
+ // Return in a 32-bit integer type (as if loaded by LDR, AAPCS 5.4).
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ // Return in the smallest viable integer type.
+ if (Size <= 8)
+ return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ } else if (Size <= 128 && getABIKind() == ARMABIKind::AAPCS16_VFP) {
+ llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
+ llvm::Type *CoerceTy =
+ llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
+ return ABIArgInfo::getDirect(CoerceTy);
+ }
+
+ return getNaturalAlignIndirect(RetTy);
+}
+
+/// isIllegalVectorType - check whether Ty is an illegal vector type.
+bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // On targets that don't support half, fp16 or bfloat, they are expanded
+ // into float, and we don't want the ABI to depend on whether or not they
+ // are supported in hardware. Thus return true to coerce vectors of these
+ // types into integer vectors.
+ // We do not depend on hasLegalHalfType for bfloat as it is a
+ // separate IR type.
+ if ((!getTarget().hasLegalHalfType() &&
+ (VT->getElementType()->isFloat16Type() ||
+ VT->getElementType()->isHalfType())) ||
+ (IsFloatABISoftFP &&
+ VT->getElementType()->isBFloat16Type()))
+ return true;
+ if (isAndroid()) {
+ // Android shipped using Clang 3.1, which supported a slightly different
+ // vector ABI. The primary differences were that 3-element vector types
+ // were legal, and so were sub 32-bit vectors (e.g. <2 x i8>). This path
+ // accepts that legacy behavior for Android only.
+ // Check whether VT is legal.
+ unsigned NumElements = VT->getNumElements();
+ // NumElements should be power of 2 or equal to 3.
+ if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
+ return true;
+ } else {
+ // Check whether VT is legal.
+ unsigned NumElements = VT->getNumElements();
+ uint64_t Size = getContext().getTypeSize(VT);
+ // NumElements should be power of 2.
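+ // E.g. <3 x i8> is rejected here because its element count is not a
+ // power of 2.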
+ if (!llvm::isPowerOf2_32(NumElements)) + return true; + // Size should be greater than 32 bits. + return Size <= 32; + } + } + return false; +} + +/// Return true if a type contains any 16-bit floating point vectors +bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { + if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { + uint64_t NElements = AT->getSize().getZExtValue(); + if (NElements == 0) + return false; + return containsAnyFP16Vectors(AT->getElementType()); + } else if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) { + return containsAnyFP16Vectors(B.getType()); + })) + return true; + + if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) { + return FD && containsAnyFP16Vectors(FD->getType()); + })) + return true; + + return false; + } else { + if (const VectorType *VT = Ty->getAs<VectorType>()) + return (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isBFloat16Type() || + VT->getElementType()->isHalfType()); + return false; + } +} + +bool ARMSwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, + unsigned NumElts) const { + if (!llvm::isPowerOf2_32(NumElts)) + return false; + unsigned size = CGT.getDataLayout().getTypeStoreSizeInBits(EltTy); + if (size > 64) + return false; + if (VectorSize.getQuantity() != 8 && + (VectorSize.getQuantity() != 16 || NumElts == 1)) + return false; + return true; +} + +bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for AAPCS-VFP must have base types of float, + // double, or 64-bit or 128-bit vectors. + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->getKind() == BuiltinType::Float || + BT->getKind() == BuiltinType::Double || + BT->getKind() == BuiltinType::LongDouble) + return true; + } else if (const VectorType *VT = Ty->getAs<VectorType>()) { + unsigned VecSize = getContext().getTypeSize(VT); + if (VecSize == 64 || VecSize == 128) + return true; + } + return false; +} + +bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const { + return Members <= 4; +} + +bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const { + // AAPCS32 says that the rule for whether something is a homogeneous + // aggregate is applied to the output of the data layout decision. So + // anything that doesn't affect the data layout also does not affect + // homogeneity. In particular, zero-length bitfields don't stop a struct + // being homogeneous. + return true; +} + +bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention, + bool acceptHalf) const { + // Give precedence to user-specified calling conventions. + if (callConvention != llvm::CallingConv::C) + return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP); + else + return (getABIKind() == ARMABIKind::AAPCS_VFP) || + (acceptHalf && (getABIKind() == ARMABIKind::AAPCS16_VFP)); +} + +Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(4); + + // Empty records are ignored for parameter passing purposes. 
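+ // va_arg on an empty record consumes no stack: the current va_list
+ // pointer is returned unchanged as the result address.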
+ if (isEmptyRecord(getContext(), Ty, true)) {
+ VAListAddr = VAListAddr.withElementType(CGF.Int8PtrTy);
+ auto *Load = CGF.Builder.CreateLoad(VAListAddr);
+ return Address(Load, CGF.ConvertTypeForMem(Ty), SlotSize);
+ }
+
+ CharUnits TySize = getContext().getTypeSizeInChars(Ty);
+ CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty);
+
+ // Use indirect if the size of the illegal vector is bigger than 16 bytes.
+ bool IsIndirect = false;
+ const Type *Base = nullptr;
+ uint64_t Members = 0;
+ if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
+ IsIndirect = true;
+
+ // ARMv7k passes structs bigger than 16 bytes indirectly, in space
+ // allocated by the caller.
+ } else if (TySize > CharUnits::fromQuantity(16) &&
+ getABIKind() == ARMABIKind::AAPCS16_VFP &&
+ !isHomogeneousAggregate(Ty, Base, Members)) {
+ IsIndirect = true;
+
+ // Otherwise, bound the type's ABI alignment.
+ // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
+ // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
+ // Our callers should be prepared to handle an under-aligned address.
+ } else if (getABIKind() == ARMABIKind::AAPCS_VFP ||
+ getABIKind() == ARMABIKind::AAPCS) {
+ TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
+ TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8));
+ } else if (getABIKind() == ARMABIKind::AAPCS16_VFP) {
+ // ARMv7k allows type alignment up to 16 bytes.
+ TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
+ TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16));
+ } else {
+ TyAlignForABI = CharUnits::fromQuantity(4);
+ }
+
+ TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None);
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo,
+ SlotSize, /*AllowHigherAlign*/ true);
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createARMTargetCodeGenInfo(CodeGenModule &CGM, ARMABIKind Kind) {
+ return std::make_unique<ARMTargetCodeGenInfo>(CGM.getTypes(), Kind);
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createWindowsARMTargetCodeGenInfo(CodeGenModule &CGM, ARMABIKind K) {
+ return std::make_unique<WindowsARMTargetCodeGenInfo>(CGM.getTypes(), K);
+}
diff --git a/clang/lib/CodeGen/Targets/AVR.cpp b/clang/lib/CodeGen/Targets/AVR.cpp
new file mode 100644
index 000000000000..50547dd6dec5
--- /dev/null
+++ b/clang/lib/CodeGen/Targets/AVR.cpp
@@ -0,0 +1,154 @@
+//===- AVR.cpp ------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+#include "clang/Basic/DiagnosticFrontend.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// AVR ABI Implementation. Documented at
+// https://gcc.gnu.org/wiki/avr-gcc#Calling_Convention
+// https://gcc.gnu.org/wiki/avr-gcc#Reduced_Tiny
+//===----------------------------------------------------------------------===//

+namespace {
+class AVRABIInfo : public DefaultABIInfo {
+private:
+ // The total number of registers that can be used to pass parameters. It is
+ // 18 on AVR and 6 on AVRTiny.
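+ // (R25 down to R8 on AVR, R25 down to R20 on AVRTiny.)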
+ const unsigned ParamRegs;
+ // The total number of registers that can be used to pass the return value.
+ // It is 8 on AVR and 4 on AVRTiny.
+ const unsigned RetRegs;
+
+public:
+ AVRABIInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
+ : DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {}
+
+ ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const {
+ // On AVR, a return struct with size less than or equal to 8 bytes is
+ // returned directly via registers R18-R25. On AVRTiny, a return struct
+ // with size less than or equal to 4 bytes is returned directly via
+ // registers R22-R25.
+ if (isAggregateTypeForABI(Ty) &&
+ getContext().getTypeSize(Ty) <= RetRegs * 8)
+ return ABIArgInfo::getDirect();
+ // A return value (struct or scalar) with larger size is returned via a
+ // stack slot, along with a pointer as the function's implicit argument.
+ if (getContext().getTypeSize(Ty) > RetRegs * 8) {
+ LargeRet = true;
+ return getNaturalAlignIndirect(Ty);
+ }
+ // An i8 return value should not be extended to i16, since AVR has 8-bit
+ // registers.
+ if (Ty->isIntegralOrEnumerationType() && getContext().getTypeSize(Ty) <= 8)
+ return ABIArgInfo::getDirect();
+ // Otherwise follow the default lowering, which is compatible.
+ return DefaultABIInfo::classifyReturnType(Ty);
+ }
+
+ ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegs) const {
+ unsigned TySize = getContext().getTypeSize(Ty);
+
+ // An int8 type argument always costs two registers, just like an int16.
+ if (TySize == 8 && NumRegs >= 2) {
+ NumRegs -= 2;
+ return ABIArgInfo::getExtend(Ty);
+ }
+
+ // If the argument size is an odd number of bytes, round up the size
+ // to the next even number.
+ TySize = llvm::alignTo(TySize, 16);
+
+ // Any type, including an array/struct type, can be passed in registers
+ // if there are enough registers left.
+ if (TySize <= NumRegs * 8) {
+ NumRegs -= TySize / 8;
+ return ABIArgInfo::getDirect();
+ }
+
+ // An argument is passed either completely in registers or completely in
+ // memory. Since there are not enough registers left, the current argument
+ // and all remaining unprocessed arguments are passed in memory.
+ // However we still need to return `ABIArgInfo::getDirect()` rather than
+ // `ABIInfo::getNaturalAlignIndirect(Ty)`, otherwise an extra stack slot
+ // will be allocated, so the stack frame layout will be incompatible with
+ // avr-gcc.
+ NumRegs = 0;
+ return ABIArgInfo::getDirect();
+ }
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ // Decide the return type.
+ bool LargeRet = false;
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), LargeRet);
+
+ // Decide each argument type. The total number of registers available for
+ // arguments depends on several factors:
+ // 1. Arguments of varargs functions are passed on the stack. This applies
+ // even to the named arguments. So no register can be used.
+ // 2. A total of 18 registers can be used on AVR and 6 on AVRTiny.
+ // 3. If the return type is a struct too large for registers, two registers
+ // (out of the 18/6) are consumed by the implicit pointer argument.
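+ // E.g. on AVR a function returning a 16-byte struct has only 16 of its
+ // 18 registers left for explicit arguments.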
+ unsigned NumRegs = ParamRegs; + if (FI.isVariadic()) + NumRegs = 0; + else if (LargeRet) + NumRegs -= 2; + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, NumRegs); + } +}; + +class AVRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + AVRTargetCodeGenInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR) + : TargetCodeGenInfo(std::make_unique<AVRABIInfo>(CGT, NPR, NRR)) {} + + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override { + // Check if global/static variable is defined in address space + // 1~6 (__flash, __flash1, __flash2, __flash3, __flash4, __flash5) + // but not constant. + if (D) { + LangAS AS = D->getType().getAddressSpace(); + if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) && + toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified()) + CGM.getDiags().Report(D->getLocation(), + diag::err_verify_nonconst_addrspace) + << "__flash*"; + } + return TargetCodeGenInfo::getGlobalVarAddressSpace(CGM, D); + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (GV->isDeclaration()) + return; + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + auto *Fn = cast<llvm::Function>(GV); + + if (FD->getAttr<AVRInterruptAttr>()) + Fn->addFnAttr("interrupt"); + + if (FD->getAttr<AVRSignalAttr>()) + Fn->addFnAttr("signal"); + } +}; +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createAVRTargetCodeGenInfo(CodeGenModule &CGM, unsigned NPR, + unsigned NRR) { + return std::make_unique<AVRTargetCodeGenInfo>(CGM.getTypes(), NPR, NRR); +} diff --git a/clang/lib/CodeGen/Targets/BPF.cpp b/clang/lib/CodeGen/Targets/BPF.cpp new file mode 100644 index 000000000000..2849222f7a18 --- /dev/null +++ b/clang/lib/CodeGen/Targets/BPF.cpp @@ -0,0 +1,100 @@ +//===- BPF.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// BPF ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class BPFABIInfo : public DefaultABIInfo { +public: + BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + ABIArgInfo classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + uint64_t Bits = getContext().getTypeSize(Ty); + if (Bits == 0) + return ABIArgInfo::getIgnore(); + + // If the aggregate needs 1 or 2 registers, do not use reference. 
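+ // BPF registers are 64-bit, so an aggregate of up to 16 bytes fits in one
+ // or two of them and is coerced below instead of being passed by reference.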
+ if (Bits <= 128) { + llvm::Type *CoerceTy; + if (Bits <= 64) { + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); + } else { + llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), 64); + CoerceTy = llvm::ArrayType::get(RegTy, 2); + } + return ABIArgInfo::getDirect(CoerceTy); + } else { + return getNaturalAlignIndirect(Ty); + } + } + + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + ASTContext &Context = getContext(); + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) + return getNaturalAlignIndirect(Ty); + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); + } + + ABIArgInfo classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (isAggregateTypeForABI(RetTy)) + return getNaturalAlignIndirect(RetTy); + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + ASTContext &Context = getContext(); + if (const auto *EIT = RetTy->getAs<BitIntType>()) + if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) + return getNaturalAlignIndirect(RetTy); + + // Caller will do necessary sign/zero extension. + return ABIArgInfo::getDirect(); + } + + void computeInfo(CGFunctionInfo &FI) const override { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + +}; + +class BPFTargetCodeGenInfo : public TargetCodeGenInfo { +public: + BPFTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<BPFABIInfo>(CGT)) {} +}; + +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createBPFTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<BPFTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/CSKY.cpp b/clang/lib/CodeGen/Targets/CSKY.cpp new file mode 100644 index 000000000000..924eced700e1 --- /dev/null +++ b/clang/lib/CodeGen/Targets/CSKY.cpp @@ -0,0 +1,175 @@ +//===- CSKY.cpp -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// CSKY ABI Implementation +//===----------------------------------------------------------------------===// +namespace { +class CSKYABIInfo : public DefaultABIInfo { + static const int NumArgGPRs = 4; + static const int NumArgFPRs = 4; + + static const unsigned XLen = 32; + unsigned FLen; + +public: + CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen) + : DefaultABIInfo(CGT), FLen(FLen) {} + + void computeInfo(CGFunctionInfo &FI) const override; + ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft, + int &ArgFPRsLeft, + bool isReturnType = false) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +} // end anonymous namespace + +void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + + // We must track the number of GPRs used in order to conform to the CSKY + // ABI, as integer scalars passed in registers should have signext/zeroext + // when promoted. + int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int ArgFPRsLeft = FLen ? NumArgFPRs : 0; + + for (auto &ArgInfo : FI.arguments()) { + ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft); + } +} + +Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) { + return Address(CGF.Builder.CreateLoad(VAListAddr), + CGF.ConvertTypeForMem(Ty), SlotSize); + } + + auto TInfo = getContext().getTypeInfoInChars(Ty); + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize, + /*AllowHigherAlign=*/true); +} + +ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft, + int &ArgFPRsLeft, + bool isReturnType) const { + assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (ArgGPRsLeft) + ArgGPRsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + if (!Ty->getAsUnionType()) + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + uint64_t Size = getContext().getTypeSize(Ty); + // Pass floating point values via FPRs if possible. + if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size && + ArgFPRsLeft) { + ArgFPRsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the hard float ABI must be passed direct rather than + // using CoerceAndExpand. 
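+  // For illustration: with FLen == 32, a _Complex float argument (two
+  // 32-bit elements) passes the element-size check below, claims two FPRs
+  // and is passed direct, while a _Complex double fails the check and
+  // falls through to the aggregate handling.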
+ if (Ty->isComplexType() && FLen && !isReturnType) { + QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FLen) { + ArgFPRsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to XLen width, unless passed on the + // stack. + if (Size < XLen && Ty->isIntegralOrEnumerationType()) + return ABIArgInfo::getExtend(Ty); + + if (const auto *EIT = Ty->getAs<BitIntType>()) { + if (EIT->getNumBits() < XLen) + return ABIArgInfo::getExtend(Ty); + } + + return ABIArgInfo::getDirect(); + } + + // For argument type, the first 4*XLen parts of aggregate will be passed + // in registers, and the rest will be passed in stack. + // So we can coerce to integers directly and let backend handle it correctly. + // For return type, aggregate which <= 2*XLen will be returned in registers. + // Otherwise, aggregate will be returned indirectly. + if (!isReturnType || (isReturnType && Size <= 2 * XLen)) { + if (Size <= XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), XLen)); + } else { + return ABIArgInfo::getDirect(llvm::ArrayType::get( + llvm::IntegerType::get(getVMContext(), XLen), (Size + 31) / XLen)); + } + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + int ArgGPRsLeft = 2; + int ArgFPRsLeft = FLen ? 1 : 0; + + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. + return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true); +} + +namespace { +class CSKYTargetCodeGenInfo : public TargetCodeGenInfo { +public: + CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen) + : TargetCodeGenInfo(std::make_unique<CSKYABIInfo>(CGT, FLen)) {} +}; +} // end anonymous namespace + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createCSKYTargetCodeGenInfo(CodeGenModule &CGM, unsigned FLen) { + return std::make_unique<CSKYTargetCodeGenInfo>(CGM.getTypes(), FLen); +} diff --git a/clang/lib/CodeGen/Targets/Hexagon.cpp b/clang/lib/CodeGen/Targets/Hexagon.cpp new file mode 100644 index 000000000000..944a8d002ecf --- /dev/null +++ b/clang/lib/CodeGen/Targets/Hexagon.cpp @@ -0,0 +1,423 @@ +//===- Hexagon.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// Hexagon ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class HexagonABIInfo : public DefaultABIInfo {
+public:
+  HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const;
+
+  void computeInfo(CGFunctionInfo &FI) const override;
+
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+  Address EmitVAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr,
+                              QualType Ty) const;
+  Address EmitVAArgForHexagon(CodeGenFunction &CGF, Address VAListAddr,
+                              QualType Ty) const;
+  Address EmitVAArgForHexagonLinux(CodeGenFunction &CGF, Address VAListAddr,
+                                   QualType Ty) const;
+};
+
+class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
+      : TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {}
+
+  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
+    return 29;
+  }
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override {
+    if (GV->isDeclaration())
+      return;
+    const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+    if (!FD)
+      return;
+  }
+};
+
+} // namespace
+
+void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  unsigned RegsLeft = 6;
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  for (auto &I : FI.arguments())
+    I.info = classifyArgumentType(I.type, &RegsLeft);
+}
+
+static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
+  assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits"
+                       " through registers");
+
+  if (*RegsLeft == 0)
+    return false;
+
+  if (Size <= 32) {
+    (*RegsLeft)--;
+    return true;
+  }
+
+  if (2 <= (*RegsLeft & (~1U))) {
+    *RegsLeft = (*RegsLeft & (~1U)) - 2;
+    return true;
+  }
+
+  // The next available register was r5, but the candidate was wider than 32
+  // bits, so it has to go on the stack. However, we still consume r5.
+  if (*RegsLeft == 1)
+    *RegsLeft = 0;
+
+  return false;
+}
+
+ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty,
+                                                unsigned *RegsLeft) const {
+  if (!isAggregateTypeForABI(Ty)) {
+    // Treat an enum type as its underlying type.
+    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+      Ty = EnumTy->getDecl()->getIntegerType();
+
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (Size <= 64)
+      HexagonAdjustRegsLeft(Size, RegsLeft);
+
+    if (Size > 64 && Ty->isBitIntType())
+      return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
+
+    return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+                                             : ABIArgInfo::getDirect();
+  }
+
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+    return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+  // Ignore empty records.
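+  // For illustration: an empty struct argument contributes nothing, while
+  // a struct of two ints (64 bits) is coerced to i64 below and consumes an
+  // aligned register pair.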
+ if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + unsigned Align = getContext().getTypeAlign(Ty); + + if (Size > 64) + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + + if (HexagonAdjustRegsLeft(Size, RegsLeft)) + Align = Size <= 32 ? 32 : 64; + if (Size <= Align) { + // Pass in the smallest viable integer type. + Size = llvm::bit_ceil(Size); + return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); + } + return DefaultABIInfo::classifyArgumentType(Ty); +} + +ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + const TargetInfo &T = CGT.getTarget(); + uint64_t Size = getContext().getTypeSize(RetTy); + + if (RetTy->getAs<VectorType>()) { + // HVX vectors are returned in vector registers or register pairs. + if (T.hasFeature("hvx")) { + assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b")); + uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8; + if (Size == VecSize || Size == 2*VecSize) + return ABIArgInfo::getDirectInReg(); + } + // Large vector types should be returned via memory. + if (Size > 64) + return getNaturalAlignIndirect(RetTy); + } + + if (!isAggregateTypeForABI(RetTy)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + if (Size > 64 && RetTy->isBitIntType()) + return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); + + return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect(); + } + + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Aggregates <= 8 bytes are returned in registers, other aggregates + // are returned indirectly. + if (Size <= 64) { + // Return in the smallest viable integer type. + Size = llvm::bit_ceil(Size); + return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); + } + return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); +} + +Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF, + Address VAListAddr, + QualType Ty) const { + // Load the overflow area pointer. + Address __overflow_area_pointer_p = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); + llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( + __overflow_area_pointer_p, "__overflow_area_pointer"); + + uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; + if (Align > 4) { + // Alignment should be a power of 2. + assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!"); + + // overflow_arg_area = (overflow_arg_area + align - 1) & -align; + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1); + + // Add offset to the current pointer to access the argument. + __overflow_area_pointer = + CGF.Builder.CreateGEP(CGF.Int8Ty, __overflow_area_pointer, Offset); + llvm::Value *AsInt = + CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); + + // Create a mask which should be "AND"ed + // with (overflow_arg_area + align - 1) + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align); + __overflow_area_pointer = CGF.Builder.CreateIntToPtr( + CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(), + "__overflow_area_pointer.align"); + } + + // Get the type of the argument from memory and bitcast + // overflow area pointer to the argument type. 
+  llvm::Type *PTy = CGF.ConvertTypeForMem(Ty);
+  Address AddrTyped =
+      Address(__overflow_area_pointer, PTy, CharUnits::fromQuantity(Align));
+
+  // Round up to the minimum stack alignment for varargs which is 4 bytes.
+  uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
+
+  __overflow_area_pointer = CGF.Builder.CreateGEP(
+      CGF.Int8Ty, __overflow_area_pointer,
+      llvm::ConstantInt::get(CGF.Int32Ty, Offset),
+      "__overflow_area_pointer.next");
+  CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p);
+
+  return AddrTyped;
+}
+
+Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF,
+                                            Address VAListAddr,
+                                            QualType Ty) const {
+  // FIXME: Need to handle alignment
+  llvm::Type *BP = CGF.Int8PtrTy;
+  CGBuilderTy &Builder = CGF.Builder;
+  Address VAListAddrAsBPP = VAListAddr.withElementType(BP);
+  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
+  // Handle address alignment for type alignment > 32 bits
+  uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+  if (TyAlign > 4) {
+    assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!");
+    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
+    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
+    AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
+    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+  }
+  Address AddrTyped =
+      Address(Addr, CGF.ConvertType(Ty), CharUnits::fromQuantity(TyAlign));
+
+  uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
+  llvm::Value *NextAddr = Builder.CreateGEP(
+      CGF.Int8Ty, Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
+  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
+
+  return AddrTyped;
+}
+
+Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF,
+                                                 Address VAListAddr,
+                                                 QualType Ty) const {
+  int ArgSize = CGF.getContext().getTypeSize(Ty) / 8;
+
+  if (ArgSize > 8)
+    return EmitVAArgFromMemory(CGF, VAListAddr, Ty);
+
+  // Here we have to check whether the argument is in the register area or in
+  // the overflow area.
+  // If the saved register area pointer + argsize rounded up to alignment >
+  // saved register area end pointer, the argument is in the overflow area.
+  unsigned RegsLeft = 6;
+  Ty = CGF.getContext().getCanonicalType(Ty);
+  (void)classifyArgumentType(Ty, &RegsLeft);
+
+  llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
+  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+  llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
+  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+
+  // Get the rounded size of the argument. GCC does not allow a vararg of
+  // size < 4 bytes. We follow the same logic here.
+  ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
+  int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
+
+  // The argument may be in the saved register area.
+  CGF.EmitBlock(MaybeRegBlock);
+
+  // Load the current saved register area pointer.
+  Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP(
+      VAListAddr, 0, "__current_saved_reg_area_pointer_p");
+  llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad(
+      __current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer");
+
+  // Load the saved register area end pointer.
+ Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP( + VAListAddr, 1, "__saved_reg_area_end_pointer_p"); + llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad( + __saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer"); + + // If the size of argument is > 4 bytes, check if the stack + // location is aligned to 8 bytes + if (ArgAlign > 4) { + + llvm::Value *__current_saved_reg_area_pointer_int = + CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer, + CGF.Int32Ty); + + __current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd( + __current_saved_reg_area_pointer_int, + llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)), + "align_current_saved_reg_area_pointer"); + + __current_saved_reg_area_pointer_int = + CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int, + llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign), + "align_current_saved_reg_area_pointer"); + + __current_saved_reg_area_pointer = + CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int, + __current_saved_reg_area_pointer->getType(), + "align_current_saved_reg_area_pointer"); + } + + llvm::Value *__new_saved_reg_area_pointer = + CGF.Builder.CreateGEP(CGF.Int8Ty, __current_saved_reg_area_pointer, + llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), + "__new_saved_reg_area_pointer"); + + llvm::Value *UsingStack = nullptr; + UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer, + __saved_reg_area_end_pointer); + + CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock); + + // Argument in saved register area + // Implement the block where argument is in register saved area + CGF.EmitBlock(InRegBlock); + + llvm::Type *PTy = CGF.ConvertType(Ty); + llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast( + __current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy)); + + CGF.Builder.CreateStore(__new_saved_reg_area_pointer, + __current_saved_reg_area_pointer_p); + + CGF.EmitBranch(ContBlock); + + // Argument in overflow area + // Implement the block where the argument is in overflow area. + CGF.EmitBlock(OnStackBlock); + + // Load the overflow area pointer + Address __overflow_area_pointer_p = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); + llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( + __overflow_area_pointer_p, "__overflow_area_pointer"); + + // Align the overflow area pointer according to the alignment of the argument + if (ArgAlign > 4) { + llvm::Value *__overflow_area_pointer_int = + CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); + + __overflow_area_pointer_int = + CGF.Builder.CreateAdd(__overflow_area_pointer_int, + llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1), + "align_overflow_area_pointer"); + + __overflow_area_pointer_int = + CGF.Builder.CreateAnd(__overflow_area_pointer_int, + llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign), + "align_overflow_area_pointer"); + + __overflow_area_pointer = CGF.Builder.CreateIntToPtr( + __overflow_area_pointer_int, __overflow_area_pointer->getType(), + "align_overflow_area_pointer"); + } + + // Get the pointer for next argument in overflow area and store it + // to overflow area pointer. 
+ llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP( + CGF.Int8Ty, __overflow_area_pointer, + llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), + "__overflow_area_pointer.next"); + + CGF.Builder.CreateStore(__new_overflow_area_pointer, + __overflow_area_pointer_p); + + CGF.Builder.CreateStore(__new_overflow_area_pointer, + __current_saved_reg_area_pointer_p); + + // Bitcast the overflow area pointer to the type of argument. + llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty); + llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast( + __overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy)); + + CGF.EmitBranch(ContBlock); + + // Get the correct pointer to load the variable argument + // Implement the ContBlock + CGF.EmitBlock(ContBlock); + + llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty); + llvm::Type *MemPTy = llvm::PointerType::getUnqual(MemTy); + llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr"); + ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock); + ArgAddr->addIncoming(__overflow_area_p, OnStackBlock); + + return Address(ArgAddr, MemTy, CharUnits::fromQuantity(ArgAlign)); +} + +Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + + if (getTarget().getTriple().isMusl()) + return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty); + + return EmitVAArgForHexagon(CGF, VAListAddr, Ty); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createHexagonTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<HexagonTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/Lanai.cpp b/clang/lib/CodeGen/Targets/Lanai.cpp new file mode 100644 index 000000000000..2578fc0291e7 --- /dev/null +++ b/clang/lib/CodeGen/Targets/Lanai.cpp @@ -0,0 +1,154 @@ +//===- Lanai.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// Lanai ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class LanaiABIInfo : public DefaultABIInfo { + struct CCState { + unsigned FreeRegs; + }; + +public: + LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + bool shouldUseInReg(QualType Ty, CCState &State) const; + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State; + // Lanai uses 4 registers to pass arguments unless the function has the + // regparm attribute set. 
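+    // For illustration: a function declared with
+    //   __attribute__((regparm(2)))
+    // starts with two free argument registers here instead of the default
+    // four.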
+ if (FI.getHasRegParm()) { + State.FreeRegs = FI.getRegParm(); + } else { + State.FreeRegs = 4; + } + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, State); + } + + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; +}; +} // end anonymous namespace + +bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { + unsigned Size = getContext().getTypeSize(Ty); + unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; + + if (SizeInRegs == 0) + return false; + + if (SizeInRegs > State.FreeRegs) { + State.FreeRegs = 0; + return false; + } + + State.FreeRegs -= SizeInRegs; + + return true; +} + +ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, + CCState &State) const { + if (!ByVal) { + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. + return getNaturalAlignIndirectInReg(Ty); + } + return getNaturalAlignIndirect(Ty, false); + } + + // Compute the byval alignment. + const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + /*Realign=*/TypeAlign > + MinABIStackAlignInBytes); +} + +ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, + CCState &State) const { + // Check with the C++ ABI first. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + } + } + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, /*ByVal=*/true, State); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; + if (SizeInRegs <= State.FreeRegs) { + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + State.FreeRegs -= SizeInRegs; + return ABIArgInfo::getDirectInReg(Result); + } else { + State.FreeRegs = 0; + } + return getIndirectResult(Ty, true, State); + } + + // Treat an enum type as its underlying type. + if (const auto *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + bool InReg = shouldUseInReg(Ty, State); + + // Don't pass >64 bit integers in registers. 
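+  // For illustration: a _BitInt(128) argument exceeds the limit below and
+  // is passed indirectly (byval) instead of in registers.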
+ if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > 64) + return getIndirectResult(Ty, /*ByVal=*/true, State); + + if (isPromotableIntegerTypeForABI(Ty)) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getExtend(Ty); + } + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); +} + +namespace { +class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {} +}; +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createLanaiTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<LanaiTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp new file mode 100644 index 000000000000..6391a8aeaa67 --- /dev/null +++ b/clang/lib/CodeGen/Targets/LoongArch.cpp @@ -0,0 +1,449 @@ +//===- LoongArch.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +// LoongArch ABI Implementation. Documented at +// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +// +//===----------------------------------------------------------------------===// + +namespace { +class LoongArchABIInfo : public DefaultABIInfo { +private: + // Size of the integer ('r') registers in bits. + unsigned GRLen; + // Size of the floating point ('f') registers in bits. + unsigned FRLen; + // Number of general-purpose argument registers. + static const int NumGARs = 8; + // Number of floating-point argument registers. + static const int NumFARs = 8; + bool detectFARsEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const; + +public: + LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) + : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} + + void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &GARsLeft, + int &FARsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; + + bool detectFARsEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFARsEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; +}; +} // end anonymous namespace + +void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect, or if the type size is a scalar greater than 2*GRLen + // and not a complex type with elements <= FRLen. e.g. 
fp128 is passed direct + // in LLVM IR, relying on the backend lowering code to rewrite the argument + // list and pass indirectly on LA32. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + if (!IsRetIndirect && RetTy->isScalarType() && + getContext().getTypeSize(RetTy) > (2 * GRLen)) { + if (RetTy->isComplexType() && FRLen) { + QualType EltTy = RetTy->castAs<ComplexType>()->getElementType(); + IsRetIndirect = getContext().getTypeSize(EltTy) > FRLen; + } else { + // This is a normal scalar > 2*GRLen, such as fp128 on LA32. + IsRetIndirect = true; + } + } + + // We must track the number of GARs and FARs used in order to conform to the + // LoongArch ABI. As GAR usage is different for variadic arguments, we must + // also track whether we are examining a vararg or not. + int GARsLeft = IsRetIndirect ? NumGARs - 1 : NumGARs; + int FARsLeft = FRLen ? NumFARs : 0; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + ArgInfo.info = classifyArgumentType( + ArgInfo.type, /*IsFixed=*/ArgNum < NumFixedArgs, GARsLeft, FARsLeft); + ArgNum++; + } +} + +// Returns true if the struct is a potential candidate to be passed in FARs (and +// GARs). If this function returns true, the caller is responsible for checking +// that if there is only a single field then that field is a float. +bool LoongArchABIInfo::detectFARsEligibleStructHelper( + QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, + llvm::Type *&Field2Ty, CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > GRLen) + return false; + // Can't be eligible if larger than the FP registers. Half precision isn't + // currently supported on LoongArch and the ABI hasn't been confirmed, so + // default to the integer ABI in that case. + if (IsFloat && (Size > FRLen || Size < 32)) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). + if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs<ComplexType>()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FRLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, + Field2Ty, Field2Off)) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs<RecordType>()) { + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are not eligible for the FP calling convention. 
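+    // For illustration: a struct holding a single float but with a
+    // user-provided copy constructor is rejected by the check below, even
+    // though it would otherwise flatten to one eligible field.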
+ if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + if (isEmptyRecord(getContext(), Ty, true)) + return true; + const RecordDecl *RD = RTy->getDecl(); + // Unions aren't eligible unless they're empty (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl()); + if (!detectFARsEligibleStructHelper( + B.getType(), CurOff + Layout.getBaseClassOffset(BDecl), + Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + } + } + for (const FieldDecl *FD : RD->fields()) { + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Zero-width bitfields are ignored. + if (BitWidth == 0) + continue; + // Allow a bitfield with a type greater than GRLen as long as the + // bitwidth is GRLen or less. + if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) { + QTy = getContext().getIntTypeForBitwidth(GRLen, false); + } + } + + if (!detectFARsEligibleStructHelper( + QTy, + CurOff + getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FD->getFieldIndex())), + Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible to be passed in FARs (and GARs) (i.e., when +// flattened it contains a single fp value, fp+fp, or int+fp of appropriate +// size). If so, NeededFARs and NeededGARs are incremented appropriately. +bool LoongArchABIInfo::detectFARsEligibleStruct( + QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, + llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededGARs, + int &NeededFARs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededGARs = 0; + NeededFARs = 0; + if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, + Field1Off, Field2Ty, Field2Off)) + return false; + // Not really a candidate if we have a single int but no float. + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededFARs++; + else if (Field1Ty) + NeededGARs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededFARs++; + else if (Field2Ty) + NeededGARs++; + return true; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. 
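+// For illustration: on LA64 a struct { int i; double d; } flattens to an
+// i32 at offset 0 and a double at offset 8; the method below wraps them
+// into the coerced type { i32, double }, whose natural layout already
+// matches, so no explicit padding entry is needed in that case.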
+ABIArgInfo LoongArchABIInfo::coerceAndExpandFARsEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector<llvm::Type *, 3> CoerceElts; + SmallVector<llvm::Type *, 2> UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty)); + CharUnits Field1End = + Field1Off + + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1End) + Padding = Field2Off - Field1End; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked), + llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked)); +} + +ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + int &GARsLeft, + int &FARsLeft) const { + assert(GARsLeft <= NumGARs && "GAR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (GARsLeft) + GARsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + + // Pass floating point values via FARs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FRLen >= Size && FARsLeft) { + FARsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. 
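+  // For illustration: with FRLen == 64, a fixed _Complex double argument
+  // claims two FARs below and is passed direct; if fewer than two FARs
+  // remain, or if the argument is variadic, it falls through to the
+  // GAR/stack logic instead.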
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { + QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FRLen) { + FARsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (IsFixed && FRLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); + int NeededGARs = 0; + int NeededFARs = 0; + bool IsCandidate = detectFARsEligibleStruct( + Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededGARs, NeededFARs); + if (IsCandidate && NeededGARs <= GARsLeft && NeededFARs <= FARsLeft) { + GARsLeft -= NeededGARs; + FARsLeft -= NeededFARs; + return coerceAndExpandFARsEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } + } + + uint64_t NeededAlign = getContext().getTypeAlign(Ty); + // Determine the number of GARs needed to pass the current argument + // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. + int NeededGARs = 1; + if (!IsFixed && NeededAlign == 2 * GRLen) + NeededGARs = 2 + (GARsLeft % 2); + else if (Size > GRLen && Size <= 2 * GRLen) + NeededGARs = 2; + + if (NeededGARs > GARsLeft) + NeededGARs = GARsLeft; + + GARsLeft -= NeededGARs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to GRLen width. + if (Size < GRLen && Ty->isIntegralOrEnumerationType()) + return extendType(Ty); + + if (const auto *EIT = Ty->getAs<BitIntType>()) { + if (EIT->getNumBits() < GRLen) + return extendType(Ty); + if (EIT->getNumBits() > 128 || + (!getContext().getTargetInfo().hasInt128Type() && + EIT->getNumBits() > 64)) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } + + return ABIArgInfo::getDirect(); + } + + // Aggregates which are <= 2*GRLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * GRLen) { + // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is + // required, and a 2-element GRLen array if only GRLen alignment is + // required. + if (Size <= GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), GRLen)); + } + if (getContext().getTypeAlign(Ty) == 2 * GRLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * GRLen)); + } + return ABIArgInfo::getDirect( + llvm::ArrayType::get(llvm::IntegerType::get(getVMContext(), GRLen), 2)); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. + int GARsLeft = 2; + int FARsLeft = FRLen ? 2 : 0; + return classifyArgumentType(RetTy, /*IsFixed=*/true, GARsLeft, FARsLeft); +} + +Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); + + // Empty records are ignored for parameter passing purposes. 
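+  // (The load below reuses the current va_list slot without advancing it,
+  // since nothing was actually passed for the empty record.)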
+ if (isEmptyRecord(getContext(), Ty, true)) + return Address(CGF.Builder.CreateLoad(VAListAddr), + CGF.ConvertTypeForMem(Ty), SlotSize); + + auto TInfo = getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 2*GRLen bytes are passed indirectly. + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, + /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo, + SlotSize, + /*AllowHigherAlign=*/true); +} + +ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + // LA64 ABI requires unsigned 32 bit integers to be sign extended. + if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + return ABIArgInfo::getExtend(Ty); +} + +namespace { +class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, + unsigned FRLen) + : TargetCodeGenInfo( + std::make_unique<LoongArchABIInfo>(CGT, GRLen, FRLen)) {} +}; +} // namespace + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createLoongArchTargetCodeGenInfo(CodeGenModule &CGM, unsigned GRLen, + unsigned FLen) { + return std::make_unique<LoongArchTargetCodeGenInfo>(CGM.getTypes(), GRLen, + FLen); +} diff --git a/clang/lib/CodeGen/Targets/M68k.cpp b/clang/lib/CodeGen/Targets/M68k.cpp new file mode 100644 index 000000000000..120022105f34 --- /dev/null +++ b/clang/lib/CodeGen/Targets/M68k.cpp @@ -0,0 +1,55 @@ +//===- M68k.cpp -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// M68k ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class M68kTargetCodeGenInfo : public TargetCodeGenInfo { +public: + M68kTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; +}; + +} // namespace + +void M68kTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (const auto *attr = FD->getAttr<M68kInterruptAttr>()) { + // Handle 'interrupt' attribute: + llvm::Function *F = cast<llvm::Function>(GV); + + // Step 1: Set ISR calling convention. + F->setCallingConv(llvm::CallingConv::M68k_INTR); + + // Step 2: Add attributes goodness. + F->addFnAttr(llvm::Attribute::NoInline); + + // Step 3: Emit ISR vector alias. 
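+      // For illustration: an interrupt attribute argument of 64 halves to
+      // Num == 32 below, producing an alias named __isr_32 for this
+      // handler.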
+ unsigned Num = attr->getNumber() / 2; + llvm::GlobalAlias::create(llvm::Function::ExternalLinkage, + "__isr_" + Twine(Num), F); + } + } +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createM68kTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<M68kTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/MSP430.cpp b/clang/lib/CodeGen/Targets/MSP430.cpp new file mode 100644 index 000000000000..bb67d97f4421 --- /dev/null +++ b/clang/lib/CodeGen/Targets/MSP430.cpp @@ -0,0 +1,94 @@ +//===- MSP430.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// MSP430 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class MSP430ABIInfo : public DefaultABIInfo { + static ABIArgInfo complexArgInfo() { + ABIArgInfo Info = ABIArgInfo::getDirect(); + Info.setCanBeFlattened(false); + return Info; + } + +public: + MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return complexArgInfo(); + + return DefaultABIInfo::classifyReturnType(RetTy); + } + + ABIArgInfo classifyArgumentType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return complexArgInfo(); + + return DefaultABIInfo::classifyArgumentType(RetTy); + } + + // Just copy the original implementations because + // DefaultABIInfo::classify{Return,Argument}Type() are not virtual + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override { + return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); + } +}; + +class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { +public: + MSP430TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {} + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; +}; + +} + +void MSP430TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>(); + if (!InterruptAttr) + return; + + // Handle 'interrupt' attribute: + llvm::Function *F = cast<llvm::Function>(GV); + + // Step 1: Set ISR calling convention. + F->setCallingConv(llvm::CallingConv::MSP430_INTR); + + // Step 2: Add attributes goodness. 
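+    // For illustration: __attribute__((interrupt(2))) ends up as the IR
+    // attributes noinline and "interrupt"="2" on a function using the
+    // MSP430_INTR calling convention.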
+ F->addFnAttr(llvm::Attribute::NoInline); + F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber())); + } +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createMSP430TargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<MSP430TargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/Mips.cpp b/clang/lib/CodeGen/Targets/Mips.cpp new file mode 100644 index 000000000000..8f11c63dcd85 --- /dev/null +++ b/clang/lib/CodeGen/Targets/Mips.cpp @@ -0,0 +1,441 @@ +//===- Mips.cpp -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// MIPS ABI Implementation. This works for both little-endian and +// big-endian variants. +//===----------------------------------------------------------------------===// + +namespace { +class MipsABIInfo : public ABIInfo { + bool IsO32; + const unsigned MinABIStackAlignInBytes, StackAlignInBytes; + void CoerceToIntArgs(uint64_t TySize, + SmallVectorImpl<llvm::Type *> &ArgList) const; + llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; + llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; + llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; +public: + MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : + ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), + StackAlignInBytes(IsO32 ? 8 : 16) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; + void computeInfo(CGFunctionInfo &FI) const override; + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + ABIArgInfo extendType(QualType Ty) const; +}; + +class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { + unsigned SizeOfUnwindException; +public: + MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) + : TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)), + SizeOfUnwindException(IsO32 ? 24 : 32) {} + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { + return 29; + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + llvm::Function *Fn = cast<llvm::Function>(GV); + + if (FD->hasAttr<MipsLongCallAttr>()) + Fn->addFnAttr("long-call"); + else if (FD->hasAttr<MipsShortCallAttr>()) + Fn->addFnAttr("short-call"); + + // Other attributes do not have a meaning for declarations. 
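+    // That is, long-call/short-call above apply even to bare declarations,
+    // while the mips16/micromips/interrupt handling below only runs for
+    // definitions.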
+ if (GV->isDeclaration()) + return; + + if (FD->hasAttr<Mips16Attr>()) { + Fn->addFnAttr("mips16"); + } + else if (FD->hasAttr<NoMips16Attr>()) { + Fn->addFnAttr("nomips16"); + } + + if (FD->hasAttr<MicroMipsAttr>()) + Fn->addFnAttr("micromips"); + else if (FD->hasAttr<NoMicroMipsAttr>()) + Fn->addFnAttr("nomicromips"); + + const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>(); + if (!Attr) + return; + + const char *Kind; + switch (Attr->getInterrupt()) { + case MipsInterruptAttr::eic: Kind = "eic"; break; + case MipsInterruptAttr::sw0: Kind = "sw0"; break; + case MipsInterruptAttr::sw1: Kind = "sw1"; break; + case MipsInterruptAttr::hw0: Kind = "hw0"; break; + case MipsInterruptAttr::hw1: Kind = "hw1"; break; + case MipsInterruptAttr::hw2: Kind = "hw2"; break; + case MipsInterruptAttr::hw3: Kind = "hw3"; break; + case MipsInterruptAttr::hw4: Kind = "hw4"; break; + case MipsInterruptAttr::hw5: Kind = "hw5"; break; + } + + Fn->addFnAttr("interrupt", Kind); + + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; + + unsigned getSizeOfUnwindException() const override { + return SizeOfUnwindException; + } +}; +} + +void MipsABIInfo::CoerceToIntArgs( + uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const { + llvm::IntegerType *IntTy = + llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); + + // Add (TySize / MinABIStackAlignInBytes) args of IntTy. + for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) + ArgList.push_back(IntTy); + + // If necessary, add one more integer type to ArgList. + unsigned R = TySize % (MinABIStackAlignInBytes * 8); + + if (R) + ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); +} + +// In N32/64, an aligned double precision floating point field is passed in +// a register. +llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { + SmallVector<llvm::Type*, 8> ArgList, IntArgList; + + if (IsO32) { + CoerceToIntArgs(TySize, ArgList); + return llvm::StructType::get(getVMContext(), ArgList); + } + + if (Ty->isComplexType()) + return CGT.ConvertType(Ty); + + const RecordType *RT = Ty->getAs<RecordType>(); + + // Unions/vectors are passed in integer registers. + if (!RT || !RT->isStructureOrClassType()) { + CoerceToIntArgs(TySize, ArgList); + return llvm::StructType::get(getVMContext(), ArgList); + } + + const RecordDecl *RD = RT->getDecl(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + assert(!(TySize % 8) && "Size of structure must be multiple of 8."); + + uint64_t LastOffset = 0; + unsigned idx = 0; + llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); + + // Iterate over fields in the struct/class and check if there are any aligned + // double fields. + for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); + i != e; ++i, ++idx) { + const QualType Ty = i->getType(); + const BuiltinType *BT = Ty->getAs<BuiltinType>(); + + if (!BT || BT->getKind() != BuiltinType::Double) + continue; + + uint64_t Offset = Layout.getFieldOffset(idx); + if (Offset % 64) // Ignore doubles that are not aligned. + continue; + + // Add ((Offset - LastOffset) / 64) args of type i64. + for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) + ArgList.push_back(I64); + + // Add double type. 
+ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); + LastOffset = Offset + 64; + } + + CoerceToIntArgs(TySize - LastOffset, IntArgList); + ArgList.append(IntArgList.begin(), IntArgList.end()); + + return llvm::StructType::get(getVMContext(), ArgList); +} + +llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, + uint64_t Offset) const { + if (OrigOffset + MinABIStackAlignInBytes > Offset) + return nullptr; + + return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); +} + +ABIArgInfo +MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + uint64_t OrigOffset = Offset; + uint64_t TySize = getContext().getTypeSize(Ty); + uint64_t Align = getContext().getTypeAlign(Ty) / 8; + + Align = std::clamp(Align, (uint64_t)MinABIStackAlignInBytes, + (uint64_t)StackAlignInBytes); + unsigned CurrOffset = llvm::alignTo(Offset, Align); + Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; + + if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { + // Ignore empty aggregates. + if (TySize == 0) + return ABIArgInfo::getIgnore(); + + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + Offset = OrigOffset + MinABIStackAlignInBytes; + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + } + + // If we have reached here, aggregates are passed directly by coercing to + // another structure type. Padding is inserted if the offset of the + // aggregate is unaligned. + ABIArgInfo ArgInfo = + ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, + getPaddingType(OrigOffset, CurrOffset)); + ArgInfo.setInReg(true); + return ArgInfo; + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // Make sure we pass indirectly things that are too large. + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > 128 || + (EIT->getNumBits() > 64 && + !getContext().getTargetInfo().hasInt128Type())) + return getNaturalAlignIndirect(Ty); + + // All integral types are promoted to the GPR width. + if (Ty->isIntegralOrEnumerationType()) + return extendType(Ty); + + return ABIArgInfo::getDirect( + nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset)); +} + +llvm::Type* +MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { + const RecordType *RT = RetTy->getAs<RecordType>(); + SmallVector<llvm::Type*, 8> RTList; + + if (RT && RT->isStructureOrClassType()) { + const RecordDecl *RD = RT->getDecl(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + unsigned FieldCnt = Layout.getFieldCount(); + + // N32/64 returns struct/classes in floating point registers if the + // following conditions are met: + // 1. The size of the struct/class is no larger than 128-bit. + // 2. The struct/class has one or two fields all of which are floating + // point types. + // 3. The offset of the first field is zero (this follows what gcc does). + // + // Any other composite results are returned in integer registers. 
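+    // For illustration: struct { double x; double y; } meets all three
+    // conditions (128 bits, two floating-point fields, first offset zero)
+    // and is returned as { double, double } in FP registers, while
+    // struct { double x; int i; } fails condition 2 and is returned in
+    // integer registers.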
+ // + if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { + RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); + for (; b != e; ++b) { + const BuiltinType *BT = b->getType()->getAs<BuiltinType>(); + + if (!BT || !BT->isFloatingPoint()) + break; + + RTList.push_back(CGT.ConvertType(b->getType())); + } + + if (b == e) + return llvm::StructType::get(getVMContext(), RTList, + RD->hasAttr<PackedAttr>()); + + RTList.clear(); + } + } + + CoerceToIntArgs(Size, RTList); + return llvm::StructType::get(getVMContext(), RTList); +} + +ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size = getContext().getTypeSize(RetTy); + + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + // O32 doesn't treat zero-sized structs differently from other structs. + // However, N32/N64 ignores zero sized return values. + if (!IsO32 && Size == 0) + return ABIArgInfo::getIgnore(); + + if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { + if (Size <= 128) { + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirect(); + + // O32 returns integer vectors in registers and N32/N64 returns all small + // aggregates in registers. + if (!IsO32 || + (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { + ABIArgInfo ArgInfo = + ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); + ArgInfo.setInReg(true); + return ArgInfo; + } + } + + return getNaturalAlignIndirect(RetTy); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + // Make sure we pass indirectly things that are too large. + if (const auto *EIT = RetTy->getAs<BitIntType>()) + if (EIT->getNumBits() > 128 || + (EIT->getNumBits() > 64 && + !getContext().getTargetInfo().hasInt128Type())) + return getNaturalAlignIndirect(RetTy); + + if (isPromotableIntegerTypeForABI(RetTy)) + return ABIArgInfo::getExtend(RetTy); + + if ((RetTy->isUnsignedIntegerOrEnumerationType() || + RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32) + return ABIArgInfo::getSignExtend(RetTy); + + return ABIArgInfo::getDirect(); +} + +void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { + ABIArgInfo &RetInfo = FI.getReturnInfo(); + if (!getCXXABI().classifyReturnType(FI)) + RetInfo = classifyReturnType(FI.getReturnType()); + + // Check if a pointer to an aggregate is passed as a hidden argument. + uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; + + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, Offset); +} + +Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType OrigTy) const { + QualType Ty = OrigTy; + + // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64. + // Pointers are also promoted in the same way but this only matters for N32. + unsigned SlotSizeInBits = IsO32 ? 32 : 64; + unsigned PtrWidth = getTarget().getPointerWidth(LangAS::Default); + bool DidPromote = false; + if ((Ty->isIntegerType() && + getContext().getIntWidth(Ty) < SlotSizeInBits) || + (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) { + DidPromote = true; + Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits, + Ty->isSignedIntegerType()); + } + + auto TyInfo = getContext().getTypeInfoInChars(Ty); + + // The alignment of things in the argument area is never larger than + // StackAlignInBytes. 
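+  // (StackAlignInBytes is 8 for O32 and 16 for N32/N64, per the
+  // constructor above.)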
+ TyInfo.Align =
+ std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes));
+
+ // MinABIStackAlignInBytes is the size of argument slots on the stack.
+ CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes);
+
+ Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+ TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true);
+
+ // If there was a promotion, "unpromote" into a temporary.
+ // TODO: can we just use a pointer into a subset of the original slot?
+ if (DidPromote) {
+ Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp");
+ llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr);
+
+ // Truncate down to the right width.
+ llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType()
+ : CGF.IntPtrTy);
+ llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy);
+ if (OrigTy->isPointerType())
+ V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType());
+
+ CGF.Builder.CreateStore(V, Temp);
+ Addr = Temp;
+ }
+
+ return Addr;
+}
+
+ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
+ int TySize = getContext().getTypeSize(Ty);
+
+ // The MIPS64 ABI requires unsigned 32-bit integers to be sign-extended.
+ if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+ return ABIArgInfo::getSignExtend(Ty);
+
+ return ABIArgInfo::getExtend(Ty);
+}
+
+bool
+MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const {
+ // This information comes from gcc's implementation, which seems to be
+ // as canonical as it gets.
+
+ // Everything on MIPS is 4 bytes. Double-precision FP registers
+ // are aliased to pairs of single-precision FP registers.
+ llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
+
+ // 0-31 are the general purpose registers, $0 - $31.
+ // 32-63 are the floating-point registers, $f0 - $f31.
+ // 64 and 65 are the multiply/divide registers, $hi and $lo.
+ // 66 is the (notional, I think) register for signal-handler return.
+ AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65);
+
+ // 67-74 are the floating-point status registers, $fcc0 - $fcc7.
+ // They are one bit wide and ignored here.
+
+ // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31.
+ // (coprocessor 1 is the FP unit)
+ // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31.
+ // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31.
+ // 176-181 are the DSP accumulator registers.
+ AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181);
+ return false;
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createMIPSTargetCodeGenInfo(CodeGenModule &CGM, bool IsOS32) {
+ return std::make_unique<MIPSTargetCodeGenInfo>(CGM.getTypes(), IsOS32);
+}
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
new file mode 100644
index 000000000000..1ca0192333a0
--- /dev/null
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -0,0 +1,309 @@
+//===- NVPTX.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" +#include "llvm/IR/IntrinsicsNVPTX.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// NVPTX ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class NVPTXTargetCodeGenInfo; + +class NVPTXABIInfo : public ABIInfo { + NVPTXTargetCodeGenInfo &CGInfo; + +public: + NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) + : ABIInfo(CGT), CGInfo(Info) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override; + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + bool isUnsupportedType(QualType T) const; + ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; +}; + +class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { +public: + NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; + bool shouldEmitStaticExternCAliases() const override; + + llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { + // On the device side, surface reference is represented as an object handle + // in 64-bit integer. + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { + // On the device side, texture reference is represented as an object handle + // in 64-bit integer. + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + + bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + +private: + // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the + // resulting MDNode to the nvvm.annotations MDNode. + static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, + int Operand); + + static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) { + llvm::Value *Handle = nullptr; + llvm::Constant *C = + llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer()); + // Lookup `addrspacecast` through the constant pointer if any. + if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) + C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); + if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { + // Load the handle from the specific global variable using + // `nvvm.texsurf.handle.internal` intrinsic. + Handle = CGF.EmitRuntimeCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, + {GV->getType()}), + {GV}, "texsurf_handle"); + } else + Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); + CGF.EmitStoreOfScalar(Handle, Dst); + } +}; + +/// Checks if the type is unsupported directly by the current target. 
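+/// For example, when the target lacks a 128-bit float type, __float128 is
+/// unsupported, and so is any record that contains one, whether directly,
+/// through a base class, or through an array element.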
+bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
+ ASTContext &Context = getContext();
+ if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
+ return true;
+ if (!Context.getTargetInfo().hasFloat128Type() &&
+ (T->isFloat128Type() ||
+ (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
+ return true;
+ if (const auto *EIT = T->getAs<BitIntType>())
+ return EIT->getNumBits() >
+ (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
+ if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
+ Context.getTypeSize(T) > 64U)
+ return true;
+ if (const auto *AT = T->getAsArrayTypeUnsafe())
+ return isUnsupportedType(AT->getElementType());
+ const auto *RT = T->getAs<RecordType>();
+ if (!RT)
+ return false;
+ const RecordDecl *RD = RT->getDecl();
+
+ // If this is a C++ record, check the bases first.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ for (const CXXBaseSpecifier &I : CXXRD->bases())
+ if (isUnsupportedType(I.getType()))
+ return true;
+
+ for (const FieldDecl *I : RD->fields())
+ if (isUnsupportedType(I->getType()))
+ return true;
+ return false;
+}
+
+/// Coerce the given type into an integer array whose elements are at most
+/// MaxSize bits wide.
+ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
+ unsigned MaxSize) const {
+ // Alignment and Size are measured in bits.
+ const uint64_t Size = getContext().getTypeSize(Ty);
+ const uint64_t Alignment = getContext().getTypeAlign(Ty);
+ const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
+ llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
+ const uint64_t NumElements = (Size + Div - 1) / Div;
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
+}
+
+ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ if (getContext().getLangOpts().OpenMP &&
+ getContext().getLangOpts().OpenMPIsTargetDevice &&
+ isUnsupportedType(RetTy))
+ return coerceToIntArrayWithLimit(RetTy, 64);
+
+ // Note: this is different from the default ABI.
+ if (!RetTy->isScalarType())
+ return ABIArgInfo::getDirect();
+
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+ RetTy = EnumTy->getDecl()->getIntegerType();
+
+ return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+ : ABIArgInfo::getDirect());
+}
+
+ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ // Return aggregate types indirectly, by value.
+ if (isAggregateTypeForABI(Ty)) {
+ // Under CUDA device compilation, tex/surf builtin types are replaced with
+ // object types and passed directly.
+ if (getContext().getLangOpts().CUDAIsDevice) {
+ if (Ty->isCUDADeviceBuiltinSurfaceType())
+ return ABIArgInfo::getDirect(
+ CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
+ if (Ty->isCUDADeviceBuiltinTextureType())
+ return ABIArgInfo::getDirect(
+ CGInfo.getCUDADeviceBuiltinTextureDeviceType());
+ }
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
+
+ if (const auto *EIT = Ty->getAs<BitIntType>()) {
+ if ((EIT->getNumBits() > 128) ||
+ (!getContext().getTargetInfo().hasInt128Type() &&
+ EIT->getNumBits() > 64))
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
+
+ return (isPromotableIntegerTypeForABI(Ty) ?
ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); +} + +void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + + // Always honor user-specified calling convention. + if (FI.getCallingConvention() != llvm::CallingConv::C) + return; + + FI.setEffectiveCallingConvention(getRuntimeCC()); +} + +Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + llvm_unreachable("NVPTX does not support varargs"); +} + +void NVPTXTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; + const VarDecl *VD = dyn_cast_or_null<VarDecl>(D); + if (VD) { + if (M.getLangOpts().CUDA) { + if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) + addNVVMMetadata(GV, "surface", 1); + else if (VD->getType()->isCUDADeviceBuiltinTextureType()) + addNVVMMetadata(GV, "texture", 1); + return; + } + } + + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + + llvm::Function *F = cast<llvm::Function>(GV); + + // Perform special handling in OpenCL mode + if (M.getLangOpts().OpenCL) { + // Use OpenCL function attributes to check for kernel functions + // By default, all functions are device functions + if (FD->hasAttr<OpenCLKernelAttr>()) { + // OpenCL __kernel functions get kernel metadata + // Create !{<func-ref>, metadata !"kernel", i32 1} node + addNVVMMetadata(F, "kernel", 1); + // And kernel functions are not subject to inlining + F->addFnAttr(llvm::Attribute::NoInline); + } + } + + // Perform special handling in CUDA mode. + if (M.getLangOpts().CUDA) { + // CUDA __global__ functions get a kernel metadata entry. Since + // __global__ functions cannot be called from the device, we do not + // need to set the noinline attribute. + if (FD->hasAttr<CUDAGlobalAttr>()) { + // Create !{<func-ref>, metadata !"kernel", i32 1} node + addNVVMMetadata(F, "kernel", 1); + } + if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) { + // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node + llvm::APSInt MaxThreads(32); + MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); + if (MaxThreads > 0) + addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); + + // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was + // not specified in __launch_bounds__ or if the user specified a 0 value, + // we don't have to add a PTX directive. + if (Attr->getMinBlocks()) { + llvm::APSInt MinBlocks(32); + MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); + if (MinBlocks > 0) + // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node + addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); + } + } + } + + // Attach kernel metadata directly if compiling for NVPTX. 
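+ // This covers functions carrying the NVPTX-specific kernel attribute
+ // (spelled __attribute__((nvptx_kernel)) in source), as opposed to the
+ // CUDA __global__ and OpenCL __kernel cases handled above.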
+ if (FD->hasAttr<NVPTXKernelAttr>()) {
+ addNVVMMetadata(F, "kernel", 1);
+ }
+}
+
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
+ StringRef Name, int Operand) {
+ llvm::Module *M = GV->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get the "nvvm.annotations" metadata node.
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ llvm::Metadata *MDVals[] = {
+ llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+ // Append the metadata to nvvm.annotations.
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
+bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
+ return false;
+}
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
+ return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
+}
diff --git a/clang/lib/CodeGen/Targets/PNaCl.cpp b/clang/lib/CodeGen/Targets/PNaCl.cpp
new file mode 100644
index 000000000000..771aa7469da2
--- /dev/null
+++ b/clang/lib/CodeGen/Targets/PNaCl.cpp
@@ -0,0 +1,109 @@
+//===- PNaCl.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// le32/PNaCl bitcode ABI Implementation
+//
+// This is a simplified version of the x86_32 ABI. Arguments and return values
+// are always passed on the stack.
+//===----------------------------------------------------------------------===//
+
+class PNaClABIInfo : public ABIInfo {
+ public:
+ PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
+
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy) const;
+
+ void computeInfo(CGFunctionInfo &FI) const override;
+ Address EmitVAArg(CodeGenFunction &CGF,
+ Address VAListAddr, QualType Ty) const override;
+};
+
+class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
+ public:
+ PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {}
+};
+
+void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type);
+}
+
+Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ // The PNaCl ABI is a bit odd, in that varargs don't use the normal
+ // function classification. Structs get passed directly for varargs
+ // functions, through a rewriting transform in
+ // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
+ // this target to actually support va_arg instructions with an
+ // aggregate type, unlike other targets.
+ return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
+}
+
+/// Classify an argument of the given type \p Ty.
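+/// For example, a struct is passed indirectly, a double is passed directly,
+/// and a short is extended to a full int.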
+ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { + if (isAggregateTypeForABI(Ty)) { + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + return getNaturalAlignIndirect(Ty); + } else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) { + // Treat an enum type as its underlying type. + Ty = EnumTy->getDecl()->getIntegerType(); + } else if (Ty->isFloatingType()) { + // Floating-point types don't go inreg. + return ABIArgInfo::getDirect(); + } else if (const auto *EIT = Ty->getAs<BitIntType>()) { + // Treat bit-precise integers as integers if <= 64, otherwise pass + // indirectly. + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(Ty); + return ABIArgInfo::getDirect(); + } + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); +} + +ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + // In the PNaCl ABI we always return records/structures on the stack. + if (isAggregateTypeForABI(RetTy)) + return getNaturalAlignIndirect(RetTy); + + // Treat bit-precise integers as integers if <= 64, otherwise pass indirectly. + if (const auto *EIT = RetTy->getAs<BitIntType>()) { + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(RetTy); + return ABIArgInfo::getDirect(); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createPNaClTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<PNaClTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp new file mode 100644 index 000000000000..9cdd2aa07791 --- /dev/null +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -0,0 +1,972 @@ +//===- PPC.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty, CharUnits SlotSize, + CharUnits EltSize, const ComplexType *CTy) { + Address Addr = + emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2, + SlotSize, SlotSize, /*AllowHigher*/ true); + + Address RealAddr = Addr; + Address ImagAddr = RealAddr; + if (CGF.CGM.getDataLayout().isBigEndian()) { + RealAddr = + CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize); + ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr, + 2 * SlotSize - EltSize); + } else { + ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize); + } + + llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType()); + RealAddr = RealAddr.withElementType(EltTy); + ImagAddr = ImagAddr.withElementType(EltTy); + llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal"); + llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag"); + + Address Temp = CGF.CreateMemTemp(Ty, "vacplx"); + CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty), + /*init*/ true); + return Temp; +} + +static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address, bool Is64Bit, + bool IsAIX) { + // This is calculated from the LLVM and GCC tables and verified + // against gcc output. AFAIK all PPC ABIs use the same encoding. + + CodeGen::CGBuilderTy &Builder = CGF.Builder; + + llvm::IntegerType *i8 = CGF.Int8Ty; + llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); + llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); + llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); + + // 0-31: r0-31, the 4-byte or 8-byte general-purpose registers + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31); + + // 32-63: fp0-31, the 8-byte floating-point registers + AssignToArrayRange(Builder, Address, Eight8, 32, 63); + + // 64-67 are various 4-byte or 8-byte special-purpose registers: + // 64: mq + // 65: lr + // 66: ctr + // 67: ap + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67); + + // 68-76 are various 4-byte special-purpose registers: + // 68-75 cr0-7 + // 76: xer + AssignToArrayRange(Builder, Address, Four8, 68, 76); + + // 77-108: v0-31, the 16-byte vector registers + AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); + + // 109: vrsave + // 110: vscr + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110); + + // AIX does not utilize the rest of the registers. + if (IsAIX) + return false; + + // 111: spe_acc + // 112: spefscr + // 113: sfp + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113); + + if (!Is64Bit) + return false; + + // TODO: Need to verify if these registers are used on 64 bit AIX with Power8 + // or above CPU. + // 64-bit only registers: + // 114: tfhar + // 115: tfiar + // 116: texasr + AssignToArrayRange(Builder, Address, Eight8, 114, 116); + + return false; +} + +// AIX +namespace { +/// AIXABIInfo - The AIX XCOFF ABI information. +class AIXABIInfo : public ABIInfo { + const bool Is64Bit; + const unsigned PtrByteSize; + CharUnits getParamTypeAlignment(QualType Ty) const; + +public: + AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) + : ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 
8 : 4) {} + + bool isPromotableTypeForABI(QualType Ty) const; + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +class AIXTargetCodeGenInfo : public TargetCodeGenInfo { + const bool Is64Bit; + +public: + AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) + : TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)), + Is64Bit(Is64Bit) {} + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + return 1; // r1 is the dedicated stack pointer + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; +}; +} // namespace + +// Return true if the ABI requires Ty to be passed sign- or zero- +// extended to 32/64 bits. +bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // Promotable integer types are required to be promoted by the ABI. + if (getContext().isPromotableIntegerType(Ty)) + return true; + + if (!Is64Bit) + return false; + + // For 64 bit mode, in addition to the usual promotable integer types, we also + // need to extend all 32-bit types, since the ABI requires promotion to 64 + // bits. + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Int: + case BuiltinType::UInt: + return true; + default: + break; + } + + return false; +} + +ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirect(); + + if (RetTy->isVectorType()) + return ABIArgInfo::getDirect(); + + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (isAggregateTypeForABI(RetTy)) + return getNaturalAlignIndirect(RetTy); + + return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); +} + +ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (Ty->isAnyComplexType()) + return ABIArgInfo::getDirect(); + + if (Ty->isVectorType()) + return ABIArgInfo::getDirect(); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + CharUnits CCAlign = getParamTypeAlignment(Ty); + CharUnits TyAlign = getContext().getTypeAlignInChars(Ty); + + return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true, + /*Realign*/ TyAlign > CCAlign); + } + + return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); +} + +CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const { + // Complex types are passed just like their elements. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + if (Ty->isVectorType()) + return CharUnits::fromQuantity(16); + + // If the structure contains a vector type, the alignment is 16. 
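+ // For example, a record such as
+ //   struct S { vector int v; int i; };   // AltiVec vector member
+ // gets 16-byte alignment here, while a record of plain scalars falls
+ // through to the pointer-size default below.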
+ if (isRecordWithSIMDVectorType(getContext(), Ty)) + return CharUnits::fromQuantity(16); + + return CharUnits::fromQuantity(PtrByteSize); +} + +Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + + auto TypeInfo = getContext().getTypeInfoInChars(Ty); + TypeInfo.Align = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize); + + // If we have a complex type and the base type is smaller than the register + // size, the ABI calls for the real and imaginary parts to be right-adjusted + // in separate words in 32bit mode or doublewords in 64bit mode. However, + // Clang expects us to produce a pointer to a structure with the two parts + // packed tightly. So generate loads of the real and imaginary parts relative + // to the va_list pointer, and store them to a temporary structure. We do the + // same as the PPC64ABI here. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + CharUnits EltSize = TypeInfo.Width / 2; + if (EltSize < SlotSize) + return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); + } + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, + SlotSize, /*AllowHigher*/ true); +} + +bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable( + CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { + return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true); +} + +// PowerPC-32 +namespace { +/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. +class PPC32_SVR4_ABIInfo : public DefaultABIInfo { + bool IsSoftFloatABI; + bool IsRetSmallStructInRegABI; + + CharUnits getParamTypeAlignment(QualType Ty) const; + +public: + PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { +public: + PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>( + CGT, SoftFloatABI, RetSmallStructInRegABI)) {} + + static bool isStructReturnInRegABI(const llvm::Triple &Triple, + const CodeGenOptions &Opts); + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + // This is recovered from gcc output. + return 1; // r1 is the dedicated stack pointer + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; +}; +} + +CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { + // Complex types are passed just like their elements. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + if (Ty->isVectorType()) + return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 + : 4); + + // For single-element float/vector structs, we consider the whole type + // to have the same alignment requirements as its single element. 
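+ // For example, struct { double d; } is aligned like a plain double (4
+ // bytes here), and struct { vector int v; } like a 16-byte vector.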
+ const Type *AlignTy = nullptr; + if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { + const BuiltinType *BT = EltType->getAs<BuiltinType>(); + if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || + (BT && BT->isFloatingPoint())) + AlignTy = EltType; + } + + if (AlignTy) + return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); + return CharUnits::fromQuantity(4); +} + +ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size; + + // -msvr4-struct-return puts small aggregates in GPR3 and GPR4. + if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI && + (Size = getContext().getTypeSize(RetTy)) <= 64) { + // System V ABI (1995), page 3-22, specified: + // > A structure or union whose size is less than or equal to 8 bytes + // > shall be returned in r3 and r4, as if it were first stored in the + // > 8-byte aligned memory area and then the low addressed word were + // > loaded into r3 and the high-addressed word into r4. Bits beyond + // > the last member of the structure or union are not defined. + // + // GCC for big-endian PPC32 inserts the pad before the first member, + // not "beyond the last member" of the struct. To stay compatible + // with GCC, we coerce the struct to an integer of the same size. + // LLVM will extend it and return i32 in r3, or i64 in r3:r4. + if (Size == 0) + return ABIArgInfo::getIgnore(); + else { + llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + return DefaultABIInfo::classifyReturnType(RetTy); +} + +// TODO: this implementation is now likely redundant with +// DefaultABIInfo::EmitVAArg. +Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, + QualType Ty) const { + if (getTarget().getTriple().isOSDarwin()) { + auto TI = getContext().getTypeInfoInChars(Ty); + TI.Align = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(4); + return emitVoidPtrVAArg(CGF, VAList, Ty, + classifyArgumentType(Ty).isIndirect(), TI, SlotSize, + /*AllowHigherAlign=*/true); + } + + const unsigned OverflowLimit = 8; + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + // TODO: Implement this. For now ignore. + (void)CTy; + return Address::invalid(); // FIXME? + } + + // struct __va_list_tag { + // unsigned char gpr; + // unsigned char fpr; + // unsigned short reserved; + // void *overflow_arg_area; + // void *reg_save_area; + // }; + + bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; + bool isInt = !Ty->isFloatingType(); + bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; + + // All aggregates are passed indirectly? That doesn't seem consistent + // with the argument-lowering code. + bool isIndirect = isAggregateTypeForABI(Ty); + + CGBuilderTy &Builder = CGF.Builder; + + // The calling convention either uses 1-2 GPRs or 1 FPR. + Address NumRegsAddr = Address::invalid(); + if (isInt || IsSoftFloatABI) { + NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr"); + } else { + NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr"); + } + + llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs"); + + // "Align" the register count when TY is i64. 
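+ // A doubleword value occupies an aligned GPR pair, so the count is rounded
+ // up to the next even register via (NumRegs + 1) & ~1, e.g. 3 -> 4.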
+ if (isI64 || (isF64 && IsSoftFloatABI)) {
+ NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
+ NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U));
+ }
+
+ llvm::Value *CC =
+ Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond");
+
+ llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
+ llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow");
+ llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
+
+ Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);
+
+ llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy;
+ if (isIndirect)
+ DirectTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+
+ // Case 1: consume registers.
+ Address RegAddr = Address::invalid();
+ {
+ CGF.EmitBlock(UsingRegs);
+
+ Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4);
+ RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CGF.Int8Ty,
+ CharUnits::fromQuantity(8));
+ assert(RegAddr.getElementType() == CGF.Int8Ty);
+
+ // Floating-point registers start after the general-purpose registers.
+ if (!(isInt || IsSoftFloatABI)) {
+ RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
+ CharUnits::fromQuantity(32));
+ }
+
+ // Get the address of the saved value by scaling the number of
+ // registers we've used by the size of each register.
+ CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8);
+ llvm::Value *RegOffset =
+ Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity()));
+ RegAddr = Address(
+ Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset),
+ DirectTy, RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
+
+ // Increase the used-register count.
+ NumRegs =
+ Builder.CreateAdd(NumRegs,
+ Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1));
+ Builder.CreateStore(NumRegs, NumRegsAddr);
+
+ CGF.EmitBranch(Cont);
+ }
+
+ // Case 2: consume space in the overflow area.
+ Address MemAddr = Address::invalid();
+ {
+ CGF.EmitBlock(UsingOverflow);
+
+ Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr);
+
+ // Everything in the overflow area is rounded up to a size of at least 4.
+ CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);
+
+ CharUnits Size;
+ if (!isIndirect) {
+ auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
+ Size = TypeInfo.Width.alignTo(OverflowAreaAlign);
+ } else {
+ Size = CGF.getPointerSize();
+ }
+
+ Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3);
+ Address OverflowArea =
+ Address(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), CGF.Int8Ty,
+ OverflowAreaAlign);
+ // Round up the address of the argument to its alignment.
+ CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
+ if (Align > OverflowAreaAlign) {
+ llvm::Value *Ptr = OverflowArea.getPointer();
+ OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align),
+ OverflowArea.getElementType(), Align);
+ }
+
+ MemAddr = OverflowArea.withElementType(DirectTy);
+
+ // Increase the overflow area.
+ OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size);
+ Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
+ CGF.EmitBranch(Cont);
+ }
+
+ CGF.EmitBlock(Cont);
+
+ // Merge the cases with a phi.
+ Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow,
+ "vaarg.addr");
+
+ // Load the pointer if the argument was passed indirectly.
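+ // (For aggregates the slot holds a pointer to the value, so one more load
+ // is needed to reach the actual argument.)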
+ if (isIndirect) { + Result = Address(Builder.CreateLoad(Result, "aggr"), ElementTy, + getContext().getTypeAlignInChars(Ty)); + } + + return Result; +} + +bool PPC32TargetCodeGenInfo::isStructReturnInRegABI( + const llvm::Triple &Triple, const CodeGenOptions &Opts) { + assert(Triple.isPPC32()); + + switch (Opts.getStructReturnConvention()) { + case CodeGenOptions::SRCK_Default: + break; + case CodeGenOptions::SRCK_OnStack: // -maix-struct-return + return false; + case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return + return true; + } + + if (Triple.isOSBinFormatELF() && !Triple.isOSLinux()) + return true; + + return false; +} + +bool +PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false, + /*IsAIX*/ false); +} + +// PowerPC-64 + +namespace { + +/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. +class PPC64_SVR4_ABIInfo : public ABIInfo { + static const unsigned GPRBits = 64; + PPC64_SVR4_ABIKind Kind; + bool IsSoftFloatABI; + +public: + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, PPC64_SVR4_ABIKind Kind, + bool SoftFloatABI) + : ABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {} + + bool isPromotableTypeForABI(QualType Ty) const; + CharUnits getParamTypeAlignment(QualType Ty) const; + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Ty, + uint64_t Members) const override; + + // TODO: We can add more logic to computeInfo to improve performance. + // Example: For aggregate arguments that fit in a register, we could + // use getDirectInReg (as is done below for structs containing a single + // floating-point value) to avoid pushing them to memory on function + // entry. This would require changing the logic in PPCISelLowering + // when lowering the parameters in the caller and args in the callee. + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) { + // We rely on the default argument classification for the most part. + // One exception: An aggregate containing a single floating-point + // or vector item must be passed in a register if one is available. + const Type *T = isSingleElementStruct(I.type, getContext()); + if (T) { + const BuiltinType *BT = T->getAs<BuiltinType>(); + if ((T->isVectorType() && getContext().getTypeSize(T) == 128) || + (BT && BT->isFloatingPoint())) { + QualType QT(T, 0); + I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); + continue; + } + } + I.info = classifyArgumentType(I.type); + } + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { + +public: + PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, PPC64_SVR4_ABIKind Kind, + bool SoftFloatABI) + : TargetCodeGenInfo( + std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) { + SwiftInfo = + std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + // This is recovered from gcc output. 
+ return 1; // r1 is the dedicated stack pointer
+ }
+
+ bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const override;
+};
+
+class PPC64TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ PPC64TargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
+
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
+ // This is recovered from gcc output.
+ return 1; // r1 is the dedicated stack pointer
+ }
+
+ bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const override;
+};
+}
+
+// Return true if the ABI requires Ty to be passed sign- or zero-
+// extended to 64 bits.
+bool
+PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ // Promotable integer types are required to be promoted by the ABI.
+ if (isPromotableIntegerTypeForABI(Ty))
+ return true;
+
+ // In addition to the usual promotable integer types, we also need to
+ // extend all 32-bit types, since the ABI requires promotion to 64 bits.
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
+ switch (BT->getKind()) {
+ case BuiltinType::Int:
+ case BuiltinType::UInt:
+ return true;
+ default:
+ break;
+ }
+
+ if (const auto *EIT = Ty->getAs<BitIntType>())
+ if (EIT->getNumBits() < 64)
+ return true;
+
+ return false;
+}
+
+/// getParamTypeAlignment - Determine whether a type requires 16-byte or
+/// higher alignment in the parameter area. Always returns at least 8.
+CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
+ // Complex types are passed just like their elements.
+ if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+ Ty = CTy->getElementType();
+
+ auto FloatUsesVector = [this](QualType Ty){
+ return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics(
+ Ty) == &llvm::APFloat::IEEEquad();
+ };
+
+ // Only vector types of size 16 bytes need alignment (larger types are
+ // passed via reference, smaller types are not aligned).
+ if (Ty->isVectorType()) {
+ return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8);
+ } else if (FloatUsesVector(Ty)) {
+ // According to ABI document section 'Optional Save Areas': If extended
+ // precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION
+ // format are supported, map them to a single quadword, quadword aligned.
+ return CharUnits::fromQuantity(16);
+ }
+
+ // For single-element float/vector structs, we consider the whole type
+ // to have the same alignment requirements as its single element.
+ const Type *AlignAsType = nullptr;
+ const Type *EltType = isSingleElementStruct(Ty, getContext());
+ if (EltType) {
+ const BuiltinType *BT = EltType->getAs<BuiltinType>();
+ if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
+ (BT && BT->isFloatingPoint()))
+ AlignAsType = EltType;
+ }
+
+ // Likewise for ELFv2 homogeneous aggregates.
+ const Type *Base = nullptr;
+ uint64_t Members = 0;
+ if (!AlignAsType && Kind == PPC64_SVR4_ABIKind::ELFv2 &&
+ isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
+ AlignAsType = Base;
+
+ // With special-case aggregates, only vector base types need alignment.
+ if (AlignAsType) {
+ bool UsesVector = AlignAsType->isVectorType() ||
+ FloatUsesVector(QualType(AlignAsType, 0));
+ return CharUnits::fromQuantity(UsesVector ?
16 : 8); + } + + // Otherwise, we only need alignment for any aggregate type that + // has an alignment requirement of >= 16 bytes. + if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) { + return CharUnits::fromQuantity(16); + } + + return CharUnits::fromQuantity(8); +} + +bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for ELFv2 must have base types of float, + // double, long double, or 128-bit vectors. + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->getKind() == BuiltinType::Float || + BT->getKind() == BuiltinType::Double || + BT->getKind() == BuiltinType::LongDouble || + BT->getKind() == BuiltinType::Ibm128 || + (getContext().getTargetInfo().hasFloat128Type() && + (BT->getKind() == BuiltinType::Float128))) { + if (IsSoftFloatABI) + return false; + return true; + } + } + if (const VectorType *VT = Ty->getAs<VectorType>()) { + if (getContext().getTypeSize(VT) == 128) + return true; + } + return false; +} + +bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough( + const Type *Base, uint64_t Members) const { + // Vector and fp128 types require one register, other floating point types + // require one or two registers depending on their size. + uint32_t NumRegs = + ((getContext().getTargetInfo().hasFloat128Type() && + Base->isFloat128Type()) || + Base->isVectorType()) ? 1 + : (getContext().getTypeSize(Base) + 63) / 64; + + // Homogeneous Aggregates may occupy at most 8 registers. + return Members * NumRegs <= 8; +} + +ABIArgInfo +PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (Ty->isAnyComplexType()) + return ABIArgInfo::getDirect(); + + // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes) + // or via reference (larger than 16 bytes). + if (Ty->isVectorType()) { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size > 128) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + else if (Size < 128) { + llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + + if (isAggregateTypeForABI(Ty)) { + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity(); + uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); + + // ELFv2 homogeneous aggregates are passed as array types. + const Type *Base = nullptr; + uint64_t Members = 0; + if (Kind == PPC64_SVR4_ABIKind::ELFv2 && + isHomogeneousAggregate(Ty, Base, Members)) { + llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); + llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); + return ABIArgInfo::getDirect(CoerceTy); + } + + // If an aggregate may end up fully in registers, we do not + // use the ByVal method, but pass the aggregate as array. + // This is usually beneficial since we avoid forcing the + // back-end to store the argument to memory. + uint64_t Bits = getContext().getTypeSize(Ty); + if (Bits > 0 && Bits <= 8 * GPRBits) { + llvm::Type *CoerceTy; + + // Types up to 8 bytes are passed as integer type (which will be + // properly aligned in the argument save area doubleword). 
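+ // For example, a 3-byte struct is coerced to i24 here, while a 12-byte
+ // struct with 8-byte ABI alignment becomes [2 x i64] in the branch below.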
+ if (Bits <= GPRBits) + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); + // Larger types are passed as arrays, with the base type selected + // according to the required alignment in the save area. + else { + uint64_t RegBits = ABIAlign * 8; + uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits; + llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); + CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); + } + + return ABIArgInfo::getDirect(CoerceTy); + } + + // All other aggregates are passed ByVal. + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), + /*ByVal=*/true, + /*Realign=*/TyAlign > ABIAlign); + } + + return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); +} + +ABIArgInfo +PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirect(); + + // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes) + // or via reference (larger than 16 bytes). + if (RetTy->isVectorType()) { + uint64_t Size = getContext().getTypeSize(RetTy); + if (Size > 128) + return getNaturalAlignIndirect(RetTy); + else if (Size < 128) { + llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + if (const auto *EIT = RetTy->getAs<BitIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); + + if (isAggregateTypeForABI(RetTy)) { + // ELFv2 homogeneous aggregates are returned as array types. + const Type *Base = nullptr; + uint64_t Members = 0; + if (Kind == PPC64_SVR4_ABIKind::ELFv2 && + isHomogeneousAggregate(RetTy, Base, Members)) { + llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); + llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); + return ABIArgInfo::getDirect(CoerceTy); + } + + // ELFv2 small aggregates are returned in up to two registers. + uint64_t Bits = getContext().getTypeSize(RetTy); + if (Kind == PPC64_SVR4_ABIKind::ELFv2 && Bits <= 2 * GPRBits) { + if (Bits == 0) + return ABIArgInfo::getIgnore(); + + llvm::Type *CoerceTy; + if (Bits > GPRBits) { + CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); + CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); + } else + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); + return ABIArgInfo::getDirect(CoerceTy); + } + + // All other aggregates are returned indirectly. + return getNaturalAlignIndirect(RetTy); + } + + return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); +} + +// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine. +Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + auto TypeInfo = getContext().getTypeInfoInChars(Ty); + TypeInfo.Align = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(8); + + // If we have a complex type and the base type is smaller than 8 bytes, + // the ABI calls for the real and imaginary parts to be right-adjusted + // in separate doublewords. However, Clang expects us to produce a + // pointer to a structure with the two parts packed tightly. So generate + // loads of the real and imaginary parts relative to the va_list pointer, + // and store them to a temporary structure. 
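+ // For example, for _Complex float on a big-endian target the 4-byte parts
+ // sit right-adjusted in their 8-byte doublewords, so they are loaded from
+ // byte offsets 4 and 12 and repacked tightly into the temporary.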
+ if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + CharUnits EltSize = TypeInfo.Width / 2; + if (EltSize < SlotSize) + return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); + } + + // Otherwise, just use the general rule. + // + // The PPC64 ABI passes some arguments in integer registers, even to variadic + // functions. To allow va_list to use the simple "void*" representation, + // variadic calls allocate space in the argument area for the integer argument + // registers, and variadic functions spill their integer argument registers to + // this area in their prologues. When aggregates smaller than a register are + // passed this way, they are passed in the least significant bits of the + // register, which means that after spilling on big-endian targets they will + // be right-aligned in their argument slot. This is uncommon; for a variety of + // reasons, other big-endian targets don't end up right-aligning aggregate + // types this way, and so right-alignment only applies to fundamental types. + // So on PPC64, we must force the use of right-alignment even for aggregates. + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, + SlotSize, /*AllowHigher*/ true, + /*ForceRightAdjust*/ true); +} + +bool +PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable( + CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, + /*IsAIX*/ false); +} + +bool +PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, + /*IsAIX*/ false); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createAIXTargetCodeGenInfo(CodeGenModule &CGM, bool Is64Bit) { + return std::make_unique<AIXTargetCodeGenInfo>(CGM.getTypes(), Is64Bit); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createPPC32TargetCodeGenInfo(CodeGenModule &CGM, bool SoftFloatABI) { + bool RetSmallStructInRegABI = PPC32TargetCodeGenInfo::isStructReturnInRegABI( + CGM.getTriple(), CGM.getCodeGenOpts()); + return std::make_unique<PPC32TargetCodeGenInfo>(CGM.getTypes(), SoftFloatABI, + RetSmallStructInRegABI); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createPPC64TargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<PPC64TargetCodeGenInfo>(CGM.getTypes()); +} + +std::unique_ptr<TargetCodeGenInfo> CodeGen::createPPC64_SVR4_TargetCodeGenInfo( + CodeGenModule &CGM, PPC64_SVR4_ABIKind Kind, bool SoftFloatABI) { + return std::make_unique<PPC64_SVR4_TargetCodeGenInfo>(CGM.getTypes(), Kind, + SoftFloatABI); +} diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp new file mode 100644 index 000000000000..b6d8ae462675 --- /dev/null +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -0,0 +1,519 @@ +//===- RISCV.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" +#include "llvm/TargetParser/RISCVTargetParser.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// RISC-V ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class RISCVABIInfo : public DefaultABIInfo { +private: + // Size of the integer ('x') registers in bits. + unsigned XLen; + // Size of the floating point ('f') registers in bits. Note that the target + // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target + // with soft float ABI has FLen==0). + unsigned FLen; + static const int NumArgGPRs = 8; + static const int NumArgFPRs = 8; + bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const; + +public: + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} + + // DefaultABIInfo's classifyReturnType and classifyArgumentType are + // non-virtual, but computeInfo is virtual, so we overload it. + void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, + int &ArgFPRsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; + + bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; + + ABIArgInfo coerceVLSVector(QualType Ty) const; +}; +} // end anonymous namespace + +void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect, or if the type size is a scalar greater than 2*XLen + // and not a complex type with elements <= FLen. e.g. fp128 is passed direct + // in LLVM IR, relying on the backend lowering code to rewrite the argument + // list and pass indirectly on RV32. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; + if (!IsRetIndirect && RetTy->isScalarType() && + getContext().getTypeSize(RetTy) > (2 * XLen)) { + if (RetTy->isComplexType() && FLen) { + QualType EltTy = RetTy->castAs<ComplexType>()->getElementType(); + IsRetIndirect = getContext().getTypeSize(EltTy) > FLen; + } else { + // This is a normal scalar > 2*XLen, such as fp128 on RV32. + IsRetIndirect = true; + } + } + + int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int ArgFPRsLeft = FLen ? 
NumArgFPRs : 0; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + bool IsFixed = ArgNum < NumFixedArgs; + ArgInfo.info = + classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); + ArgNum++; + } +} + +// Returns true if the struct is a potential candidate for the floating point +// calling convention. If this function returns true, the caller is +// responsible for checking that if there is only a single field then that +// field is a float. +bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > XLen) + return false; + // Can't be eligible if larger than the FP registers. Handling of half + // precision values has been specified in the ABI, so don't block those. + if (IsFloat && Size > FLen) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). + if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs<ComplexType>()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, + Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs<RecordType>()) { + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + if (isEmptyRecord(getContext(), Ty, true)) + return true; + const RecordDecl *RD = RTy->getDecl(); + // Unions aren't eligible unless they're empty (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // If this is a C++ record, check the bases first. 
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl()); + CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); + bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, + Field1Ty, Field1Off, Field2Ty, + Field2Off); + if (!Ret) + return false; + } + } + int ZeroWidthBitFieldCount = 0; + for (const FieldDecl *FD : RD->fields()) { + uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Allow a bitfield with a type greater than XLen as long as the + // bitwidth is XLen or less. + if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen) + QTy = getContext().getIntTypeForBitwidth(XLen, false); + if (BitWidth == 0) { + ZeroWidthBitFieldCount++; + continue; + } + } + + bool Ret = detectFPCCEligibleStructHelper( + QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), + Field1Ty, Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + + // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp + // or int+fp structs, but are ignored for a struct with an fp field and + // any number of zero-width bitfields. + if (Field2Ty && ZeroWidthBitFieldCount > 0) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible for passing according to the floating +// point calling convention (i.e., when flattened it contains a single fp +// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and +// NeededArgGPRs are incremented appropriately. +bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off, + int &NeededArgGPRs, + int &NeededArgFPRs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededArgGPRs = 0; + NeededArgFPRs = 0; + bool IsCandidate = detectFPCCEligibleStructHelper( + Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); + // Not really a candidate if we have a single int but no float. + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (!IsCandidate) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field1Ty) + NeededArgGPRs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field2Ty) + NeededArgGPRs++; + return true; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. 
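+// (Hedged illustration of our own, not from the original comment: on an
+// RV32D target -- XLen 32, FLen 64 -- a struct { double a; double b; }
+// flattens to two doubles at offsets 0 and 8, and the resulting coercion
+// type is the non-packed literal struct { double, double }, so given two
+// free FPRs each field is expanded into its own FPR argument.)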
+ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector<llvm::Type *, 3> CoerceElts; + SmallVector<llvm::Type *, 2> UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlign(Field2Ty)); + CharUnits Field1End = Field1Off + + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1End) + Padding = Field2Off - Field1End; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + auto CoerceToType = + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); + auto UnpaddedCoerceToType = + llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); + + return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); +} + +// Fixed-length RVV vectors are represented as scalable vectors in function +// args/return and must be coerced from fixed vectors. +ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const { + assert(Ty->isVectorType() && "expected vector type!"); + + const auto *VT = Ty->castAs<VectorType>(); + assert(VT->getVectorKind() == VectorType::RVVFixedLengthDataVector && + "Unexpected vector kind"); + + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); + + const auto *BT = VT->getElementType()->castAs<BuiltinType>(); + unsigned EltSize = getContext().getTypeSize(BT); + llvm::ScalableVectorType *ResType = + llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()), + llvm::RISCV::RVVBitsPerBlock / EltSize); + return ABIArgInfo::getDirect(ResType); +} + +ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + int &ArgGPRsLeft, + int &ArgFPRsLeft) const { + assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always passed indirectly. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + if (ArgGPRsLeft) + ArgGPRsLeft -= 1; + return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == + CGCXXABI::RAA_DirectInMemory); + } + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + + // Pass floating point values via FPRs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FLen >= Size && ArgFPRsLeft) { + ArgFPRsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the hard float ABI must be passed direct rather than + // using CoerceAndExpand. 
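+  // (Hedged aside on coerceVLSVector above, ours: with RVVBitsPerBlock == 64,
+  // a fixed-length RVV data vector with i32 elements is coerced to the
+  // scalable type <vscale x 2 x i32> for argument passing.)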
+ if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { + QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FLen) { + ArgFPRsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (IsFixed && FLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); + int NeededArgGPRs = 0; + int NeededArgFPRs = 0; + bool IsCandidate = + detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, + NeededArgGPRs, NeededArgFPRs); + if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && + NeededArgFPRs <= ArgFPRsLeft) { + ArgGPRsLeft -= NeededArgGPRs; + ArgFPRsLeft -= NeededArgFPRs; + return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } + } + + uint64_t NeededAlign = getContext().getTypeAlign(Ty); + // Determine the number of GPRs needed to pass the current argument + // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" + // register pairs, so may consume 3 registers. + int NeededArgGPRs = 1; + if (!IsFixed && NeededAlign == 2 * XLen) + NeededArgGPRs = 2 + (ArgGPRsLeft % 2); + else if (Size > XLen && Size <= 2 * XLen) + NeededArgGPRs = 2; + + if (NeededArgGPRs > ArgGPRsLeft) { + NeededArgGPRs = ArgGPRsLeft; + } + + ArgGPRsLeft -= NeededArgGPRs; + + if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to XLen width + if (Size < XLen && Ty->isIntegralOrEnumerationType()) { + return extendType(Ty); + } + + if (const auto *EIT = Ty->getAs<BitIntType>()) { + if (EIT->getNumBits() < XLen) + return extendType(Ty); + if (EIT->getNumBits() > 128 || + (!getContext().getTargetInfo().hasInt128Type() && + EIT->getNumBits() > 64)) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } + + return ABIArgInfo::getDirect(); + } + + if (const VectorType *VT = Ty->getAs<VectorType>()) + if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector) + return coerceVLSVector(Ty); + + // Aggregates which are <= 2*XLen will be passed in registers if possible, + // so coerce to integers. + if (Size <= 2 * XLen) { + unsigned Alignment = getContext().getTypeAlign(Ty); + + // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is + // required, and a 2-element XLen array if only XLen alignment is required. + if (Size <= XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), XLen)); + } else if (Alignment == 2 * XLen) { + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), 2 * XLen)); + } else { + return ABIArgInfo::getDirect(llvm::ArrayType::get( + llvm::IntegerType::get(getVMContext(), XLen), 2)); + } + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + int ArgGPRsLeft = 2; + int ArgFPRsLeft = FLen ? 2 : 0; + + // The rules for return and argument types are the same, so defer to + // classifyArgumentType. 
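+  // (Illustrative note of our own: under these shared rules a struct of two
+  // i32 fields is coerced to [2 x i32] on RV32 -- and thus returned in
+  // a0/a1 -- but to a single i64 on RV64, per the small-aggregate handling
+  // in classifyArgumentType above.)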
+ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, + ArgFPRsLeft); +} + +Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); + + // Empty records are ignored for parameter passing purposes. + if (isEmptyRecord(getContext(), Ty, true)) { + return Address(CGF.Builder.CreateLoad(VAListAddr), + CGF.ConvertTypeForMem(Ty), SlotSize); + } + + auto TInfo = getContext().getTypeInfoInChars(Ty); + + // Arguments bigger than 2*Xlen bytes are passed indirectly. + bool IsIndirect = TInfo.Width > 2 * SlotSize; + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, + SlotSize, /*AllowHigherAlign=*/true); +} + +ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + // RV64 ABI requires unsigned 32 bit integers to be sign extended. + if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + return ABIArgInfo::getExtend(Ty); +} + +namespace { +class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { +public: + RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, + unsigned FLen) + : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + + const auto *Attr = FD->getAttr<RISCVInterruptAttr>(); + if (!Attr) + return; + + const char *Kind; + switch (Attr->getInterrupt()) { + case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break; + case RISCVInterruptAttr::machine: Kind = "machine"; break; + } + + auto *Fn = cast<llvm::Function>(GV); + + Fn->addFnAttr("interrupt", Kind); + } +}; +} // namespace + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createRISCVTargetCodeGenInfo(CodeGenModule &CGM, unsigned XLen, + unsigned FLen) { + return std::make_unique<RISCVTargetCodeGenInfo>(CGM.getTypes(), XLen, FLen); +} diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp new file mode 100644 index 000000000000..8bacba65617e --- /dev/null +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -0,0 +1,218 @@ +//===- SPIR.cpp -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// Base ABI and target codegen info implementation common between SPIR and +// SPIR-V. 
+//===----------------------------------------------------------------------===//
+
+namespace {
+class CommonSPIRABIInfo : public DefaultABIInfo {
+public:
+  CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
+
+private:
+  void setCCs();
+};
+
+class SPIRVABIInfo : public CommonSPIRABIInfo {
+public:
+  SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
+  void computeInfo(CGFunctionInfo &FI) const override;
+
+private:
+  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+};
+} // end anonymous namespace
+namespace {
+class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+      : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
+  CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
+      : TargetCodeGenInfo(std::move(ABIInfo)) {}
+
+  LangAS getASTAllocaAddressSpace() const override {
+    return getLangASFromTargetAS(
+        getABIInfo().getDataLayout().getAllocaAddrSpace());
+  }
+
+  unsigned getOpenCLKernelCallingConv() const override;
+  llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override;
+};
+class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
+public:
+  SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+      : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
+  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+};
+} // End anonymous namespace.
+
+void CommonSPIRABIInfo::setCCs() {
+  assert(getRuntimeCC() == llvm::CallingConv::C);
+  RuntimeCC = llvm::CallingConv::SPIR_FUNC;
+}
+
+ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
+  if (getContext().getLangOpts().CUDAIsDevice) {
+    // Coerce pointer arguments with default address space to CrossWorkGroup
+    // pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
+    // SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
+    llvm::Type *LTy = CGT.ConvertType(Ty);
+    auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
+    auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
+    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
+    if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
+      LTy = llvm::PointerType::get(PtrTy->getContext(), GlobalAS);
+      return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+    }
+
+    // Force copying aggregate type in kernel arguments by value when
+    // compiling CUDA targeting SPIR-V. This is required for the object
+    // copied to be valid on the device.
+    // This behavior follows the CUDA spec
+    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
+    // and matches the NVPTX implementation.
+    if (isAggregateTypeForABI(Ty))
+      return getNaturalAlignIndirect(Ty, /* byval */ true);
+  }
+  return classifyArgumentType(Ty);
+}
+
+void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  // The logic is the same as in DefaultABIInfo, with an exception for kernel
+  // argument handling.
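+  // (Illustrative sketch, our example: when compiling HIP/CUDA to SPIR-V, a
+  // kernel parameter `int *p` in the default address space is classified by
+  // classifyKernelArgumentType above as a direct CrossWorkGroup pointer
+  // (typically addrspace(1) in the emitted IR), while an aggregate kernel
+  // parameter is passed byval, as the CUDA spec requires.)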
+  llvm::CallingConv::ID CC = FI.getCallingConvention();
+
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+  for (auto &I : FI.arguments()) {
+    if (CC == llvm::CallingConv::SPIR_KERNEL) {
+      I.info = classifyKernelArgumentType(I.type);
+    } else {
+      I.info = classifyArgumentType(I.type);
+    }
+  }
+}
+
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
+  if (CGM.getTarget().getTriple().isSPIRV())
+    SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
+  else
+    CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
+}
+}
+}
+
+unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+  return llvm::CallingConv::SPIR_KERNEL;
+}
+
+void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
+    const FunctionType *&FT) const {
+  // Convert HIP kernels to SPIR-V kernels.
+  if (getABIInfo().getContext().getLangOpts().HIP) {
+    FT = getABIInfo().getContext().adjustFunctionType(
+        FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+    return;
+  }
+}
+
+/// Construct a SPIR-V target extension type for the given OpenCL image type.
+static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
+                                     StringRef OpenCLName,
+                                     unsigned AccessQualifier) {
+  // These parameters compare to the operands of OpTypeImage (see
+  // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpTypeImage
+  // for more details). The first 6 integer parameters all default to 0, and
+  // will be changed to 1 only for the image type(s) that set the parameter to
+  // one. The 7th integer parameter is the access qualifier, which is tacked on
+  // at the end.
+  SmallVector<unsigned, 7> IntParams = {0, 0, 0, 0, 0, 0};
+
+  // Choose the dimension of the image--this corresponds to the Dim enum in
+  // SPIR-V (first integer parameter of OpTypeImage).
+  if (OpenCLName.startswith("image2d"))
+    IntParams[0] = 1; // 2D
+  else if (OpenCLName.startswith("image3d"))
+    IntParams[0] = 2; // 3D
+  else if (OpenCLName == "image1d_buffer")
+    IntParams[0] = 5; // Buffer
+  else
+    assert(OpenCLName.startswith("image1d") && "Unknown image type");
+
+  // Set the other integer parameters of OpTypeImage if necessary. Note that the
+  // OpenCL image types don't provide any information for the Sampled or
+  // Image Format parameters.
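+  // (Worked example, ours, assuming the name checks below: a read-only
+  // image2d_array_depth_t yields Dim=1 (2D), Depth=1 and Arrayed=1, with the
+  // remaining parameters 0 and access qualifier 0 appended -- i.e. the
+  // target extension type spirv.Image with a void sampled type and integer
+  // parameters 1, 1, 1, 0, 0, 0, 0.)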
+ if (OpenCLName.contains("_depth")) + IntParams[1] = 1; + if (OpenCLName.contains("_array")) + IntParams[2] = 1; + if (OpenCLName.contains("_msaa")) + IntParams[3] = 1; + + // Access qualifier + IntParams.push_back(AccessQualifier); + + return llvm::TargetExtType::get(Ctx, BaseType, {llvm::Type::getVoidTy(Ctx)}, + IntParams); +} + +llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM, + const Type *Ty) const { + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + if (auto *PipeTy = dyn_cast<PipeType>(Ty)) + return llvm::TargetExtType::get(Ctx, "spirv.Pipe", {}, + {!PipeTy->isReadOnly()}); + if (auto *BuiltinTy = dyn_cast<BuiltinType>(Ty)) { + enum AccessQualifier : unsigned { AQ_ro = 0, AQ_wo = 1, AQ_rw = 2 }; + switch (BuiltinTy->getKind()) { +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return getSPIRVImageType(Ctx, "spirv.Image", #ImgType, AQ_##Suffix); +#include "clang/Basic/OpenCLImageTypes.def" + case BuiltinType::OCLSampler: + return llvm::TargetExtType::get(Ctx, "spirv.Sampler"); + case BuiltinType::OCLEvent: + return llvm::TargetExtType::get(Ctx, "spirv.Event"); + case BuiltinType::OCLClkEvent: + return llvm::TargetExtType::get(Ctx, "spirv.DeviceEvent"); + case BuiltinType::OCLQueue: + return llvm::TargetExtType::get(Ctx, "spirv.Queue"); + case BuiltinType::OCLReserveID: + return llvm::TargetExtType::get(Ctx, "spirv.ReserveId"); +#define INTEL_SUBGROUP_AVC_TYPE(Name, Id) \ + case BuiltinType::OCLIntelSubgroupAVC##Id: \ + return llvm::TargetExtType::get(Ctx, "spirv.Avc" #Id "INTEL"); +#include "clang/Basic/OpenCLExtensionTypes.def" + default: + return nullptr; + } + } + + return nullptr; +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createCommonSPIRTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<CommonSPIRTargetCodeGenInfo>(CGM.getTypes()); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createSPIRVTargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<SPIRVTargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/Sparc.cpp b/clang/lib/CodeGen/Targets/Sparc.cpp new file mode 100644 index 000000000000..f5cafaa97315 --- /dev/null +++ b/clang/lib/CodeGen/Targets/Sparc.cpp @@ -0,0 +1,409 @@ +//===- Sparc.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// SPARC v8 ABI Implementation. +// Based on the SPARC Compliance Definition version 2.4.1. +// +// Ensures that complex values are passed in registers. 
+//
+namespace {
+class SparcV8ABIInfo : public DefaultABIInfo {
+public:
+  SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  void computeInfo(CGFunctionInfo &FI) const override;
+};
+} // end anonymous namespace
+
+
+ABIArgInfo
+SparcV8ABIInfo::classifyReturnType(QualType Ty) const {
+  if (Ty->isAnyComplexType()) {
+    return ABIArgInfo::getDirect();
+  }
+  else {
+    return DefaultABIInfo::classifyReturnType(Ty);
+  }
+}
+
+void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+
+  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  for (auto &Arg : FI.arguments())
+    Arg.info = classifyArgumentType(Arg.type);
+}
+
+namespace {
+class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  SparcV8TargetCodeGenInfo(CodeGenTypes &CGT)
+      : TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {}
+
+  llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
+                                   llvm::Value *Address) const override {
+    int Offset;
+    if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
+      Offset = 12;
+    else
+      Offset = 8;
+    return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
+                                 llvm::ConstantInt::get(CGF.Int32Ty, Offset));
+  }
+
+  llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
+                                   llvm::Value *Address) const override {
+    int Offset;
+    if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
+      Offset = -12;
+    else
+      Offset = -8;
+    return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
+                                 llvm::ConstantInt::get(CGF.Int32Ty, Offset));
+  }
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// SPARC v9 ABI Implementation.
+// Based on the SPARC Compliance Definition version 2.4.1.
+//
+// Function arguments are mapped to a nominal "parameter array" and promoted to
+// registers depending on their type. Each argument occupies 8 or 16 bytes in
+// the array; structs larger than 16 bytes are passed indirectly.
+//
+// One case requires special care:
+//
+//   struct mixed {
+//     int i;
+//     float f;
+//   };
+//
+// When a struct mixed is passed by value, it only occupies 8 bytes in the
+// parameter array, but the int is passed in an integer register, and the float
+// is passed in a floating point register. This is represented as two arguments
+// with the LLVM IR inreg attribute:
+//
+//   declare void f(i32 inreg %i, float inreg %f)
+//
+// The code generator will only allocate 4 bytes from the parameter array for
+// the inreg arguments. All other arguments are allocated a multiple of 8
+// bytes.
+//
+namespace {
+class SparcV9ABIInfo : public ABIInfo {
+public:
+  SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+
+private:
+  ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
+  void computeInfo(CGFunctionInfo &FI) const override;
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+
+  // Coercion type builder for structs passed in registers. The coercion type
+  // serves two purposes:
+  //
+  // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned'
+  //    in registers.
+  // 2. Expose aligned floating point elements as first-level elements, so the
+  //    code generator knows to pass them in floating point registers.
+  //
+  // We also compute the InReg flag which indicates that the struct contains
+  // aligned 32-bit floats.
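+  //
+  // (Hedged illustration of our own: for a struct { double d; int i; } the
+  // builder produces the coercion type { double, i64 } -- the double becomes
+  // a first-level element passed in a floating point register, while the int
+  // is covered by 64-bit integer padding passed in a GPR. InReg stays false
+  // because no aligned 32-bit float is present.)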
+ // + struct CoerceBuilder { + llvm::LLVMContext &Context; + const llvm::DataLayout &DL; + SmallVector<llvm::Type*, 8> Elems; + uint64_t Size; + bool InReg; + + CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl) + : Context(c), DL(dl), Size(0), InReg(false) {} + + // Pad Elems with integers until Size is ToSize. + void pad(uint64_t ToSize) { + assert(ToSize >= Size && "Cannot remove elements"); + if (ToSize == Size) + return; + + // Finish the current 64-bit word. + uint64_t Aligned = llvm::alignTo(Size, 64); + if (Aligned > Size && Aligned <= ToSize) { + Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); + Size = Aligned; + } + + // Add whole 64-bit words. + while (Size + 64 <= ToSize) { + Elems.push_back(llvm::Type::getInt64Ty(Context)); + Size += 64; + } + + // Final in-word padding. + if (Size < ToSize) { + Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size)); + Size = ToSize; + } + } + + // Add a floating point element at Offset. + void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) { + // Unaligned floats are treated as integers. + if (Offset % Bits) + return; + // The InReg flag is only required if there are any floats < 64 bits. + if (Bits < 64) + InReg = true; + pad(Offset); + Elems.push_back(Ty); + Size = Offset + Bits; + } + + // Add a struct type to the coercion type, starting at Offset (in bits). + void addStruct(uint64_t Offset, llvm::StructType *StrTy) { + const llvm::StructLayout *Layout = DL.getStructLayout(StrTy); + for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) { + llvm::Type *ElemTy = StrTy->getElementType(i); + uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i); + switch (ElemTy->getTypeID()) { + case llvm::Type::StructTyID: + addStruct(ElemOffset, cast<llvm::StructType>(ElemTy)); + break; + case llvm::Type::FloatTyID: + addFloat(ElemOffset, ElemTy, 32); + break; + case llvm::Type::DoubleTyID: + addFloat(ElemOffset, ElemTy, 64); + break; + case llvm::Type::FP128TyID: + addFloat(ElemOffset, ElemTy, 128); + break; + case llvm::Type::PointerTyID: + if (ElemOffset % 64 == 0) { + pad(ElemOffset); + Elems.push_back(ElemTy); + Size += 64; + } + break; + default: + break; + } + } + } + + // Check if Ty is a usable substitute for the coercion type. + bool isUsableType(llvm::StructType *Ty) const { + return llvm::ArrayRef(Elems) == Ty->elements(); + } + + // Get the coercion type as a literal struct type. + llvm::Type *getType() const { + if (Elems.size() == 1) + return Elems.front(); + else + return llvm::StructType::get(Context, Elems); + } + }; +}; +} // end anonymous namespace + +ABIArgInfo +SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { + if (Ty->isVoidType()) + return ABIArgInfo::getIgnore(); + + uint64_t Size = getContext().getTypeSize(Ty); + + // Anything too big to fit in registers is passed with an explicit indirect + // pointer / sret pointer. + if (Size > SizeLimit) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // Integer types smaller than a register are extended. + if (Size < 64 && Ty->isIntegerType()) + return ABIArgInfo::getExtend(Ty); + + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() < 64) + return ABIArgInfo::getExtend(Ty); + + // Other non-aggregates go in registers. 
+ if (!isAggregateTypeForABI(Ty)) + return ABIArgInfo::getDirect(); + + // If a C++ object has either a non-trivial copy constructor or a non-trivial + // destructor, it is passed with an explicit indirect pointer / sret pointer. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // This is a small aggregate type that should be passed in registers. + // Build a coercion type from the LLVM struct type. + llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); + if (!StrTy) + return ABIArgInfo::getDirect(); + + CoerceBuilder CB(getVMContext(), getDataLayout()); + CB.addStruct(0, StrTy); + CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); + + // Try to use the original type for coercion. + llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType(); + + if (CB.InReg) + return ABIArgInfo::getDirectInReg(CoerceTy); + else + return ABIArgInfo::getDirect(CoerceTy); +} + +Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + ABIArgInfo AI = classifyType(Ty, 16 * 8); + llvm::Type *ArgTy = CGT.ConvertType(Ty); + if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) + AI.setCoerceToType(ArgTy); + + CharUnits SlotSize = CharUnits::fromQuantity(8); + + CGBuilderTy &Builder = CGF.Builder; + Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"), + getVAListElementType(CGF), SlotSize); + llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); + + auto TypeInfo = getContext().getTypeInfoInChars(Ty); + + Address ArgAddr = Address::invalid(); + CharUnits Stride; + switch (AI.getKind()) { + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + case ABIArgInfo::InAlloca: + llvm_unreachable("Unsupported ABI kind for va_arg"); + + case ABIArgInfo::Extend: { + Stride = SlotSize; + CharUnits Offset = SlotSize - TypeInfo.Width; + ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend"); + break; + } + + case ABIArgInfo::Direct: { + auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); + Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); + ArgAddr = Addr; + break; + } + + case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: + Stride = SlotSize; + ArgAddr = Addr.withElementType(ArgPtrTy); + ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy, + TypeInfo.Align); + break; + + case ABIArgInfo::Ignore: + return Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeInfo.Align); + } + + // Update VAList. 
+ Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next"); + Builder.CreateStore(NextPtr.getPointer(), VAListAddr); + + return ArgAddr.withElementType(ArgTy); +} + +void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { + FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); + for (auto &I : FI.arguments()) + I.info = classifyType(I.type, 16 * 8); +} + +namespace { +class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { +public: + SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {} + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + return 14; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; + + llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override { + return CGF.Builder.CreateGEP(CGF.Int8Ty, Address, + llvm::ConstantInt::get(CGF.Int32Ty, 8)); + } + + llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override { + return CGF.Builder.CreateGEP(CGF.Int8Ty, Address, + llvm::ConstantInt::get(CGF.Int32Ty, -8)); + } +}; +} // end anonymous namespace + +bool +SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + // This is calculated from the LLVM and GCC tables and verified + // against gcc output. AFAIK all ABIs use the same encoding. + + CodeGen::CGBuilderTy &Builder = CGF.Builder; + + llvm::IntegerType *i8 = CGF.Int8Ty; + llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); + llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); + + // 0-31: the 8-byte general-purpose registers + AssignToArrayRange(Builder, Address, Eight8, 0, 31); + + // 32-63: f0-31, the 4-byte floating-point registers + AssignToArrayRange(Builder, Address, Four8, 32, 63); + + // Y = 64 + // PSR = 65 + // WIM = 66 + // TBR = 67 + // PC = 68 + // NPC = 69 + // FSR = 70 + // CSR = 71 + AssignToArrayRange(Builder, Address, Eight8, 64, 71); + + // 72-87: d0-15, the 8-byte floating-point registers + AssignToArrayRange(Builder, Address, Eight8, 72, 87); + + return false; +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createSparcV8TargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<SparcV8TargetCodeGenInfo>(CGM.getTypes()); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createSparcV9TargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<SparcV9TargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/SystemZ.cpp b/clang/lib/CodeGen/Targets/SystemZ.cpp new file mode 100644 index 000000000000..6eb0c6ef2f7d --- /dev/null +++ b/clang/lib/CodeGen/Targets/SystemZ.cpp @@ -0,0 +1,538 @@ +//===- SystemZ.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+#include "clang/Basic/Builtins.h"
+#include "llvm/IR/IntrinsicsS390.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// SystemZ ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class SystemZABIInfo : public ABIInfo {
+  bool HasVector;
+  bool IsSoftFloatABI;
+
+public:
+  SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF)
+      : ABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
+
+  bool isPromotableIntegerTypeForABI(QualType Ty) const;
+  bool isCompoundType(QualType Ty) const;
+  bool isVectorArgumentType(QualType Ty) const;
+  bool isFPArgumentType(QualType Ty) const;
+  QualType GetSingleElementType(QualType Ty) const;
+
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType ArgTy) const;
+
+  void computeInfo(CGFunctionInfo &FI) const override;
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+};
+
+class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
+  ASTContext &Ctx;
+
+  // These are used for speeding up the search for a visible vector ABI.
+  mutable bool HasVisibleVecABIFlag = false;
+  mutable std::set<const Type *> SeenTypes;
+
+  // Returns true (the first time) if Ty is, or is found to include, a vector
+  // type that exposes the vector ABI. This is any vector of 16 bytes or more,
+  // which with vector support is aligned to only 8 bytes. When IsParam is
+  // true, the type belongs to a value as passed between functions. If it is
+  // a vector of at most 16 bytes it will be passed in a vector register (if
+  // supported).
+  bool isVectorTypeBased(const Type *Ty, bool IsParam) const;
+
+public:
+  SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
+      : TargetCodeGenInfo(
+            std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)),
+        Ctx(CGT.getContext()) {
+    SwiftInfo =
+        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+  }
+
+  // The vector ABI is different when the vector facility is present. When a
+  // module e.g. defines an externally visible vector variable, a flag
+  // indicating a visible vector ABI is added; eventually this will result in
+  // a GNU attribute indicating the vector ABI of the module. Ty is the type
+  // of a variable or function parameter that is globally visible.
+  void handleExternallyVisibleObjABI(const Type *Ty, CodeGen::CodeGenModule &M,
+                                     bool IsParam) const {
+    if (!HasVisibleVecABIFlag && isVectorTypeBased(Ty, IsParam)) {
+      M.getModule().addModuleFlag(llvm::Module::Warning,
+                                  "s390x-visible-vector-ABI", 1);
+      HasVisibleVecABIFlag = true;
+    }
+  }
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override {
+    if (!D)
+      return;
+
+    // Check if the vector ABI becomes visible by an externally visible
+    // variable or function.
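+    // (Illustrative, our example: an externally visible definition of a
+    // 16-byte vector variable, say `vector signed int V;`, makes
+    // isVectorTypeBased return true, so handleExternallyVisibleObjABI above
+    // records the "s390x-visible-vector-ABI" module flag exactly once.)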
+ if (const auto *VD = dyn_cast<VarDecl>(D)) { + if (VD->isExternallyVisible()) + handleExternallyVisibleObjABI(VD->getType().getTypePtr(), M, + /*IsParam*/false); + } + else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + if (FD->isExternallyVisible()) + handleExternallyVisibleObjABI(FD->getType().getTypePtr(), M, + /*IsParam*/false); + } + } + + llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID, + CGBuilderTy &Builder, + CodeGenModule &CGM) const override { + assert(V->getType()->isFloatingPointTy() && "V should have an FP type."); + // Only use TDC in constrained FP mode. + if (!Builder.getIsFPConstrained()) + return nullptr; + + llvm::Type *Ty = V->getType(); + if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) { + llvm::Module &M = CGM.getModule(); + auto &Ctx = M.getContext(); + llvm::Function *TDCFunc = + llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::s390_tdc, Ty); + unsigned TDCBits = 0; + switch (BuiltinID) { + case Builtin::BI__builtin_isnan: + TDCBits = 0xf; + break; + case Builtin::BIfinite: + case Builtin::BI__finite: + case Builtin::BIfinitef: + case Builtin::BI__finitef: + case Builtin::BIfinitel: + case Builtin::BI__finitel: + case Builtin::BI__builtin_isfinite: + TDCBits = 0xfc0; + break; + case Builtin::BI__builtin_isinf: + TDCBits = 0x30; + break; + default: + break; + } + if (TDCBits) + return Builder.CreateCall( + TDCFunc, + {V, llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), TDCBits)}); + } + return nullptr; + } +}; +} + +bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // Promotable integer types are required to be promoted by the ABI. + if (ABIInfo::isPromotableIntegerTypeForABI(Ty)) + return true; + + if (const auto *EIT = Ty->getAs<BitIntType>()) + if (EIT->getNumBits() < 64) + return true; + + // 32-bit values must also be promoted. + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Int: + case BuiltinType::UInt: + return true; + default: + return false; + } + return false; +} + +bool SystemZABIInfo::isCompoundType(QualType Ty) const { + return (Ty->isAnyComplexType() || + Ty->isVectorType() || + isAggregateTypeForABI(Ty)); +} + +bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { + return (HasVector && + Ty->isVectorType() && + getContext().getTypeSize(Ty) <= 128); +} + +bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { + if (IsSoftFloatABI) + return false; + + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Float: + case BuiltinType::Double: + return true; + default: + return false; + } + + return false; +} + +QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { + const RecordType *RT = Ty->getAs<RecordType>(); + + if (RT && RT->isStructureOrClassType()) { + const RecordDecl *RD = RT->getDecl(); + QualType Found; + + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->hasDefinition()) + for (const auto &I : CXXRD->bases()) { + QualType Base = I.getType(); + + // Empty bases don't affect things either way. + if (isEmptyRecord(getContext(), Base, true)) + continue; + + if (!Found.isNull()) + return Ty; + Found = GetSingleElementType(Base); + } + + // Check the fields. 
+ for (const auto *FD : RD->fields()) { + // Unlike isSingleElementStruct(), empty structure and array fields + // do count. So do anonymous bitfields that aren't zero-sized. + + // Like isSingleElementStruct(), ignore C++20 empty data members. + if (FD->hasAttr<NoUniqueAddressAttr>() && + isEmptyRecord(getContext(), FD->getType(), true)) + continue; + + // Unlike isSingleElementStruct(), arrays do not count. + // Nested structures still do though. + if (!Found.isNull()) + return Ty; + Found = GetSingleElementType(FD->getType()); + } + + // Unlike isSingleElementStruct(), trailing padding is allowed. + // An 8-byte aligned struct s { float f; } is passed as a double. + if (!Found.isNull()) + return Found; + } + + return Ty; +} + +Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + // Assume that va_list type is correct; should be pointer to LLVM type: + // struct { + // i64 __gpr; + // i64 __fpr; + // i8 *__overflow_arg_area; + // i8 *__reg_save_area; + // }; + + // Every non-vector argument occupies 8 bytes and is passed by preference + // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are + // always passed on the stack. + const SystemZTargetCodeGenInfo &SZCGI = + static_cast<const SystemZTargetCodeGenInfo &>( + CGT.getCGM().getTargetCodeGenInfo()); + Ty = getContext().getCanonicalType(Ty); + auto TyInfo = getContext().getTypeInfoInChars(Ty); + llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); + llvm::Type *DirectTy = ArgTy; + ABIArgInfo AI = classifyArgumentType(Ty); + bool IsIndirect = AI.isIndirect(); + bool InFPRs = false; + bool IsVector = false; + CharUnits UnpaddedSize; + CharUnits DirectAlign; + SZCGI.handleExternallyVisibleObjABI(Ty.getTypePtr(), CGT.getCGM(), + /*IsParam*/true); + if (IsIndirect) { + DirectTy = llvm::PointerType::getUnqual(DirectTy); + UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); + } else { + if (AI.getCoerceToType()) + ArgTy = AI.getCoerceToType(); + InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy())); + IsVector = ArgTy->isVectorTy(); + UnpaddedSize = TyInfo.Width; + DirectAlign = TyInfo.Align; + } + CharUnits PaddedSize = CharUnits::fromQuantity(8); + if (IsVector && UnpaddedSize > PaddedSize) + PaddedSize = CharUnits::fromQuantity(16); + assert((UnpaddedSize <= PaddedSize) && "Invalid argument size."); + + CharUnits Padding = (PaddedSize - UnpaddedSize); + + llvm::Type *IndexTy = CGF.Int64Ty; + llvm::Value *PaddedSizeV = + llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity()); + + if (IsVector) { + // Work out the address of a vector argument on the stack. + // Vector arguments are always passed in the high bits of a + // single (8 byte) or double (16 byte) stack slot. 
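+    // (Hedged note, ours: a 16-byte vector argument raises PaddedSize above
+    // to 16, so the pointer bump below advances overflow_arg_area by 16
+    // bytes after the value is loaded.)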
+ Address OverflowArgAreaPtr = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); + Address OverflowArgArea = + Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), + CGF.Int8Ty, TyInfo.Align); + Address MemAddr = OverflowArgArea.withElementType(DirectTy); + + // Update overflow_arg_area_ptr pointer + llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP( + OverflowArgArea.getElementType(), OverflowArgArea.getPointer(), + PaddedSizeV, "overflow_arg_area"); + CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); + + return MemAddr; + } + + assert(PaddedSize.getQuantity() == 8); + + unsigned MaxRegs, RegCountField, RegSaveIndex; + CharUnits RegPadding; + if (InFPRs) { + MaxRegs = 4; // Maximum of 4 FPR arguments + RegCountField = 1; // __fpr + RegSaveIndex = 16; // save offset for f0 + RegPadding = CharUnits(); // floats are passed in the high bits of an FPR + } else { + MaxRegs = 5; // Maximum of 5 GPR arguments + RegCountField = 0; // __gpr + RegSaveIndex = 2; // save offset for r2 + RegPadding = Padding; // values are passed in the low bits of a GPR + } + + Address RegCountPtr = + CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr"); + llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); + llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); + llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, + "fits_in_regs"); + + llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); + llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); + CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); + + // Emit code to load the value if it was passed in registers. + CGF.EmitBlock(InRegBlock); + + // Work out the address of an argument register. + llvm::Value *ScaledRegCount = + CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count"); + llvm::Value *RegBase = + llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity() + + RegPadding.getQuantity()); + llvm::Value *RegOffset = + CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); + Address RegSaveAreaPtr = + CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr"); + llvm::Value *RegSaveArea = + CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); + Address RawRegAddr( + CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, RegOffset, "raw_reg_addr"), + CGF.Int8Ty, PaddedSize); + Address RegAddr = RawRegAddr.withElementType(DirectTy); + + // Update the register count + llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1); + llvm::Value *NewRegCount = + CGF.Builder.CreateAdd(RegCount, One, "reg_count"); + CGF.Builder.CreateStore(NewRegCount, RegCountPtr); + CGF.EmitBranch(ContBlock); + + // Emit code to load the value if it was passed in memory. + CGF.EmitBlock(InMemBlock); + + // Work out the address of a stack argument. 
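+  // (Worked example for the register path above, ours: loading the second
+  // FP vararg, with __fpr == 1, computes reg_offset = 16*8 + 1*8 = 136 bytes
+  // into the register save area.)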
+ Address OverflowArgAreaPtr = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); + Address OverflowArgArea = + Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), + CGF.Int8Ty, PaddedSize); + Address RawMemAddr = + CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr"); + Address MemAddr = RawMemAddr.withElementType(DirectTy); + + // Update overflow_arg_area_ptr pointer + llvm::Value *NewOverflowArgArea = + CGF.Builder.CreateGEP(OverflowArgArea.getElementType(), + OverflowArgArea.getPointer(), PaddedSizeV, + "overflow_arg_area"); + CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); + CGF.EmitBranch(ContBlock); + + // Return the appropriate result. + CGF.EmitBlock(ContBlock); + Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, + "va_arg.addr"); + + if (IsIndirect) + ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), ArgTy, + TyInfo.Align); + + return ResAddr; +} + +ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + if (isVectorArgumentType(RetTy)) + return ABIArgInfo::getDirect(); + if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) + return getNaturalAlignIndirect(RetTy); + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); +} + +ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { + // Handle the generic C++ ABI. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // Integers and enums are extended to full register width. + if (isPromotableIntegerTypeForABI(Ty)) + return ABIArgInfo::getExtend(Ty); + + // Handle vector types and vector-like structure types. Note that + // as opposed to float-like structure types, we do not allow any + // padding for vector-like structures, so verify the sizes match. + uint64_t Size = getContext().getTypeSize(Ty); + QualType SingleElementTy = GetSingleElementType(Ty); + if (isVectorArgumentType(SingleElementTy) && + getContext().getTypeSize(SingleElementTy) == Size) + return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy)); + + // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly. + if (Size != 8 && Size != 16 && Size != 32 && Size != 64) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + + // Handle small structures. + if (const RecordType *RT = Ty->getAs<RecordType>()) { + // Structures with flexible arrays have variable length, so really + // fail the size test above. + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + + // The structure is passed as an unextended integer, a float, or a double. + llvm::Type *PassTy; + if (isFPArgumentType(SingleElementTy)) { + assert(Size == 32 || Size == 64); + if (Size == 32) + PassTy = llvm::Type::getFloatTy(getVMContext()); + else + PassTy = llvm::Type::getDoubleTy(getVMContext()); + } else + PassTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(PassTy); + } + + // Non-structure compounds are passed indirectly. 
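+  // (Hedged examples of our own for the record path above: a struct holding
+  // a single float is passed as a plain float and a struct { double d; } as
+  // a double, while a 3-byte struct already failed the size check earlier
+  // and is passed indirectly.)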
+ if (isCompoundType(Ty)) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + + return ABIArgInfo::getDirect(nullptr); +} + +void SystemZABIInfo::computeInfo(CGFunctionInfo &FI) const { + const SystemZTargetCodeGenInfo &SZCGI = + static_cast<const SystemZTargetCodeGenInfo &>( + CGT.getCGM().getTargetCodeGenInfo()); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + unsigned Idx = 0; + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type); + if (FI.isVariadic() && Idx++ >= FI.getNumRequiredArgs()) + // Check if a vararg vector argument is passed, in which case the + // vector ABI becomes visible as the va_list could be passed on to + // other functions. + SZCGI.handleExternallyVisibleObjABI(I.type.getTypePtr(), CGT.getCGM(), + /*IsParam*/true); + } +} + +bool SystemZTargetCodeGenInfo::isVectorTypeBased(const Type *Ty, + bool IsParam) const { + if (!SeenTypes.insert(Ty).second) + return false; + + if (IsParam) { + // A narrow (<16 bytes) vector will as a parameter also expose the ABI as + // it will be passed in a vector register. A wide (>16 bytes) vector will + // be passed via "hidden" pointer where any extra alignment is not + // required (per GCC). + const Type *SingleEltTy = getABIInfo<SystemZABIInfo>() + .GetSingleElementType(QualType(Ty, 0)) + .getTypePtr(); + bool SingleVecEltStruct = SingleEltTy != Ty && SingleEltTy->isVectorType() && + Ctx.getTypeSize(SingleEltTy) == Ctx.getTypeSize(Ty); + if (Ty->isVectorType() || SingleVecEltStruct) + return Ctx.getTypeSize(Ty) / 8 <= 16; + } + + // Assume pointers are dereferenced. + while (Ty->isPointerType() || Ty->isArrayType()) + Ty = Ty->getPointeeOrArrayElementType(); + + // Vectors >= 16 bytes expose the ABI through alignment requirements. + if (Ty->isVectorType() && Ctx.getTypeSize(Ty) / 8 >= 16) + return true; + + if (const auto *RecordTy = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RecordTy->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->hasDefinition()) + for (const auto &I : CXXRD->bases()) + if (isVectorTypeBased(I.getType().getTypePtr(), /*IsParam*/false)) + return true; + for (const auto *FD : RD->fields()) + if (isVectorTypeBased(FD->getType().getTypePtr(), /*IsParam*/false)) + return true; + } + + if (const auto *FT = Ty->getAs<FunctionType>()) + if (isVectorTypeBased(FT->getReturnType().getTypePtr(), /*IsParam*/true)) + return true; + if (const FunctionProtoType *Proto = Ty->getAs<FunctionProtoType>()) + for (const auto &ParamType : Proto->getParamTypes()) + if (isVectorTypeBased(ParamType.getTypePtr(), /*IsParam*/true)) + return true; + + return false; +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createSystemZTargetCodeGenInfo(CodeGenModule &CGM, bool HasVector, + bool SoftFloatABI) { + return std::make_unique<SystemZTargetCodeGenInfo>(CGM.getTypes(), HasVector, + SoftFloatABI); +} diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp new file mode 100644 index 000000000000..d7178b4b8a94 --- /dev/null +++ b/clang/lib/CodeGen/Targets/TCE.cpp @@ -0,0 +1,82 @@ +//===- TCE.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults. +// Currently subclassed only to implement custom OpenCL C function attribute +// handling. +//===----------------------------------------------------------------------===// + +namespace { + +class TCETargetCodeGenInfo : public TargetCodeGenInfo { +public: + TCETargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; +}; + +void TCETargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (GV->isDeclaration()) + return; + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; + + llvm::Function *F = cast<llvm::Function>(GV); + + if (M.getLangOpts().OpenCL) { + if (FD->hasAttr<OpenCLKernelAttr>()) { + // OpenCL C Kernel functions are not subject to inlining + F->addFnAttr(llvm::Attribute::NoInline); + const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); + if (Attr) { + // Convert the reqd_work_group_size() attributes to metadata. + llvm::LLVMContext &Context = F->getContext(); + llvm::NamedMDNode *OpenCLMetadata = + M.getModule().getOrInsertNamedMetadata( + "opencl.kernel_wg_size_info"); + + SmallVector<llvm::Metadata *, 5> Operands; + Operands.push_back(llvm::ConstantAsMetadata::get(F)); + + Operands.push_back( + llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( + M.Int32Ty, llvm::APInt(32, Attr->getXDim())))); + Operands.push_back( + llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( + M.Int32Ty, llvm::APInt(32, Attr->getYDim())))); + Operands.push_back( + llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue( + M.Int32Ty, llvm::APInt(32, Attr->getZDim())))); + + // Add a boolean constant operand for "required" (true) or "hint" + // (false) for implementing the work_group_size_hint attr later. + // Currently always true as the hint is not yet implemented. + Operands.push_back( + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context))); + OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands)); + } + } + } +} + +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createTCETargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique<TCETargetCodeGenInfo>(CGM.getTypes()); +} diff --git a/clang/lib/CodeGen/Targets/VE.cpp b/clang/lib/CodeGen/Targets/VE.cpp new file mode 100644 index 000000000000..a7acc249cc2b --- /dev/null +++ b/clang/lib/CodeGen/Targets/VE.cpp @@ -0,0 +1,71 @@ +//===- VE.cpp -------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// VE ABI Implementation. 
+//
+namespace {
+class VEABIInfo : public DefaultABIInfo {
+public:
+  VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  void computeInfo(CGFunctionInfo &FI) const override;
+};
+} // end anonymous namespace
+
+ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
+  if (Ty->isAnyComplexType())
+    return ABIArgInfo::getDirect();
+  uint64_t Size = getContext().getTypeSize(Ty);
+  if (Size < 64 && Ty->isIntegerType())
+    return ABIArgInfo::getExtend(Ty);
+  return DefaultABIInfo::classifyReturnType(Ty);
+}
+
+ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
+  if (Ty->isAnyComplexType())
+    return ABIArgInfo::getDirect();
+  uint64_t Size = getContext().getTypeSize(Ty);
+  if (Size < 64 && Ty->isIntegerType())
+    return ABIArgInfo::getExtend(Ty);
+  return DefaultABIInfo::classifyArgumentType(Ty);
+}
+
+void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  for (auto &Arg : FI.arguments())
+    Arg.info = classifyArgumentType(Arg.type);
+}
+
+namespace {
+class VETargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  VETargetCodeGenInfo(CodeGenTypes &CGT)
+      : TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {}
+  // The VE ABI requires that the arguments of variadic and prototype-less
+  // functions be passed in both registers and memory.
+  bool isNoProtoCallVariadic(const CallArgList &args,
+                             const FunctionNoProtoType *fnType) const override {
+    return true;
+  }
+};
+} // end anonymous namespace
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createVETargetCodeGenInfo(CodeGenModule &CGM) {
+  return std::make_unique<VETargetCodeGenInfo>(CGM.getTypes());
+}
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp b/clang/lib/CodeGen/Targets/WebAssembly.cpp
new file mode 100644
index 000000000000..bd332228ce5b
--- /dev/null
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -0,0 +1,173 @@
+//===- WebAssembly.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// WebAssembly ABI Implementation
+//
+// This is a very simple ABI that relies a lot on DefaultABIInfo.
+//===----------------------------------------------------------------------===//
+
+class WebAssemblyABIInfo final : public ABIInfo {
+  DefaultABIInfo defaultInfo;
+  WebAssemblyABIKind Kind;
+
+public:
+  explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT,
+                              WebAssemblyABIKind Kind)
+      : ABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
+
+private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType Ty) const;
+
+  // DefaultABIInfo's classifyReturnType and classifyArgumentType are
+  // non-virtual, but computeInfo and EmitVAArg are virtual, so we
+  // override them.
+ void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &Arg : FI.arguments()) + Arg.info = classifyArgumentType(Arg.type); + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { +public: + explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, + WebAssemblyABIKind K) + : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) { + SwiftInfo = + std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false); + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B(GV->getContext()); + B.addAttribute("wasm-import-module", Attr->getImportModule()); + Fn->addFnAttrs(B); + } + if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B(GV->getContext()); + B.addAttribute("wasm-import-name", Attr->getImportName()); + Fn->addFnAttrs(B); + } + if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + llvm::AttrBuilder B(GV->getContext()); + B.addAttribute("wasm-export-name", Attr->getExportName()); + Fn->addFnAttrs(B); + } + } + + if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) { + llvm::Function *Fn = cast<llvm::Function>(GV); + if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) + Fn->addFnAttr("no-prototype"); + } + } + + /// Return the WebAssembly externref reference type. + virtual llvm::Type *getWasmExternrefReferenceType() const override { + return llvm::Type::getWasm_ExternrefTy(getABIInfo().getVMContext()); + } + /// Return the WebAssembly funcref reference type. + virtual llvm::Type *getWasmFuncrefReferenceType() const override { + return llvm::Type::getWasm_FuncrefTy(getABIInfo().getVMContext()); + } +}; + +/// Classify argument of given type \p Ty. +ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. 
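+    // For example, `struct S { float f; };` is passed as a plain float
+    // rather than as an aggregate.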
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + // For the experimental multivalue ABI, fully expand all other aggregates + if (Kind == WebAssemblyABIKind::ExperimentalMV) { + const RecordType *RT = Ty->getAs<RecordType>(); + assert(RT); + bool HasBitField = false; + for (auto *Field : RT->getDecl()->fields()) { + if (Field->isBitField()) { + HasBitField = true; + break; + } + } + if (!HasBitField) + return ABIArgInfo::getExpand(); + } + } + + // Otherwise just do the default thing. + return defaultInfo.classifyArgumentType(Ty); +} + +ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { + if (isAggregateTypeForABI(RetTy)) { + // Records with non-trivial destructors/copy-constructors should not be + // returned by value. + if (!getRecordArgABI(RetTy, getCXXABI())) { + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + // Lower single-element structs to just return a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using + // ABIArgInfo::getDirect(). + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + // For the experimental multivalue ABI, return all other aggregates + if (Kind == WebAssemblyABIKind::ExperimentalMV) + return ABIArgInfo::getDirect(); + } + } + + // Otherwise just do the default thing. + return defaultInfo.classifyReturnType(RetTy); +} + +Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + bool IsIndirect = isAggregateTypeForABI(Ty) && + !isEmptyRecord(getContext(), Ty, true) && + !isSingleElementStruct(Ty, getContext()); + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), + /*AllowHigherAlign=*/true); +} + +std::unique_ptr<TargetCodeGenInfo> +CodeGen::createWebAssemblyTargetCodeGenInfo(CodeGenModule &CGM, + WebAssemblyABIKind K) { + return std::make_unique<WebAssemblyTargetCodeGenInfo>(CGM.getTypes(), K); +} diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp new file mode 100644 index 000000000000..31679d899a44 --- /dev/null +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -0,0 +1,3402 @@ +//===- X86.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "TargetInfo.h" +#include "clang/Basic/DiagnosticFrontend.h" +#include "llvm/ADT/SmallBitVector.h" + +using namespace clang; +using namespace clang::CodeGen; + +namespace { + +/// IsX86_MMXType - Return true if this is an MMX type. +bool IsX86_MMXType(llvm::Type *IRType) { + // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. 
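+  // That is, a 64-bit vector of integer elements; the scalar-size check
+  // below deliberately excludes <1 x i64>.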
+ return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && + cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && + IRType->getScalarSizeInBits() != 64; +} + +static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, + StringRef Constraint, + llvm::Type* Ty) { + bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) + .Cases("y", "&y", "^Ym", true) + .Default(false); + if (IsMMXCons && Ty->isVectorTy()) { + if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() != + 64) { + // Invalid MMX constraint + return nullptr; + } + + return llvm::Type::getX86_MMXTy(CGF.getLLVMContext()); + } + + // No operation needed + return Ty; +} + +/// Returns true if this type can be passed in SSE registers with the +/// X86_VectorCall calling convention. Shared between x86_32 and x86_64. +static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { + if (BT->getKind() == BuiltinType::LongDouble) { + if (&Context.getTargetInfo().getLongDoubleFormat() == + &llvm::APFloat::x87DoubleExtended()) + return false; + } + return true; + } + } else if (const VectorType *VT = Ty->getAs<VectorType>()) { + // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX + // registers specially. + unsigned VecSize = Context.getTypeSize(VT); + if (VecSize == 128 || VecSize == 256 || VecSize == 512) + return true; + } + return false; +} + +/// Returns true if this aggregate is small enough to be passed in SSE registers +/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64. +static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { + return NumMembers <= 4; +} + +/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. +static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { + auto AI = ABIArgInfo::getDirect(T); + AI.setInReg(true); + AI.setCanBeFlattened(false); + return AI; +} + +//===----------------------------------------------------------------------===// +// X86-32 ABI Implementation +//===----------------------------------------------------------------------===// + +/// Similar to llvm::CCState, but for Clang. +struct CCState { + CCState(CGFunctionInfo &FI) + : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {} + + llvm::SmallBitVector IsPreassigned; + unsigned CC = CallingConv::CC_C; + unsigned FreeRegs = 0; + unsigned FreeSSERegs = 0; +}; + +/// X86_32ABIInfo - The X86-32 ABI information. +class X86_32ABIInfo : public ABIInfo { + enum Class { + Integer, + Float + }; + + static const unsigned MinABIStackAlignInBytes = 4; + + bool IsDarwinVectorABI; + bool IsRetSmallStructInRegABI; + bool IsWin32StructABI; + bool IsSoftFloatABI; + bool IsMCUABI; + bool IsLinuxABI; + unsigned DefaultNumRegisterParameters; + + static bool isRegisterSize(unsigned Size) { + return (Size == 8 || Size == 16 || Size == 32 || Size == 64); + } + + bool isHomogeneousAggregateBaseType(QualType Ty) const override { + // FIXME: Assumes vectorcall is in use. + return isX86VectorTypeForVectorCall(getContext(), Ty); + } + + bool isHomogeneousAggregateSmallEnough(const Type *Ty, + uint64_t NumMembers) const override { + // FIXME: Assumes vectorcall is in use. 
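+    // vectorcall allows at most four elements in a homogeneous aggregate,
+    // so e.g. a struct of four __m128 members still qualifies as an HVA.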
+    return isX86VectorCallAggregateSmallEnough(NumMembers);
+  }
+
+  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
+
+  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
+  /// such that the argument will be passed in memory.
+  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
+
+  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
+
+  /// Return the alignment to use for the given type on the stack.
+  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
+
+  Class classify(QualType Ty) const;
+  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
+  ABIArgInfo classifyArgumentType(QualType Ty, CCState &State) const;
+
+  /// Updates the number of available free registers, and returns
+  /// true if any registers were allocated.
+  bool updateFreeRegs(QualType Ty, CCState &State) const;
+
+  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
+                                bool &NeedsPadding) const;
+  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
+
+  bool canExpandIndirectArgument(QualType Ty) const;
+
+  /// Rewrite the function info so that all memory arguments use
+  /// inalloca.
+  void rewriteWithInAlloca(CGFunctionInfo &FI) const;
+
+  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
+                           CharUnits &StackOffset, ABIArgInfo &Info,
+                           QualType Type) const;
+  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
+
+public:
+
+  void computeInfo(CGFunctionInfo &FI) const override;
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
+
+  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
+                bool RetSmallStructInRegABI, bool Win32StructABI,
+                unsigned NumRegisterParameters, bool SoftFloatABI)
+      : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
+        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
+        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
+        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
+                   CGT.getTarget().getTriple().isOSCygMing()),
+        DefaultNumRegisterParameters(NumRegisterParameters) {}
+};
+
+class X86_32SwiftABIInfo : public SwiftABIInfo {
+public:
+  explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
+      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}
+
+  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+                            bool AsReturnValue) const override {
+    // LLVM's x86-32 lowering currently only assigns up to three
+    // integer registers and three fp registers. Oddly, it'll use up to
+    // four vector registers for vectors, but those can overlap with the
+    // scalar registers.
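+    // Values needing more than three scalar components are therefore
+    // passed indirectly.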
+ return occupiesMoreThan(ComponentTys, /*total=*/3); + } +}; + +class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { +public: + X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, + bool RetSmallStructInRegABI, bool Win32StructABI, + unsigned NumRegisterParameters, bool SoftFloatABI) + : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>( + CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, + NumRegisterParameters, SoftFloatABI)) { + SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT); + } + + static bool isStructReturnInRegABI( + const llvm::Triple &Triple, const CodeGenOptions &Opts); + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { + // Darwin uses different dwarf register numbers for EH. + if (CGM.getTarget().getTriple().isOSDarwin()) return 5; + return 4; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; + + llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, + StringRef Constraint, + llvm::Type* Ty) const override { + return X86AdjustInlineAsmType(CGF, Constraint, Ty); + } + + void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue, + std::string &Constraints, + std::vector<llvm::Type *> &ResultRegTypes, + std::vector<llvm::Type *> &ResultTruncRegTypes, + std::vector<LValue> &ResultRegDests, + std::string &AsmString, + unsigned NumOutputs) const override; + + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { + return "movl\t%ebp, %ebp" + "\t\t// marker for objc_retainAutoreleaseReturnValue"; + } +}; + +} + +/// Rewrite input constraint references after adding some output constraints. +/// In the case where there is one output and one input and we add one output, +/// we need to replace all operand references greater than or equal to 1: +/// mov $0, $1 +/// mov eax, $1 +/// The result will be: +/// mov $0, $2 +/// mov eax, $2 +static void rewriteInputConstraintReferences(unsigned FirstIn, + unsigned NumNewOuts, + std::string &AsmString) { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + size_t Pos = 0; + while (Pos < AsmString.size()) { + size_t DollarStart = AsmString.find('$', Pos); + if (DollarStart == std::string::npos) + DollarStart = AsmString.size(); + size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart); + if (DollarEnd == std::string::npos) + DollarEnd = AsmString.size(); + OS << StringRef(&AsmString[Pos], DollarEnd - Pos); + Pos = DollarEnd; + size_t NumDollars = DollarEnd - DollarStart; + if (NumDollars % 2 != 0 && Pos < AsmString.size()) { + // We have an operand reference. + size_t DigitStart = Pos; + if (AsmString[DigitStart] == '{') { + OS << '{'; + ++DigitStart; + } + size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart); + if (DigitEnd == std::string::npos) + DigitEnd = AsmString.size(); + StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart); + unsigned OperandIndex; + if (!OperandStr.getAsInteger(10, OperandIndex)) { + if (OperandIndex >= FirstIn) + OperandIndex += NumNewOuts; + OS << OperandIndex; + } else { + OS << OperandStr; + } + Pos = DigitEnd; + } + } + AsmString = std::move(OS.str()); +} + +/// Add output constraints for EAX:EDX because they are return registers. 
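+/// For example, a 64-bit return appends the "=A" (EAX:EDX) constraint,
+/// while a 32-bit or smaller return uses "={eax}".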
+void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
+    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
+    std::vector<llvm::Type *> &ResultRegTypes,
+    std::vector<llvm::Type *> &ResultTruncRegTypes,
+    std::vector<LValue> &ResultRegDests, std::string &AsmString,
+    unsigned NumOutputs) const {
+  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());
+
+  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
+  // larger.
+  if (!Constraints.empty())
+    Constraints += ',';
+  if (RetWidth <= 32) {
+    Constraints += "={eax}";
+    ResultRegTypes.push_back(CGF.Int32Ty);
+  } else {
+    // Use the 'A' constraint for EAX:EDX.
+    Constraints += "=A";
+    ResultRegTypes.push_back(CGF.Int64Ty);
+  }
+
+  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
+  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
+  ResultTruncRegTypes.push_back(CoerceTy);
+
+  // Coerce the integer by bitcasting the return slot pointer.
+  ReturnSlot.setAddress(ReturnSlot.getAddress(CGF).withElementType(CoerceTy));
+  ResultRegDests.push_back(ReturnSlot);
+
+  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
+}
+
+/// shouldReturnTypeInRegister - Determine if the given type should be
+/// returned in a register (for the Darwin and MCU ABIs).
+bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
+                                               ASTContext &Context) const {
+  uint64_t Size = Context.getTypeSize(Ty);
+
+  // For i386, the type must be register sized.
+  // For the MCU ABI, it only needs to be <= 8 bytes.
+  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
+    return false;
+
+  if (Ty->isVectorType()) {
+    // 64- and 128-bit vectors inside structures are not returned in
+    // registers.
+    if (Size == 64 || Size == 128)
+      return false;
+
+    return true;
+  }
+
+  // If this is a builtin, pointer, enum, complex type, member pointer, or
+  // member function pointer it is ok.
+  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
+      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
+      Ty->isBlockPointerType() || Ty->isMemberPointerType())
+    return true;
+
+  // Arrays are treated like records.
+  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
+    return shouldReturnTypeInRegister(AT->getElementType(), Context);
+
+  // Otherwise, it must be a record type.
+  const RecordType *RT = Ty->getAs<RecordType>();
+  if (!RT) return false;
+
+  // FIXME: Traverse bases here too.
+
+  // Structure types are passed in a register if all fields would be
+  // passed in a register.
+  for (const auto *FD : RT->getDecl()->fields()) {
+    // Empty fields are ignored.
+    if (isEmptyField(Context, FD, true))
+      continue;
+
+    // Check fields recursively.
+    if (!shouldReturnTypeInRegister(FD->getType(), Context))
+      return false;
+  }
+  return true;
+}
+
+static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
+  // Treat complex types as the element type.
+  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+    Ty = CTy->getElementType();
+
+  // Check for a type which we know has a simple scalar argument-passing
+  // convention without any padding. (We're specifically looking for 32
+  // and 64-bit integer and integer-equivalents, float, and double.)
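+  // For example, int, long long, float, double, pointers, and enums all
+  // qualify; _Bool and short do not, since expanding a struct containing
+  // them would introduce alignment padding.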
+ if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, + uint64_t &Size) { + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), Context)) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += Context.getTypeSize(FD->getType()); + } + return true; +} + +static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, + uint64_t &Size) { + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : RD->bases()) { + if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), + Size)) + return false; + } + if (!addFieldSizes(Context, RD, Size)) + return false; + return true; +} + +/// Test whether an argument type which is to be passed indirectly (on the +/// stack) would have the equivalent layout if it was expanded into separate +/// arguments. If so, we prefer to do the latter to avoid inhibiting +/// optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs<RecordType>(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + uint64_t Size = 0; + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (!IsWin32StructABI) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + } + if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) + return false; + } else { + if (!addFieldSizes(getContext(), RD, Size)) + return false; + } + + // We can do this if there was no alignment padding. + return Size == getContext().getTypeSize(Ty); +} + +ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { + // If the return value is indirect, then the hidden argument is consuming one + // integer register. + if (State.FreeRegs) { + --State.FreeRegs; + if (!IsMCUABI) + return getNaturalAlignIndirectInReg(RetTy); + } + return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); +} + +ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, + CCState &State) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + const Type *Base = nullptr; + uint64_t NumElts = 0; + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && + isHomogeneousAggregate(RetTy, Base, NumElts)) { + // The LLVM struct type for such an aggregate should lower properly. + return ABIArgInfo::getDirect(); + } + + if (const VectorType *VT = RetTy->getAs<VectorType>()) { + // On Darwin, some vectors are returned in registers. 
+    if (IsDarwinVectorABI) {
+      uint64_t Size = getContext().getTypeSize(RetTy);
+
+      // 128-bit vectors are a special case; they are returned in
+      // registers and we need to make sure to pick a type the LLVM
+      // backend will like.
+      if (Size == 128)
+        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+            llvm::Type::getInt64Ty(getVMContext()), 2));
+
+      // Always return in register if it fits in a general purpose
+      // register, or if it is 64 bits and has a single element.
+      if ((Size == 8 || Size == 16 || Size == 32) ||
+          (Size == 64 && VT->getNumElements() == 1))
+        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+                                                            Size));
+
+      return getIndirectReturnResult(RetTy, State);
+    }
+
+    return ABIArgInfo::getDirect();
+  }
+
+  if (isAggregateTypeForABI(RetTy)) {
+    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+      // Structures with flexible arrays are always indirect.
+      if (RT->getDecl()->hasFlexibleArrayMember())
+        return getIndirectReturnResult(RetTy, State);
+    }
+
+    // If specified, structs and unions are always indirect.
+    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
+      return getIndirectReturnResult(RetTy, State);
+
+    // Ignore empty structs/unions.
+    if (isEmptyRecord(getContext(), RetTy, true))
+      return ABIArgInfo::getIgnore();
+
+    // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
+    if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
+      QualType ET = getContext().getCanonicalType(CT->getElementType());
+      if (ET->isFloat16Type())
+        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+            llvm::Type::getHalfTy(getVMContext()), 2));
+    }
+
+    // Small structures which are register sized are generally returned
+    // in a register.
+    if (shouldReturnTypeInRegister(RetTy, getContext())) {
+      uint64_t Size = getContext().getTypeSize(RetTy);
+
+      // As a special case, if the struct is a "single-element" struct, and
+      // the field is of type "float" or "double", return it in a
+      // floating-point register. (MSVC does not apply this special case.)
+      // We apply a similar transformation for pointer types to improve the
+      // quality of the generated IR.
+      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
+        if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
+            || SeltTy->hasPointerRepresentation())
+          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+      // FIXME: We should be able to narrow this integer in cases with dead
+      // padding.
+      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+    }
+
+    return getIndirectReturnResult(RetTy, State);
+  }
+
+  // Treat an enum type as its underlying type.
+  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+    RetTy = EnumTy->getDecl()->getIntegerType();
+
+  if (const auto *EIT = RetTy->getAs<BitIntType>())
+    if (EIT->getNumBits() > 64)
+      return getIndirectReturnResult(RetTy, State);
+
+  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
+                                               : ABIArgInfo::getDirect());
+}
+
+unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
+                                                 unsigned Align) const {
+  // If the alignment is less than or equal to the minimum ABI
+  // alignment, just use the default; the backend will handle this.
+  if (Align <= MinABIStackAlignInBytes)
+    return 0; // Use default alignment.
+
+  if (IsLinuxABI) {
+    // Exclude other System V OSes (e.g. Darwin, PS4, and FreeBSD) since we
+    // don't want to spend any effort dealing with the ramifications of ABI
+    // breaks.
+    //
+    // If the vector type is __m128/__m256/__m512, return the default alignment.
+    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
+      return Align;
+  }
+  // On non-Darwin, the stack type alignment is always 4.
+  if (!IsDarwinVectorABI) {
+    // Set explicit alignment, since we may need to realign the top.
+    return MinABIStackAlignInBytes;
+  }
+
+  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
+  if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
+                      isRecordWithSIMDVectorType(getContext(), Ty)))
+    return 16;
+
+  return MinABIStackAlignInBytes;
+}
+
+ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
+                                            CCState &State) const {
+  if (!ByVal) {
+    if (State.FreeRegs) {
+      --State.FreeRegs; // Non-byval indirects just use one pointer.
+      if (!IsMCUABI)
+        return getNaturalAlignIndirectInReg(Ty);
+    }
+    return getNaturalAlignIndirect(Ty, false);
+  }
+
+  // Compute the byval alignment.
+  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
+  if (StackAlign == 0)
+    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);
+
+  // If the stack alignment is less than the type alignment, realign the
+  // argument.
+  bool Realign = TypeAlign > StackAlign;
+  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
+                                 /*ByVal=*/true, Realign);
+}
+
+X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
+  const Type *T = isSingleElementStruct(Ty, getContext());
+  if (!T)
+    T = Ty.getTypePtr();
+
+  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
+    BuiltinType::Kind K = BT->getKind();
+    if (K == BuiltinType::Float || K == BuiltinType::Double)
+      return Float;
+  }
+  return Integer;
+}
+
+bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
+  if (!IsSoftFloatABI) {
+    Class C = classify(Ty);
+    if (C == Float)
+      return false;
+  }
+
+  unsigned Size = getContext().getTypeSize(Ty);
+  unsigned SizeInRegs = (Size + 31) / 32;
+
+  if (SizeInRegs == 0)
+    return false;
+
+  if (!IsMCUABI) {
+    if (SizeInRegs > State.FreeRegs) {
+      State.FreeRegs = 0;
+      return false;
+    }
+  } else {
+    // The MCU psABI allows passing parameters in-reg even if there are
+    // earlier parameters that are passed on the stack. Also,
+    // it does not allow passing >8-byte structs in-register,
+    // even if there are 3 free registers available.
+    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
+      return false;
+  }
+
+  State.FreeRegs -= SizeInRegs;
+  return true;
+}
+
+bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
+                                             bool &InReg,
+                                             bool &NeedsPadding) const {
+  // On Windows, aggregates other than HFAs are never passed in registers, and
+  // they do not consume register slots. Homogeneous floating-point aggregates
+  // (HFAs) have already been dealt with at this point.
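+  // So, for example, `struct S { int x, y; };` always goes to the stack on
+  // win32, even under fastcall when ECX and EDX are free.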
+  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
+    return false;
+
+  NeedsPadding = false;
+  InReg = !IsMCUABI;
+
+  if (!updateFreeRegs(Ty, State))
+    return false;
+
+  if (IsMCUABI)
+    return true;
+
+  if (State.CC == llvm::CallingConv::X86_FastCall ||
+      State.CC == llvm::CallingConv::X86_VectorCall ||
+      State.CC == llvm::CallingConv::X86_RegCall) {
+    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
+      NeedsPadding = true;
+
+    return false;
+  }
+
+  return true;
+}
+
+bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
+  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
+                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
+                     Ty->isReferenceType());
+
+  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
+                      State.CC == llvm::CallingConv::X86_VectorCall))
+    return false;
+
+  if (!updateFreeRegs(Ty, State))
+    return false;
+
+  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
+    return false;
+
+  // Return true to apply inreg to all legal parameters except for MCU targets.
+  return !IsMCUABI;
+}
+
+void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
+  // Vectorcall on x86 works subtly differently than on x64, so the format is
+  // a bit different than the x64 version. First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
+  // This differs from the x64 implementation, where the first 6 arguments by
+  // index get registers.
+  // In the second pass over the arguments, HVAs are passed in the remaining
+  // vector registers if possible, or indirectly by address. The address will be
+  // passed in ECX/EDX if available. Any other arguments are passed according to
+  // the usual fastcall rules.
+  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
+  for (int I = 0, E = Args.size(); I < E; ++I) {
+    const Type *Base = nullptr;
+    uint64_t NumElts = 0;
+    const QualType &Ty = Args[I].type;
+    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+        isHomogeneousAggregate(Ty, Base, NumElts)) {
+      if (State.FreeSSERegs >= NumElts) {
+        State.FreeSSERegs -= NumElts;
+        Args[I].info = ABIArgInfo::getDirectInReg();
+        State.IsPreassigned.set(I);
+      }
+    }
+  }
+}
+
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
+                                               CCState &State) const {
+  // FIXME: Set alignment on indirect arguments.
+  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
+  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
+  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
+
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+  TypeInfo TI = getContext().getTypeInfo(Ty);
+
+  // Check with the C++ ABI first.
+  const RecordType *RT = Ty->getAs<RecordType>();
+  if (RT) {
+    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+    if (RAA == CGCXXABI::RAA_Indirect) {
+      return getIndirectResult(Ty, false, State);
+    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
+      // The field index doesn't matter, we'll fix it up later.
+      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
+    }
+  }
+
+  // Regcall uses the concept of a homogeneous vector aggregate, similar
+  // to other targets.
+  const Type *Base = nullptr;
+  uint64_t NumElts = 0;
+  if ((IsRegCall || IsVectorCall) &&
+      isHomogeneousAggregate(Ty, Base, NumElts)) {
+    if (State.FreeSSERegs >= NumElts) {
+      State.FreeSSERegs -= NumElts;
+
+      // Vectorcall passes HVAs directly and does not flatten them, but regcall
+      // does.
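+      // e.g. a struct of two __m128 members stays a single, non-flattened
+      // HVA argument under vectorcall but expands into two XMM arguments
+      // under regcall.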
+ if (IsVectorCall) + return getDirectX86Hva(); + + if (Ty->isBuiltinType() || Ty->isVectorType()) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getExpand(); + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); + + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + bool NeedsPadding = false; + bool InReg; + if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { + unsigned SizeInRegs = (TI.Width + 31) / 32; + SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + if (InReg) + return ABIArgInfo::getDirectInReg(Result); + else + return ABIArgInfo::getDirect(Result); + } + llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; + + // Pass over-aligned aggregates on Windows indirectly. This behavior was + // added in MSVC 2015. Use the required alignment from the record layout, + // since that may be less than the regular type alignment, and types with + // required alignment of less than 4 bytes are not passed indirectly. + if (IsWin32StructABI) { + unsigned AlignInBits = 0; + if (RT) { + const ASTRecordLayout &Layout = + getContext().getASTRecordLayout(RT->getDecl()); + AlignInBits = getContext().toBits(Layout.getRequiredAlignment()); + } else if (TI.isAlignRequired()) { + AlignInBits = TI.Align; + } + if (AlignInBits > 32) + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + + // Expand small (<= 128-bit) record types when we know that the stack layout + // of those arguments will match the struct. This is important because the + // LLVM backend isn't smart enough to remove byval, which inhibits many + // optimizations. + // Don't do this for the MCU if there are still free integer registers + // (see X86_64 ABI for full explanation). + if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && + canExpandIndirectArgument(Ty)) + return ABIArgInfo::getExpandWithPadding( + IsFastCall || IsVectorCall || IsRegCall, PaddingType); + + return getIndirectResult(Ty, true, State); + } + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // On Windows, vectors are passed directly if registers are available, or + // indirectly if not. This avoids the need to align argument memory. Pass + // user-defined vector types larger than 512 bits indirectly for simplicity. + if (IsWin32StructABI) { + if (TI.Width <= 512 && State.FreeSSERegs > 0) { + --State.FreeSSERegs; + return ABIArgInfo::getDirectInReg(); + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + + // On Darwin, some vectors are passed in memory, we handle this by passing + // it as an i8/i16/i32/i64. 
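+    // e.g. a 16-bit <2 x i8> vector is coerced to a plain i16 argument.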
+ if (IsDarwinVectorABI) { + if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) || + (TI.Width == 64 && VT->getNumElements() == 1)) + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), TI.Width)); + } + + if (IsX86_MMXType(CGT.ConvertType(Ty))) + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); + + return ABIArgInfo::getDirect(); + } + + + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + bool InReg = shouldPrimitiveUseInReg(Ty, State); + + if (isPromotableIntegerTypeForABI(Ty)) { + if (InReg) + return ABIArgInfo::getExtendInReg(Ty); + return ABIArgInfo::getExtend(Ty); + } + + if (const auto *EIT = Ty->getAs<BitIntType>()) { + if (EIT->getNumBits() <= 64) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); +} + +void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { + CCState State(FI); + if (IsMCUABI) + State.FreeRegs = 3; + else if (State.CC == llvm::CallingConv::X86_FastCall) { + State.FreeRegs = 2; + State.FreeSSERegs = 3; + } else if (State.CC == llvm::CallingConv::X86_VectorCall) { + State.FreeRegs = 2; + State.FreeSSERegs = 6; + } else if (FI.getHasRegParm()) + State.FreeRegs = FI.getRegParm(); + else if (State.CC == llvm::CallingConv::X86_RegCall) { + State.FreeRegs = 5; + State.FreeSSERegs = 8; + } else if (IsWin32StructABI) { + // Since MSVC 2015, the first three SSE vectors have been passed in + // registers. The rest are passed indirectly. + State.FreeRegs = DefaultNumRegisterParameters; + State.FreeSSERegs = 3; + } else + State.FreeRegs = DefaultNumRegisterParameters; + + if (!::classifyReturnType(getCXXABI(), FI, *this)) { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); + } else if (FI.getReturnInfo().isIndirect()) { + // The C++ ABI is not aware of register usage, so we have to check if the + // return value was sret and put it in a register ourselves if appropriate. + if (State.FreeRegs) { + --State.FreeRegs; // The sret parameter consumes a register. + if (!IsMCUABI) + FI.getReturnInfo().setInReg(true); + } + } + + // The chain argument effectively gives us another free register. + if (FI.isChainCall()) + ++State.FreeRegs; + + // For vectorcall, do a first pass over the arguments, assigning FP and vector + // arguments to XMM registers as available. + if (State.CC == llvm::CallingConv::X86_VectorCall) + runVectorCallFirstPass(FI, State); + + bool UsedInAlloca = false; + MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); + for (int I = 0, E = Args.size(); I < E; ++I) { + // Skip arguments that have already been assigned. + if (State.IsPreassigned.test(I)) + continue; + + Args[I].info = classifyArgumentType(Args[I].type, State); + UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); + } + + // If we needed to use inalloca for any argument, do a second pass and rewrite + // all the memory arguments to use inalloca. + if (UsedInAlloca) + rewriteWithInAlloca(FI); +} + +void +X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, + CharUnits &StackOffset, ABIArgInfo &Info, + QualType Type) const { + // Arguments are always 4-byte-aligned. 
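+  // Even a single i8 field therefore occupies a full 4-byte slot; the
+  // padding logic below inserts the filler bytes.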
+ CharUnits WordSize = CharUnits::fromQuantity(4); + assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct"); + + // sret pointers and indirect things will require an extra pointer + // indirection, unless they are byval. Most things are byval, and will not + // require this indirection. + bool IsIndirect = false; + if (Info.isIndirect() && !Info.getIndirectByVal()) + IsIndirect = true; + Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect); + llvm::Type *LLTy = CGT.ConvertTypeForMem(Type); + if (IsIndirect) + LLTy = llvm::PointerType::getUnqual(getVMContext()); + FrameFields.push_back(LLTy); + StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type); + + // Insert padding bytes to respect alignment. + CharUnits FieldEnd = StackOffset; + StackOffset = FieldEnd.alignTo(WordSize); + if (StackOffset != FieldEnd) { + CharUnits NumBytes = StackOffset - FieldEnd; + llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); + Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity()); + FrameFields.push_back(Ty); + } +} + +static bool isArgInAlloca(const ABIArgInfo &Info) { + // Leave ignored and inreg arguments alone. + switch (Info.getKind()) { + case ABIArgInfo::InAlloca: + return true; + case ABIArgInfo::Ignore: + case ABIArgInfo::IndirectAliased: + return false; + case ABIArgInfo::Indirect: + case ABIArgInfo::Direct: + case ABIArgInfo::Extend: + return !Info.getInReg(); + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. + return true; + } + llvm_unreachable("invalid enum"); +} + +void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { + assert(IsWin32StructABI && "inalloca only supported on win32"); + + // Build a packed struct type for all of the arguments in memory. + SmallVector<llvm::Type *, 6> FrameFields; + + // The stack alignment is always 4. + CharUnits StackAlign = CharUnits::fromQuantity(4); + + CharUnits StackOffset; + CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end(); + + // Put 'this' into the struct before 'sret', if necessary. + bool IsThisCall = + FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall; + ABIArgInfo &Ret = FI.getReturnInfo(); + if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall && + isArgInAlloca(I->info)) { + addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); + ++I; + } + + // Put the sret parameter into the inalloca struct if it's in memory. + if (Ret.isIndirect() && !Ret.getInReg()) { + addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType()); + // On Windows, the hidden sret parameter is always returned in eax. + Ret.setInAllocaSRet(IsWin32StructABI); + } + + // Skip the 'this' parameter in ecx. + if (IsThisCall) + ++I; + + // Put arguments passed in memory into the struct. + for (; I != E; ++I) { + if (isArgInAlloca(I->info)) + addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); + } + + FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields, + /*isPacked=*/true), + StackAlign); +} + +Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, + Address VAListAddr, QualType Ty) const { + + auto TypeInfo = getContext().getTypeInfoInChars(Ty); + + // x86-32 changes the alignment of certain arguments on the stack. + // + // Just messing with TypeInfo like this works because we never pass + // anything indirectly. 
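+  //
+  // For instance, on Linux an __m128 argument keeps its 16-byte slot
+  // alignment, while most scalars drop to the minimum 4-byte stack
+  // alignment.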
+  TypeInfo.Align = CharUnits::fromQuantity(
+                getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));
+
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
+                          TypeInfo, CharUnits::fromQuantity(4),
+                          /*AllowHigherAlign*/ true);
+}
+
+bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
+    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
+  assert(Triple.getArch() == llvm::Triple::x86);
+
+  switch (Opts.getStructReturnConvention()) {
+  case CodeGenOptions::SRCK_Default:
+    break;
+  case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
+    return false;
+  case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
+    return true;
+  }
+
+  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
+    return true;
+
+  switch (Triple.getOS()) {
+  case llvm::Triple::DragonFly:
+  case llvm::Triple::FreeBSD:
+  case llvm::Triple::OpenBSD:
+  case llvm::Triple::Win32:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
+                                 CodeGen::CodeGenModule &CGM) {
+  if (!FD->hasAttr<AnyX86InterruptAttr>())
+    return;
+
+  llvm::Function *Fn = cast<llvm::Function>(GV);
+  Fn->setCallingConv(llvm::CallingConv::X86_INTR);
+  if (FD->getNumParams() == 0)
+    return;
+
+  auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
+  llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
+  llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
+      Fn->getContext(), ByValTy);
+  Fn->addParamAttr(0, NewAttr);
+}
+
+void X86_32TargetCodeGenInfo::setTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  if (GV->isDeclaration())
+    return;
+  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+      llvm::Function *Fn = cast<llvm::Function>(GV);
+      Fn->addFnAttr("stackrealign");
+    }
+
+    addX86InterruptAttrs(FD, GV, CGM);
+  }
+}
+
+bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
+    CodeGen::CodeGenFunction &CGF,
+    llvm::Value *Address) const {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+
+  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
+
+  // 0-7 are the eight integer registers; the order is different
+  // on Darwin (for EH), but the range is the same.
+  // 8 is %eip.
+  AssignToArrayRange(Builder, Address, Four8, 0, 8);
+
+  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
+    // 12-16 are st(0..4). Not sure why we stop at 4.
+    // These have size 16, which is sizeof(long double) on
+    // platforms with 8-byte alignment for that type.
+    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
+    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);
+
+  } else {
+    // 9 is %eflags, which doesn't get a size on Darwin for some
+    // reason.
+    Builder.CreateAlignedStore(
+        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
+        CharUnits::One());
+
+    // 11-16 are st(0..5). Not sure why we stop at 5.
+    // These have size 12, which is sizeof(long double) on
+    // platforms with 4-byte alignment for that type.
+    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
+    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
+  }
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86-64 ABI Implementation
+//===----------------------------------------------------------------------===//
+
+
+namespace {
+
+/// \returns the size in bits of the largest (native) vector for \p AVXLevel.
+static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
+  switch (AVXLevel) {
+  case X86AVXABILevel::AVX512:
+    return 512;
+  case X86AVXABILevel::AVX:
+    return 256;
+  case X86AVXABILevel::None:
+    return 128;
+  }
+  llvm_unreachable("Unknown AVXLevel");
+}
+
+/// X86_64ABIInfo - The X86_64 ABI information.
+class X86_64ABIInfo : public ABIInfo {
+  enum Class {
+    Integer = 0,
+    SSE,
+    SSEUp,
+    X87,
+    X87Up,
+    ComplexX87,
+    NoClass,
+    Memory
+  };
+
+  /// merge - Implement the X86_64 ABI merging algorithm.
+  ///
+  /// Merge an accumulating classification \arg Accum with a field
+  /// classification \arg Field.
+  ///
+  /// \param Accum - The accumulating classification. This should
+  /// always be either NoClass or the result of a previous merge
+  /// call. In addition, this should never be Memory (the caller
+  /// should just return Memory for the aggregate).
+  static Class merge(Class Accum, Class Field);
+
+  /// postMerge - Implement the X86_64 ABI post merging algorithm.
+  ///
+  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
+  /// final MEMORY or SSE classes when necessary.
+  ///
+  /// \param AggregateSize - The size of the current aggregate in
+  /// the classification process.
+  ///
+  /// \param Lo - The classification for the parts of the type
+  /// residing in the low word of the containing object.
+  ///
+  /// \param Hi - The classification for the parts of the type
+  /// residing in the higher words of the containing object.
+  ///
+  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
+
+  /// classify - Determine the x86_64 register classes in which the
+  /// given type T should be passed.
+  ///
+  /// \param Lo - The classification for the parts of the type
+  /// residing in the low word of the containing object.
+  ///
+  /// \param Hi - The classification for the parts of the type
+  /// residing in the high word of the containing object.
+  ///
+  /// \param OffsetBase - The bit offset of this type in the
+  /// containing object. Some parameters are classified differently
+  /// depending on whether they straddle an eightbyte boundary.
+  ///
+  /// \param isNamedArg - Whether the argument in question is a "named"
+  /// argument, as used in AMD64-ABI 3.5.7.
+  ///
+  /// \param IsRegCall - Whether the calling convention is regcall.
+  ///
+  /// If a word is unused its result will be NoClass; if a type should
+  /// be passed in Memory then at least the classification of \arg Lo
+  /// will be Memory.
+  ///
+  /// The \arg Lo class will be NoClass iff the argument is ignored.
+  ///
+  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
+  /// also be ComplexX87.
+  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
+                bool isNamedArg, bool IsRegCall = false) const;
+
+  llvm::Type *GetByteVectorType(QualType Ty) const;
+  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
+                                 unsigned IROffset, QualType SourceTy,
+                                 unsigned SourceOffset) const;
+  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
+                                     unsigned IROffset, QualType SourceTy,
+                                     unsigned SourceOffset) const;
+
+  /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
+  /// result such that the argument will be returned in memory.
+  ABIArgInfo getIndirectReturnResult(QualType Ty) const;
+
+  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
+  /// such that the argument will be passed in memory.
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+ ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const; + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, + bool isNamedArg, + bool IsRegCall = false) const; + + ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE, + unsigned &MaxVectorWidth) const; + + ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE, + unsigned &MaxVectorWidth) const; + + bool IsIllegalVectorType(QualType Ty) const; + + /// The 0.98 ABI revision clarified a lot of ambiguities, + /// unfortunately in ways that were not always consistent with + /// certain previous compilers. In particular, platforms which + /// required strict binary compatibility with older versions of GCC + /// may need to exempt themselves. + bool honorsRevision0_98() const { + return !getTarget().getTriple().isOSDarwin(); + } + + /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to + /// classify it as INTEGER (for compatibility with older clang compilers). + bool classifyIntegerMMXAsSSE() const { + // Clang <= 3.8 did not do this. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver3_8) + return false; + + const llvm::Triple &Triple = getTarget().getTriple(); + if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD()) + return false; + return true; + } + + // GCC classifies vectors of __int128 as memory. + bool passInt128VectorsInMem() const { + // Clang <= 9.0 did not do this. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver9) + return false; + + const llvm::Triple &T = getTarget().getTriple(); + return T.isOSLinux() || T.isOSNetBSD(); + } + + X86AVXABILevel AVXLevel; + // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on + // 64-bit hardware. + bool Has64BitPointers; + +public: + X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) + : ABIInfo(CGT), AVXLevel(AVXLevel), + Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {} + + bool isPassedUsingAVXType(QualType type) const { + unsigned neededInt, neededSSE; + // The freeIntRegs argument doesn't matter here. + ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE, + /*isNamedArg*/true); + if (info.isDirect()) { + llvm::Type *ty = info.getCoerceToType(); + if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty)) + return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128; + } + return false; + } + + void computeInfo(CGFunctionInfo &FI) const override; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + bool has64BitPointers() const { + return Has64BitPointers; + } +}; + +/// WinX86_64ABIInfo - The Windows X86_64 ABI information. +class WinX86_64ABIInfo : public ABIInfo { +public: + WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) + : ABIInfo(CGT), AVXLevel(AVXLevel), + IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} + + void computeInfo(CGFunctionInfo &FI) const override; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override { + // FIXME: Assumes vectorcall is in use. 
+    return isX86VectorTypeForVectorCall(getContext(), Ty);
+  }
+
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t NumMembers) const override {
+    // FIXME: Assumes vectorcall is in use.
+    return isX86VectorCallAggregateSmallEnough(NumMembers);
+  }
+
+private:
+  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
+                      bool IsVectorCall, bool IsRegCall) const;
+  ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
+                                           const ABIArgInfo &current) const;
+
+  X86AVXABILevel AVXLevel;
+
+  bool IsMingw64;
+};
+
+class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
+      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
+    SwiftInfo =
+        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
+  }
+
+  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
+  /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
+  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
+
+  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
+    return 7;
+  }
+
+  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+                               llvm::Value *Address) const override {
+    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
+
+    // 0-15 are the 16 integer registers.
+    // 16 is %rip.
+    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
+    return false;
+  }
+
+  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
+                                  StringRef Constraint,
+                                  llvm::Type* Ty) const override {
+    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
+  }
+
+  bool isNoProtoCallVariadic(const CallArgList &args,
+                             const FunctionNoProtoType *fnType) const override {
+    // The default CC on x86-64 sets %al to the number of SSE
+    // registers used, and GCC sets this when calling an unprototyped
+    // function, so we override the default behavior. However, don't do
+    // that when AVX types are involved: the ABI explicitly states it is
+    // undefined, and it doesn't work in practice because of how the ABI
+    // defines varargs anyway.
+    if (fnType->getCallConv() == CC_C) {
+      bool HasAVXType = false;
+      for (CallArgList::const_iterator
+             it = args.begin(), ie = args.end(); it != ie; ++it) {
+        if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(it->Ty)) {
+          HasAVXType = true;
+          break;
+        }
+      }
+
+      if (!HasAVXType)
+        return true;
+    }
+
+    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
+  }
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override {
+    if (GV->isDeclaration())
+      return;
+    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+      if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+        llvm::Function *Fn = cast<llvm::Function>(GV);
+        Fn->addFnAttr("stackrealign");
+      }
+
+      addX86InterruptAttrs(FD, GV, CGM);
+    }
+  }
+
+  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+                            const FunctionDecl *Caller,
+                            const FunctionDecl *Callee,
+                            const CallArgList &Args) const override;
+};
+} // namespace
+
+static void initFeatureMaps(const ASTContext &Ctx,
+                            llvm::StringMap<bool> &CallerMap,
+                            const FunctionDecl *Caller,
+                            llvm::StringMap<bool> &CalleeMap,
+                            const FunctionDecl *Callee) {
+  if (CalleeMap.empty() && CallerMap.empty()) {
+    // The caller is potentially nullptr in the case where the call isn't in a
+    // function. In this case, getFunctionFeatureMap ensures we just get the
+    // TU-level setting (since it cannot be modified by 'target').
+    Ctx.getFunctionFeatureMap(CallerMap, Caller);
+    Ctx.getFunctionFeatureMap(CalleeMap, Callee);
+  }
+}
+
+static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
+                                 SourceLocation CallLoc,
+                                 const llvm::StringMap<bool> &CallerMap,
+                                 const llvm::StringMap<bool> &CalleeMap,
+                                 QualType Ty, StringRef Feature,
+                                 bool IsArgument) {
+  bool CallerHasFeat = CallerMap.lookup(Feature);
+  bool CalleeHasFeat = CalleeMap.lookup(Feature);
+  if (!CallerHasFeat && !CalleeHasFeat)
+    return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
+           << IsArgument << Ty << Feature;
+
+  // Mixing calling conventions here is very clearly an error.
+  if (!CallerHasFeat || !CalleeHasFeat)
+    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
+           << IsArgument << Ty << Feature;
+
+  // Else, both caller and callee have the required feature, so there is no
+  // need to diagnose.
+  return false;
+}
+
+static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
+                          SourceLocation CallLoc,
+                          const llvm::StringMap<bool> &CallerMap,
+                          const llvm::StringMap<bool> &CalleeMap, QualType Ty,
+                          bool IsArgument) {
+  uint64_t Size = Ctx.getTypeSize(Ty);
+  if (Size > 256)
+    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
+                                "avx512f", IsArgument);
+
+  if (Size > 128)
+    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
+                                IsArgument);
+
+  return false;
+}
+
+void X86_64TargetCodeGenInfo::checkFunctionCallABI(
+    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+    const FunctionDecl *Callee, const CallArgList &Args) const {
+  llvm::StringMap<bool> CallerMap;
+  llvm::StringMap<bool> CalleeMap;
+  unsigned ArgIndex = 0;
+
+  // We need to loop through the actual call arguments rather than the
+  // function's parameters, in case this is variadic.
+  for (const CallArg &Arg : Args) {
+    // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
+    // additionally changes how vectors >256 in size are passed. Like GCC, we
+    // warn when a function is called with an argument where this will change.
+    // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
+    // the caller and callee features are mismatched.
+    // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
+    // change its ABI with attribute-target after this call.
+    if (Arg.getType()->isVectorType() &&
+        CGM.getContext().getTypeSize(Arg.getType()) > 128) {
+      initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
+      QualType Ty = Arg.getType();
+      // The CallArg seems to have desugared the type already, so for clearer
+      // diagnostics, replace it with the type in the FunctionDecl if possible.
+      if (ArgIndex < Callee->getNumParams())
+        Ty = Callee->getParamDecl(ArgIndex)->getType();
+
+      if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
+                        CalleeMap, Ty, /*IsArgument*/ true))
+        return;
+    }
+    ++ArgIndex;
+  }
+
+  // Check return always, as we don't have a good way of knowing in codegen
+  // whether this value is used, tail-called, etc.
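+  // e.g. an __m256 return value is checked against the "avx" feature just
+  // like the argument checks above.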
+ if (Callee->getReturnType()->isVectorType() && + CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { + initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); + checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, + CalleeMap, Callee->getReturnType(), + /*IsArgument*/ false); + } +} + +std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) { + // If the argument does not end in .lib, automatically add the suffix. + // If the argument contains a space, enclose it in quotes. + // This matches the behavior of MSVC. + bool Quote = Lib.contains(' '); + std::string ArgStr = Quote ? "\"" : ""; + ArgStr += Lib; + if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a")) + ArgStr += ".lib"; + ArgStr += Quote ? "\"" : ""; + return ArgStr; +} + +namespace { +class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { +public: + WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, + bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, + unsigned NumRegisterParameters) + : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, + Win32StructABI, NumRegisterParameters, false) {} + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:"; + Opt += qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, + llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } +}; +} // namespace + +void WinX86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); +} + +namespace { +class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { +public: + WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, + X86AVXABILevel AVXLevel) + : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) { + SwiftInfo = + std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { + return 7; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override { + llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); + + // 0-15 are the 16 integer registers. + // 16 is %rip. 
+ AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
+ return false;
+ }
+
+ void getDependentLibraryOption(llvm::StringRef Lib,
+ llvm::SmallString<24> &Opt) const override {
+ Opt = "/DEFAULTLIB:";
+ Opt += qualifyWindowsLibrary(Lib);
+ }
+
+ void getDetectMismatchOption(llvm::StringRef Name,
+ llvm::StringRef Value,
+ llvm::SmallString<32> &Opt) const override {
+ Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
+ }
+};
+} // namespace
+
+void WinX86_64TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (GV->isDeclaration())
+ return;
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ Fn->addFnAttr("stackrealign");
+ }
+
+ addX86InterruptAttrs(FD, GV, CGM);
+ }
+
+ addStackProbeTargetAttributes(D, GV, CGM);
+}
+
+void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
+ Class &Hi) const {
+ // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
+ //
+ // (a) If one of the classes is Memory, the whole argument is passed in
+ // memory.
+ //
+ // (b) If X87UP is not preceded by X87, the whole argument is passed in
+ // memory.
+ //
+ // (c) If the size of the aggregate exceeds two eightbytes and the first
+ // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
+ // argument is passed in memory. NOTE: This is necessary to keep the
+ // ABI working for processors that don't support the __m256 type.
+ //
+ // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
+ //
+ // Some of these are enforced by the merging logic. Others can arise
+ // only with unions; for example:
+ // union { _Complex double; unsigned; }
+ //
+ // Note that clauses (b) and (c) were added in 0.98.
+ //
+ if (Hi == Memory)
+ Lo = Memory;
+ if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
+ Lo = Memory;
+ if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
+ Lo = Memory;
+ if (Hi == SSEUp && Lo != SSE)
+ Hi = SSE;
+}
+
+X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
+ // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
+ // classified recursively so that always two fields are
+ // considered. The resulting class is calculated according to
+ // the classes of the fields in the eightbyte:
+ //
+ // (a) If both classes are equal, this is the resulting class.
+ //
+ // (b) If one of the classes is NO_CLASS, the resulting class is
+ // the other class.
+ //
+ // (c) If one of the classes is MEMORY, the result is the MEMORY
+ // class.
+ //
+ // (d) If one of the classes is INTEGER, the result is the
+ // INTEGER.
+ //
+ // (e) If one of the classes is X87, X87UP, or COMPLEX_X87, MEMORY
+ // is used as the class.
+ //
+ // (f) Otherwise class SSE is used.
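+ // Worked example of these rules (a sketch, not from the ABI document):
+ // struct { double d; long l; } classifies eightbyte 0 as
+ // merge(NO_CLASS, SSE) == SSE and eightbyte 1 as
+ // merge(NO_CLASS, INTEGER) == INTEGER, while for
+ // union { double d; long l; } both fields share eightbyte 0 and
+ // merge(SSE, INTEGER) == INTEGER by rule (d).
+
+ // Accum should never be memory (we should have returned) or
+ // ComplexX87 (because this cannot be passed in a structure).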
+ assert((Accum != Memory && Accum != ComplexX87) && + "Invalid accumulated classification during merge."); + if (Accum == Field || Field == NoClass) + return Accum; + if (Field == Memory) + return Memory; + if (Accum == NoClass) + return Field; + if (Accum == Integer || Field == Integer) + return Integer; + if (Field == X87 || Field == X87Up || Field == ComplexX87 || + Accum == X87 || Accum == X87Up) + return Memory; + return SSE; +} + +void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, + Class &Hi, bool isNamedArg, bool IsRegCall) const { + // FIXME: This code can be simplified by introducing a simple value class for + // Class pairs with appropriate constructor methods for the various + // situations. + + // FIXME: Some of the split computations are wrong; unaligned vectors + // shouldn't be passed in registers for example, so there is no chance they + // can straddle an eightbyte. Verify & simplify. + + Lo = Hi = NoClass; + + Class &Current = OffsetBase < 64 ? Lo : Hi; + Current = Memory; + + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + BuiltinType::Kind k = BT->getKind(); + + if (k == BuiltinType::Void) { + Current = NoClass; + } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { + Lo = Integer; + Hi = Integer; + } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { + Current = Integer; + } else if (k == BuiltinType::Float || k == BuiltinType::Double || + k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { + Current = SSE; + } else if (k == BuiltinType::LongDouble) { + const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); + if (LDF == &llvm::APFloat::IEEEquad()) { + Lo = SSE; + Hi = SSEUp; + } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { + Lo = X87; + Hi = X87Up; + } else if (LDF == &llvm::APFloat::IEEEdouble()) { + Current = SSE; + } else + llvm_unreachable("unexpected long double representation!"); + } + // FIXME: _Decimal32 and _Decimal64 are SSE. + // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). + return; + } + + if (const EnumType *ET = Ty->getAs<EnumType>()) { + // Classify the underlying integer type. + classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); + return; + } + + if (Ty->hasPointerRepresentation()) { + Current = Integer; + return; + } + + if (Ty->isMemberPointerType()) { + if (Ty->isMemberFunctionPointerType()) { + if (Has64BitPointers) { + // If Has64BitPointers, this is an {i64, i64}, so classify both + // Lo and Hi now. + Lo = Hi = Integer; + } else { + // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that + // straddles an eightbyte boundary, Hi should be classified as well. + uint64_t EB_FuncPtr = (OffsetBase) / 64; + uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; + if (EB_FuncPtr != EB_ThisAdj) { + Lo = Hi = Integer; + } else { + Current = Integer; + } + } + } else { + Current = Integer; + } + return; + } + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + uint64_t Size = getContext().getTypeSize(VT); + if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { + // gcc passes the following as integer: + // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> + // 2 bytes - <2 x char>, <1 x short> + // 1 byte - <1 x char> + Current = Integer; + + // If this type crosses an eightbyte boundary, it should be + // split. 
+ uint64_t EB_Lo = (OffsetBase) / 64;
+ uint64_t EB_Hi = (OffsetBase + Size - 1) / 64;
+ if (EB_Lo != EB_Hi)
+ Hi = Lo;
+ } else if (Size == 64) {
+ QualType ElementType = VT->getElementType();
+
+ // gcc passes <1 x double> in memory. :(
+ if (ElementType->isSpecificBuiltinType(BuiltinType::Double))
+ return;
+
+ // gcc passes <1 x long long> as SSE but clang used to unconditionally
+ // pass them as integer. For platforms where clang is the de facto
+ // platform compiler, we must continue to use integer.
+ if (!classifyIntegerMMXAsSSE() &&
+ (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::Long) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::ULong)))
+ Current = Integer;
+ else
+ Current = SSE;
+
+ // If this type crosses an eightbyte boundary, it should be
+ // split.
+ if (OffsetBase && OffsetBase != 64)
+ Hi = Lo;
+ } else if (Size == 128 ||
+ (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
+ QualType ElementType = VT->getElementType();
+
+ // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
+ if (passInt128VectorsInMem() && Size != 128 &&
+ (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
+ return;
+
+ // Arguments of 256 bits are split into four eightbyte chunks. The
+ // least significant one belongs to class SSE and all the others to class
+ // SSEUP. The original Lo and Hi design considers that types can't be
+ // greater than 128 bits, so a 64-bit split in Hi and Lo makes sense.
+ // This design isn't correct for 256 bits, but since there are no cases
+ // where the upper parts would need to be inspected, avoid adding
+ // complexity and just consider Hi to match the 64-256 part.
+ //
+ // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
+ // registers if they are "named", i.e. not part of the "..." of a
+ // variadic function.
+ //
+ // Similarly, per 3.2.3. of the AVX512 draft, 512-bit ("named") args are
+ // split into eight eightbyte chunks, one SSE and seven SSEUP.
+ Lo = SSE;
+ Hi = SSEUp;
+ }
+ return;
+ }
+
+ if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
+ QualType ET = getContext().getCanonicalType(CT->getElementType());
+
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (ET->isIntegralOrEnumerationType()) {
+ if (Size <= 64)
+ Current = Integer;
+ else if (Size <= 128)
+ Lo = Hi = Integer;
+ } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
+ ET->isBFloat16Type()) {
+ Current = SSE;
+ } else if (ET == getContext().DoubleTy) {
+ Lo = Hi = SSE;
+ } else if (ET == getContext().LongDoubleTy) {
+ const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
+ if (LDF == &llvm::APFloat::IEEEquad())
+ Current = Memory;
+ else if (LDF == &llvm::APFloat::x87DoubleExtended())
+ Current = ComplexX87;
+ else if (LDF == &llvm::APFloat::IEEEdouble())
+ Lo = Hi = SSE;
+ else
+ llvm_unreachable("unexpected long double representation!");
+ }
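+ // A few resulting classifications (a sketch): _Complex float -> SSE (both
+ // halves fit in eightbyte 0), _Complex double -> { SSE, SSE }, and
+ // _Complex long double -> COMPLEX_X87 with the x87 format.
+
+ // If this complex type crosses an eightbyte boundary then it
+ // should be split.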
+ uint64_t EB_Real = (OffsetBase) / 64;
+ uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
+ if (Hi == NoClass && EB_Real != EB_Imag)
+ Hi = Lo;
+
+ return;
+ }
+
+ if (const auto *EITy = Ty->getAs<BitIntType>()) {
+ if (EITy->getNumBits() <= 64)
+ Current = Integer;
+ else if (EITy->getNumBits() <= 128)
+ Lo = Hi = Integer;
+ // Larger values need to get passed in memory.
+ return;
+ }
+
+ if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+ // Arrays are treated like structures.
+
+ uint64_t Size = getContext().getTypeSize(Ty);
+
+ // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
+ // than eight eightbytes, ..., it has class MEMORY.
+ // The regcall ABI doesn't limit the size of an object; the only
+ // limitation is the number of free registers, which is checked in
+ // computeInfo.
+ if (!IsRegCall && Size > 512)
+ return;
+
+ // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
+ // fields, it has class MEMORY.
+ //
+ // Only need to check alignment of array base.
+ if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
+ return;
+
+ // Otherwise implement simplified merge. We could be smarter about
+ // this, but it isn't worth it and would be harder to verify.
+ Current = NoClass;
+ uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
+ uint64_t ArraySize = AT->getSize().getZExtValue();
+
+ // The only case a 256-bit wide vector could be used is when the array
+ // contains a single 256-bit element. Since Lo and Hi logic isn't extended
+ // to work for sizes wider than 128, early check and fallback to memory.
+ //
+ if (Size > 128 &&
+ (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
+ return;
+
+ for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
+ Class FieldLo, FieldHi;
+ classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
+ Lo = merge(Lo, FieldLo);
+ Hi = merge(Hi, FieldHi);
+ if (Lo == Memory || Hi == Memory)
+ break;
+ }
+
+ postMerge(Size, Lo, Hi);
+ assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
+ return;
+ }
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+
+ // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
+ // than eight eightbytes, ..., it has class MEMORY.
+ if (Size > 512)
+ return;
+
+ // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
+ // copy constructor or a non-trivial destructor, it is passed by invisible
+ // reference.
+ if (getRecordArgABI(RT, getCXXABI()))
+ return;
+
+ const RecordDecl *RD = RT->getDecl();
+
+ // Assume variable sized types are passed in memory.
+ if (RD->hasFlexibleArrayMember())
+ return;
+
+ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+
+ // Reset Lo class, this will be recomputed.
+ Current = NoClass;
+
+ // If this is a C++ record, classify the bases first.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ for (const auto &I : CXXRD->bases()) {
+ assert(!I.isVirtual() && !I.getType()->isDependentType() &&
+ "Unexpected base class!");
+ const auto *Base =
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
+
+ // Classify this field.
+ //
+ // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
+ // single eightbyte, each is classified separately. Each eightbyte gets
+ // initialized to class NO_CLASS.
+ Class FieldLo, FieldHi;
+ uint64_t Offset =
+ OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
+ classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
+ Lo = merge(Lo, FieldLo);
+ Hi = merge(Hi, FieldHi);
+ if (Lo == Memory || Hi == Memory) {
+ postMerge(Size, Lo, Hi);
+ return;
+ }
+ }
+ }
+
+ // Classify the fields one at a time, merging the results.
+ unsigned idx = 0;
+ bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
+ LangOptions::ClangABI::Ver11 ||
+ getContext().getTargetInfo().getTriple().isPS();
+ bool IsUnion = RT->isUnionType() && !UseClang11Compat;
+
+ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+ i != e; ++i, ++idx) {
+ uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
+ bool BitField = i->isBitField();
+
+ // Ignore padding bit-fields.
+ if (BitField && i->isUnnamedBitfield())
+ continue;
+
+ // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
+ // eight eightbytes, or it contains unaligned fields, it has class MEMORY.
+ //
+ // The only case a 256-bit or a 512-bit wide vector could be used is when
+ // the struct contains a single 256-bit or 512-bit element. Early check
+ // and fallback to memory.
+ //
+ // FIXME: Extend the Lo and Hi logic properly to work for sizes wider
+ // than 128.
+ if (Size > 128 &&
+ ((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
+ Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
+ Lo = Memory;
+ postMerge(Size, Lo, Hi);
+ return;
+ }
+ // Note: skip this test for bit-fields; see below.
+ if (!BitField && Offset % getContext().getTypeAlign(i->getType())) {
+ Lo = Memory;
+ postMerge(Size, Lo, Hi);
+ return;
+ }
+
+ // Classify this field.
+ //
+ // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
+ // exceeds a single eightbyte, each is classified
+ // separately. Each eightbyte gets initialized to class
+ // NO_CLASS.
+ Class FieldLo, FieldHi;
+
+ // Bit-fields require special handling: they do not force the
+ // structure to be passed in memory even if unaligned, and
+ // therefore they can straddle an eightbyte.
+ if (BitField) {
+ assert(!i->isUnnamedBitfield());
+ uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
+ uint64_t Size = i->getBitWidthValue(getContext());
+
+ uint64_t EB_Lo = Offset / 64;
+ uint64_t EB_Hi = (Offset + Size - 1) / 64;
+
+ if (EB_Lo) {
+ assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
+ FieldLo = NoClass;
+ FieldHi = Integer;
+ } else {
+ FieldLo = Integer;
+ FieldHi = EB_Hi ? Integer : NoClass;
+ }
+ } else
+ classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
+ Lo = merge(Lo, FieldLo);
+ Hi = merge(Hi, FieldHi);
+ if (Lo == Memory || Hi == Memory)
+ break;
+ }
+
+ postMerge(Size, Lo, Hi);
+ }
+}
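+// Illustrative results of classify() for common cases (a sketch; SysV
+// x86-64 with x87 long double, AVX level permitting):
+//   int, long, T*                  -> INTEGER
+//   __int128                       -> { INTEGER, INTEGER }
+//   float, double                  -> SSE
+//   <4 x float> (128 bits)         -> { SSE, SSEUP }
+//   long double                    -> { X87, X87UP }
+//   struct { double d; long l; }   -> { SSE, INTEGER }
+//   struct { char c[17]; }         -> MEMORY (via postMerge rule (c))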
+ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
+ // If this is a scalar LLVM value then assume LLVM will pass it in the right
+ // place naturally.
+ if (!isAggregateTypeForABI(Ty)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ if (Ty->isBitIntType())
+ return getNaturalAlignIndirect(Ty);
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
+ }
+
+ return getNaturalAlignIndirect(Ty);
+}
+
+bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
+ if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
+ uint64_t Size = getContext().getTypeSize(VecTy);
+ unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
+ if (Size <= 64 || Size > LargestVector)
+ return true;
+ QualType EltTy = VecTy->getElementType();
+ if (passInt128VectorsInMem() &&
+ (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
+ EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
+ return true;
+ }
+
+ return false;
+}
+
+ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
+ unsigned freeIntRegs) const {
+ // If this is a scalar LLVM value then assume LLVM will pass it in the right
+ // place naturally.
+ //
+ // This assumption is optimistic, as there could be free registers available
+ // when we need to pass this argument in memory, and LLVM could try to pass
+ // the argument in the free register. This does not seem to happen currently,
+ // but this code would be much safer if we could mark the argument with
+ // 'onstack'. See PR12193.
+ if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
+ !Ty->isBitIntType()) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
+ : ABIArgInfo::getDirect());
+ }
+
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+ // Compute the byval alignment. We specify the alignment of the byval in all
+ // cases so that the mid-level optimizer knows the alignment of the byval.
+ unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
+
+ // Attempt to avoid passing indirect results using byval when possible. This
+ // is important for good codegen.
+ //
+ // We do this by coercing the value into a scalar type which the backend can
+ // handle naturally (i.e., without using byval).
+ //
+ // For simplicity, we currently only do this when we have exhausted all of the
+ // free integer registers. Doing this when there are free integer registers
+ // would require more care, as we would have to ensure that the coerced value
+ // did not claim the unused register. That would require either reordering the
+ // arguments to the function (so that any subsequent inreg values came first),
+ // or only doing this optimization when there were no following arguments that
+ // might be inreg.
+ //
+ // We currently expect it to be rare (particularly in well written code) for
+ // arguments to be passed on the stack when there are still free integer
+ // registers available (this would typically imply large structs being passed
+ // by value), so this seems like a fair tradeoff for now.
+ //
+ // We can revisit this if the backend grows support for 'onstack' parameter
+ // attributes. See PR12193.
+ if (freeIntRegs == 0) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+
+ // If this type fits in an eightbyte, coerce it into the matching integral
+ // type, which will end up on the stack (with alignment 8).
+ if (Align == 8 && Size <= 64)
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+ Size));
+ }
+
+ return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
+}
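+// For instance (a sketch of the coercion above): once freeIntRegs == 0, a
+// struct { int a, b; } (size 64, byval alignment raised to 8) is passed as
+// a plain i64 on the stack rather than as a byval pointer, while a 24-byte
+// struct still goes indirect with align 8.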
+/// The ABI specifies that a value should be passed in a full vector XMM/YMM
+/// register. Pick an LLVM IR type that will be passed as a vector register.
+llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
+ // Wrapper structs/arrays that only contain vectors are passed just like
+ // vectors; strip them off if present.
+ if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
+ Ty = QualType(InnerTy, 0);
+
+ llvm::Type *IRType = CGT.ConvertType(Ty);
+ if (isa<llvm::VectorType>(IRType)) {
+ // Don't pass vXi128 vectors in their native type; the backend can't
+ // legalize them.
+ if (passInt128VectorsInMem() &&
+ cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
+ // Use a vXi64 vector.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
+ Size / 64);
+ }
+
+ return IRType;
+ }
+
+ if (IRType->getTypeID() == llvm::Type::FP128TyID)
+ return IRType;
+
+ // We couldn't find the preferred IR vector type for 'Ty'.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
+
+ // Return an LLVM IR vector type based on the size of 'Ty'.
+ return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
+ Size / 64);
+}
+
+/// BitsContainNoUserData - Return true if the specified [start,end) bit range
+/// is known to either be off the end of the specified type or to be in
+/// alignment padding. The user type specified is known to be at most 128 bits
+/// in size, and to have passed through X86_64ABIInfo::classify with a
+/// successful classification that put one of the two halves in the INTEGER
+/// class.
+///
+/// It is conservatively correct to return false.
+static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
+ unsigned EndBit, ASTContext &Context) {
+ // If the bytes being queried are off the end of the type, there is no user
+ // data hiding here. This handles analysis of builtins, vectors and other
+ // types that don't contain interesting padding.
+ unsigned TySize = (unsigned)Context.getTypeSize(Ty);
+ if (TySize <= StartBit)
+ return true;
+
+ if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
+ unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType());
+ unsigned NumElts = (unsigned)AT->getSize().getZExtValue();
+
+ // Check each element to see if the element overlaps with the queried range.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If the element is after the span we care about, then we're done.
+ unsigned EltOffset = i*EltSize;
+ if (EltOffset >= EndBit) break;
+
+ unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0;
+ if (!BitsContainNoUserData(AT->getElementType(), EltStart,
+ EndBit-EltOffset, Context))
+ return false;
+ }
+ // If it overlaps no elements, then it is safe to process as padding.
+ return true;
+ }
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
+
+ // If this is a C++ record, check the bases first.
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ for (const auto &I : CXXRD->bases()) {
+ assert(!I.isVirtual() && !I.getType()->isDependentType() &&
+ "Unexpected base class!");
+ const auto *Base =
+ cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
+
+ // If the base is after the span we care about, ignore it.
+ unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
+ if (BaseOffset >= EndBit) continue;
+
+ unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0;
+ if (!BitsContainNoUserData(I.getType(), BaseStart,
+ EndBit-BaseOffset, Context))
+ return false;
+ }
+ }
+
+ // Verify that no field has data that overlaps the region of interest.
+ // Yes, this could be sped up a lot by being smarter about queried fields;
+ // however, we're only looking at structs up to 16 bytes, so we don't care
+ // much.
+ unsigned idx = 0;
+ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+ i != e; ++i, ++idx) {
+ unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx);
+
+ // If we found a field after the region we care about, then we're done.
+ if (FieldOffset >= EndBit) break;
+
+ unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0;
+ if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset,
+ Context))
+ return false;
+ }
+
+ // If nothing in this record overlapped the area of interest, then we're
+ // clean.
+ return true;
+ }
+
+ return false;
+}
+
+/// getFPTypeAtOffset - Return a floating point type at the specified offset.
+static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
+ const llvm::DataLayout &TD) {
+ if (IROffset == 0 && IRType->isFloatingPointTy())
+ return IRType;
+
+ // If this is a struct, recurse into the field at the specified offset.
+ if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
+ if (!STy->getNumContainedTypes())
+ return nullptr;
+
+ const llvm::StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Elt = SL->getElementContainingOffset(IROffset);
+ IROffset -= SL->getElementOffset(Elt);
+ return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
+ }
+
+ // If this is an array, recurse into the field at the specified offset.
+ if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
+ llvm::Type *EltTy = ATy->getElementType();
+ unsigned EltSize = TD.getTypeAllocSize(EltTy);
+ IROffset -= IROffset / EltSize * EltSize;
+ return getFPTypeAtOffset(EltTy, IROffset, TD);
+ }
+
+ return nullptr;
+}
+
+/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
+/// low 8 bytes of an XMM register, corresponding to the SSE class.
+llvm::Type *X86_64ABIInfo::
+GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
+ QualType SourceTy, unsigned SourceOffset) const {
+ const llvm::DataLayout &TD = getDataLayout();
+ unsigned SourceSize =
+ (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
+ llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
+ if (!T0 || T0->isDoubleTy())
+ return llvm::Type::getDoubleTy(getVMContext());
+
+ // Get the adjacent FP type.
+ llvm::Type *T1 = nullptr;
+ unsigned T0Size = TD.getTypeAllocSize(T0);
+ if (SourceSize > T0Size)
+ T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
+ if (T1 == nullptr) {
+ // Check if IRType is a half/bfloat + float; the float will be at
+ // IROffset + 4 due to its alignment.
+ if (T0->is16bitFPTy() && SourceSize > 4)
+ T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
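+ // Resulting picks, for example (a sketch): struct { float a, b; } yields
+ // <2 x float> in one XMM register, and struct { _Float16 h; float f; }
+ // yields <4 x half> (the float sits at offset 4 because of its alignment).
+
+ // If we can't get a second FP type, return a simple half or float.
+ // avx512fp16-abi.c:pr51813_2 shows it works to return float for
+ // {float, i8} too.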
+ if (T1 == nullptr) + return T0; + } + + if (T0->isFloatTy() && T1->isFloatTy()) + return llvm::FixedVectorType::get(T0, 2); + + if (T0->is16bitFPTy() && T1->is16bitFPTy()) { + llvm::Type *T2 = nullptr; + if (SourceSize > 4) + T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); + if (T2 == nullptr) + return llvm::FixedVectorType::get(T0, 2); + return llvm::FixedVectorType::get(T0, 4); + } + + if (T0->is16bitFPTy() || T1->is16bitFPTy()) + return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); + + return llvm::Type::getDoubleTy(getVMContext()); +} + + +/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in +/// an 8-byte GPR. This means that we either have a scalar or we are talking +/// about the high or low part of an up-to-16-byte struct. This routine picks +/// the best LLVM IR type to represent this, which may be i64 or may be anything +/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, +/// etc). +/// +/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for +/// the source type. IROffset is an offset in bytes into the LLVM IR type that +/// the 8-byte value references. PrefType may be null. +/// +/// SourceTy is the source-level type for the entire argument. SourceOffset is +/// an offset into this that we're processing (which is always either 0 or 8). +/// +llvm::Type *X86_64ABIInfo:: +GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, + QualType SourceTy, unsigned SourceOffset) const { + // If we're dealing with an un-offset LLVM IR type, then it means that we're + // returning an 8-byte unit starting with it. See if we can safely use it. + if (IROffset == 0) { + // Pointers and int64's always fill the 8-byte unit. + if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || + IRType->isIntegerTy(64)) + return IRType; + + // If we have a 1/2/4-byte integer, we can use it only if the rest of the + // goodness in the source type is just tail padding. This is allowed to + // kick in for struct {double,int} on the int, but not on + // struct{double,int,int} because we wouldn't return the second int. We + // have to do this analysis on the source type because we can't depend on + // unions being lowered a specific way etc. + if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || + IRType->isIntegerTy(32) || + (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { + unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : + cast<llvm::IntegerType>(IRType)->getBitWidth(); + + if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, + SourceOffset*8+64, getContext())) + return IRType; + } + } + + if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { + // If this is a struct, recurse into the field at the specified offset. 
+ const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
+ if (IROffset < SL->getSizeInBytes()) {
+ unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
+ IROffset -= SL->getElementOffset(FieldIdx);
+
+ return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
+ SourceTy, SourceOffset);
+ }
+ }
+
+ if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
+ llvm::Type *EltTy = ATy->getElementType();
+ unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
+ unsigned EltOffset = IROffset/EltSize*EltSize;
+ return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
+ SourceOffset);
+ }
+
+ // Okay, we don't have any better idea of what to pass, so we pass this in an
+ // integer register that isn't too big to fit the rest of the struct.
+ unsigned TySizeInBytes =
+ (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
+
+ assert(TySizeInBytes != SourceOffset && "Empty field?");
+
+ // It is always safe to classify this as an integer type up to i64 that
+ // isn't larger than the structure.
+ return llvm::IntegerType::get(getVMContext(),
+ std::min(TySizeInBytes-SourceOffset, 8U)*8);
+}
+
+/// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
+/// be used as elements of a two register pair to pass or return, return a
+/// first class aggregate to represent them. For example, if the low part of
+/// a by-value argument should be passed as i32* and the high part as float,
+/// return {i32*, float}.
+static llvm::Type *
+GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
+ const llvm::DataLayout &TD) {
+ // In order to correctly satisfy the ABI, we need the high part to start
+ // at offset 8. If the high and low parts we inferred are both 4-byte types
+ // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
+ // the second element at offset 8. Check for this:
+ unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
+ llvm::Align HiAlign = TD.getABITypeAlign(Hi);
+ unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
+ assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
+
+ // To handle this, we have to increase the size of the low part so that the
+ // second element will start at an 8 byte offset. We can't increase the size
+ // of the second element because it might make us access off the end of the
+ // struct.
+ if (HiStart != 8) {
+ // There are usually two sorts of types the ABI generation code can produce
+ // for the low part of a pair that aren't 8 bytes in size: half, float or
+ // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and
+ // NaCl).
+ // Promote these to a larger type.
+ if (Lo->isHalfTy() || Lo->isFloatTy())
+ Lo = llvm::Type::getDoubleTy(Lo->getContext());
+ else {
+ assert((Lo->isIntegerTy() || Lo->isPointerTy())
+ && "Invalid/unknown lo type");
+ Lo = llvm::Type::getInt64Ty(Lo->getContext());
+ }
+ }
+
+ llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
+
+ // Verify that the second element is at an 8-byte offset.
+ assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
+ "Invalid x86-64 argument pair!");
+ return Result;
+}
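+// Worked example (a sketch): struct { double d; int i; } classifies as
+// { SSE, INTEGER }. GetSSETypeAtOffset picks double for eightbyte 0, and
+// GetINTEGERTypeAtOffset picks the i32 at byte offset 8 because bits
+// [96, 128) are tail padding (BitsContainNoUserData). alignTo(8, 4) == 8,
+// so the pair stays { double, i32 }. Had the low part been a float with a
+// 4-byte-aligned high part, the float would first be widened to double so
+// the high part lands at offset 8.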
+ABIArgInfo X86_64ABIInfo::
+classifyReturnType(QualType RetTy) const {
+ // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
+ // classification algorithm.
+ X86_64ABIInfo::Class Lo, Hi;
+ classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
+
+ // Check some invariants.
+ assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
+ assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
+
+ llvm::Type *ResType = nullptr;
+ switch (Lo) {
+ case NoClass:
+ if (Hi == NoClass)
+ return ABIArgInfo::getIgnore();
+ // If the low part is just padding, it takes no register, leave ResType
+ // null.
+ assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
+ "Unknown missing lo part");
+ break;
+
+ case SSEUp:
+ case X87Up:
+ llvm_unreachable("Invalid classification for lo word.");
+
+ // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
+ // hidden argument.
+ case Memory:
+ return getIndirectReturnResult(RetTy);
+
+ // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
+ // available register of the sequence %rax, %rdx is used.
+ case Integer:
+ ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
+
+ // If we have a sign or zero extended integer, make sure to return Extend
+ // so that the parameter gets the right LLVM IR attributes.
+ if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
+ RetTy = EnumTy->getDecl()->getIntegerType();
+
+ if (RetTy->isIntegralOrEnumerationType() &&
+ isPromotableIntegerTypeForABI(RetTy))
+ return ABIArgInfo::getExtend(RetTy);
+ }
+ break;
+
+ // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
+ // available SSE register of the sequence %xmm0, %xmm1 is used.
+ case SSE:
+ ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
+ break;
+
+ // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
+ // returned on the X87 stack in %st0 as 80-bit x87 number.
+ case X87:
+ ResType = llvm::Type::getX86_FP80Ty(getVMContext());
+ break;
+
+ // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
+ // part of the value is returned in %st0 and the imaginary part in
+ // %st1.
+ case ComplexX87:
+ assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
+ ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
+ llvm::Type::getX86_FP80Ty(getVMContext()));
+ break;
+ }
+
+ llvm::Type *HighPart = nullptr;
+ switch (Hi) {
+ // Memory was handled previously and X87 should
+ // never occur as a hi class.
+ case Memory:
+ case X87:
+ llvm_unreachable("Invalid classification for hi word.");
+
+ case ComplexX87: // Previously handled.
+ case NoClass:
+ break;
+
+ case Integer:
+ HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
+ if (Lo == NoClass) // Return HighPart at offset 8 in memory.
+ return ABIArgInfo::getDirect(HighPart, 8);
+ break;
+ case SSE:
+ HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
+ if (Lo == NoClass) // Return HighPart at offset 8 in memory.
+ return ABIArgInfo::getDirect(HighPart, 8);
+ break;
+
+ // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
+ // is passed in the next available eightbyte chunk of the last used
+ // vector register.
+ //
+ // SSEUP should always be preceded by SSE, just widen.
+ case SSEUp:
+ assert(Lo == SSE && "Unexpected SSEUp classification.");
+ ResType = GetByteVectorType(RetTy);
+ break;
+
+ // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
+ // returned together with the previous X87 value in %st0.
+ case X87Up:
+ // If X87Up is preceded by X87, we don't need to do
+ // anything. However, in some cases with unions it may not be
+ // preceded by X87. In such situations we follow gcc and pass the
+ // extra bits in an SSE reg.
+ if (Lo != X87) {
+ HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
+ if (Lo == NoClass) // Return HighPart at offset 8 in memory.
+ return ABIArgInfo::getDirect(HighPart, 8);
+ }
+ break;
+ }
+
+ // If a high part was specified, merge it together with the low part. It is
+ // known to pass in the high eightbyte of the result. We do this by forming a
+ // first class struct aggregate with the high and low part: {low, high}
+ if (HighPart)
+ ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
+
+ return ABIArgInfo::getDirect(ResType);
+}
+
+ABIArgInfo
+X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
+ unsigned &neededInt, unsigned &neededSSE,
+ bool isNamedArg, bool IsRegCall) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ X86_64ABIInfo::Class Lo, Hi;
+ classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall);
+
+ // Check some invariants.
+ // FIXME: Enforce these by construction.
+ assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
+ assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
+
+ neededInt = 0;
+ neededSSE = 0;
+ llvm::Type *ResType = nullptr;
+ switch (Lo) {
+ case NoClass:
+ if (Hi == NoClass)
+ return ABIArgInfo::getIgnore();
+ // If the low part is just padding, it takes no register, leave ResType
+ // null.
+ assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
+ "Unknown missing lo part");
+ break;
+
+ // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
+ // on the stack.
+ case Memory:
+
+ // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
+ // COMPLEX_X87, it is passed in memory.
+ case X87:
+ case ComplexX87:
+ if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect)
+ ++neededInt;
+ return getIndirectResult(Ty, freeIntRegs);
+
+ case SSEUp:
+ case X87Up:
+ llvm_unreachable("Invalid classification for lo word.");
+
+ // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
+ // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
+ // and %r9 is used.
+ case Integer:
+ ++neededInt;
+
+ // Pick an 8-byte type based on the preferred type.
+ ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0);
+
+ // If we have a sign or zero extended integer, make sure to return Extend
+ // so that the parameter gets the right LLVM IR attributes.
+ if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ if (Ty->isIntegralOrEnumerationType() &&
+ isPromotableIntegerTypeForABI(Ty))
+ return ABIArgInfo::getExtend(Ty);
+ }
+
+ break;
+
+ // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
+ // available SSE register is used, the registers are taken in the
+ // order from %xmm0 to %xmm7.
+ case SSE: {
+ llvm::Type *IRType = CGT.ConvertType(Ty);
+ ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0);
+ ++neededSSE;
+ break;
+ }
+ }
+
+ llvm::Type *HighPart = nullptr;
+ switch (Hi) {
+ // Memory was handled previously, ComplexX87 and X87 should
+ // never occur as hi classes, and X87Up must be preceded by X87,
+ // which is passed in memory.
+ case Memory:
+ case X87:
+ case ComplexX87:
+ llvm_unreachable("Invalid classification for hi word.");
+
+ case NoClass: break;
+
+ case Integer:
+ ++neededInt;
+ // Pick an 8-byte type based on the preferred type.
+ HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
+
+ if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
+ return ABIArgInfo::getDirect(HighPart, 8);
+ break;
+
+ // X87Up generally doesn't occur here (long double is passed in
+ // memory), except in situations involving unions.
+ case X87Up:
+ case SSE:
+ HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
+
+ if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
+ return ABIArgInfo::getDirect(HighPart, 8);
+
+ ++neededSSE;
+ break;
+
+ // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
+ // eightbyte is passed in the upper half of the last used SSE
+ // register. This only happens when 128-bit vectors are passed.
+ case SSEUp:
+ assert(Lo == SSE && "Unexpected SSEUp classification");
+ ResType = GetByteVectorType(Ty);
+ break;
+ }
+
+ // If a high part was specified, merge it together with the low part. It is
+ // known to pass in the high eightbyte of the result. We do this by forming a
+ // first class struct aggregate with the high and low part: {low, high}
+ if (HighPart)
+ ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
+
+ return ABIArgInfo::getDirect(ResType);
+}
+
+ABIArgInfo
+X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
+ unsigned &NeededSSE,
+ unsigned &MaxVectorWidth) const {
+ auto RT = Ty->getAs<RecordType>();
+ assert(RT && "classifyRegCallStructType only valid with struct types");
+
+ if (RT->getDecl()->hasFlexibleArrayMember())
+ return getIndirectReturnResult(Ty);
+
+ // Sum up bases
+ if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
+ if (CXXRD->isDynamicClass()) {
+ NeededInt = NeededSSE = 0;
+ return getIndirectReturnResult(Ty);
+ }
+
+ for (const auto &I : CXXRD->bases())
+ if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE,
+ MaxVectorWidth)
+ .isIndirect()) {
+ NeededInt = NeededSSE = 0;
+ return getIndirectReturnResult(Ty);
+ }
+ }
+
+ // Sum up members
+ for (const auto *FD : RT->getDecl()->fields()) {
+ QualType MTy = FD->getType();
+ if (MTy->isRecordType() && !MTy->isUnionType()) {
+ if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE,
+ MaxVectorWidth)
+ .isIndirect()) {
+ NeededInt = NeededSSE = 0;
+ return getIndirectReturnResult(Ty);
+ }
+ } else {
+ unsigned LocalNeededInt, LocalNeededSSE;
+ if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE,
+ true, true)
+ .isIndirect()) {
+ NeededInt = NeededSSE = 0;
+ return getIndirectReturnResult(Ty);
+ }
+ if (const auto *AT = getContext().getAsConstantArrayType(MTy))
+ MTy = AT->getElementType();
+ if (const auto *VT = MTy->getAs<VectorType>())
+ if (getContext().getTypeSize(VT) > MaxVectorWidth)
+ MaxVectorWidth = getContext().getTypeSize(VT);
+ NeededInt += LocalNeededInt;
+ NeededSSE += LocalNeededSSE;
+ }
+ }
+
+ return ABIArgInfo::getDirect();
+}
+
+ABIArgInfo
+X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
+ unsigned &NeededSSE,
+ unsigned &MaxVectorWidth) const {
+
+ NeededInt = 0;
+ NeededSSE = 0;
+ MaxVectorWidth = 0;
+
+ return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
+ MaxVectorWidth);
+}
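+// e.g. (a sketch): for __regcall, struct { double a, b; int c; } sums to
+// NeededSSE = 2 and NeededInt = 1 across its fields and stays direct as
+// long as enough of the regcall registers (16 SSE, 11 GPR) remain;
+// otherwise it is passed or returned indirectly.
+
+void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+
+ const unsigned CallingConv = FI.getCallingConvention();
+ // It is possible to force the Win64 calling convention on any x86_64 target
+ // by using __attribute__((ms_abi)). In that case, to emit Win64-compatible
+ // code correctly, delegate this call to WinX86_64ABIInfo::computeInfo.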
+ if (CallingConv == llvm::CallingConv::Win64) { + WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); + Win64ABIInfo.computeInfo(FI); + return; + } + + bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; + + // Keep track of the number of assigned registers. + unsigned FreeIntRegs = IsRegCall ? 11 : 6; + unsigned FreeSSERegs = IsRegCall ? 16 : 8; + unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; + + if (!::classifyReturnType(getCXXABI(), FI, *this)) { + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = classifyRegCallStructType( + FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && + getContext().getCanonicalType(FI.getReturnType() + ->getAs<ComplexType>() + ->getElementType()) == + getContext().LongDoubleTy) + // Complex Long Double Type is passed in Memory when Regcall + // calling convention is used. + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + else + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + } + + // If the return value is indirect, then the hidden argument is consuming one + // integer register. + if (FI.getReturnInfo().isIndirect()) + --FreeIntRegs; + else if (NeededSSE && MaxVectorWidth > 0) + FI.setMaxVectorWidth(MaxVectorWidth); + + // The chain argument effectively gives us another free register. + if (FI.isChainCall()) + ++FreeIntRegs; + + unsigned NumRequiredArgs = FI.getNumRequiredArgs(); + // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers + // get assigned (in left-to-right order) for passing as follows... + unsigned ArgNo = 0; + for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it, ++ArgNo) { + bool IsNamedArg = ArgNo < NumRequiredArgs; + + if (IsRegCall && it->type->isStructureOrClassType()) + it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, + MaxVectorWidth); + else + it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, + NeededSSE, IsNamedArg); + + // AMD64-ABI 3.2.3p3: If there are no registers available for any + // eightbyte of an argument, the whole argument is passed on the + // stack. If registers have already been assigned for some + // eightbytes of such an argument, the assignments get reverted. + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + if (MaxVectorWidth > FI.getMaxVectorWidth()) + FI.setMaxVectorWidth(MaxVectorWidth); + } else { + it->info = getIndirectResult(it->type, FreeIntRegs); + } + } +} + +static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, + Address VAListAddr, QualType Ty) { + Address overflow_arg_area_p = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); + llvm::Value *overflow_arg_area = + CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); + + // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 + // byte boundary if alignment needed by type exceeds 8 byte boundary. + // It isn't stated explicitly in the standard, but in practice we use + // alignment greater than 16 where necessary. 
+ CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
+ if (Align > CharUnits::fromQuantity(8)) {
+ overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area,
+ Align);
+ }
+
+ // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+ llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
+ llvm::Value *Res =
+ CGF.Builder.CreateBitCast(overflow_arg_area,
+ llvm::PointerType::getUnqual(LTy));
+
+ // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+ // l->overflow_arg_area + sizeof(type).
+ // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+ // an 8 byte boundary.
+
+ uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8;
+ llvm::Value *Offset =
+ llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7);
+ overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area,
+ Offset, "overflow_arg_area.next");
+ CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
+
+ // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
+ return Address(Res, LTy, Align);
+}
+
+Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ // Assume that va_list type is correct; should be pointer to LLVM type:
+ // struct {
+ // i32 gp_offset;
+ // i32 fp_offset;
+ // i8* overflow_arg_area;
+ // i8* reg_save_area;
+ // };
+ unsigned neededInt, neededSSE;
+
+ Ty = getContext().getCanonicalType(Ty);
+ ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
+ /*isNamedArg*/false);
+
+ // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+ // in the registers. If not go to step 7.
+ if (!neededInt && !neededSSE)
+ return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
+
+ // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+ // general purpose registers needed to pass type and num_fp to hold
+ // the number of floating point registers needed.
+
+ // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+ // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+ // l->fp_offset > 304 - num_fp * 16 go to step 7.
+ //
+ // NOTE: 304 is a typo; there are (6 * 8 + 8 * 16) = 176 bytes of
+ // register save space.
+
+ llvm::Value *InRegs = nullptr;
+ Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
+ llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
+ if (neededInt) {
+ gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
+ gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
+ InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
+ InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
+ }
+
+ if (neededSSE) {
+ fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
+ fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
+ llvm::Value *FitsInFP =
+ llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
+ FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
+ InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP;
+ }
+
+ llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+ llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
+ llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+ CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
+
+ // Emit code to load the value if it was passed in registers.
+
+ CGF.EmitBlock(InRegBlock);
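+ // Numeric sketch of the fit check above: for struct { double d; long l; }
+ // (neededInt == 1, neededSSE == 1) the emitted test is
+ //   gp_offset <= 48 - 1 * 8 == 40  &&  fp_offset <= 176 - 1 * 16 == 160,
+ // i.e. both register classes must still have a free save-area slot.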
+ // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+ // an offset of l->gp_offset and/or l->fp_offset. This may require
+ // copying to a temporary location in case the parameter is passed
+ // in different register classes or requires an alignment greater
+ // than 8 for general purpose registers and 16 for XMM registers.
+ //
+ // FIXME: This really results in shameful code when we end up needing to
+ // collect arguments from different places; often what should result in a
+ // simple assembling of a structure from scattered addresses has many more
+ // loads than necessary. Can we clean this up?
+ llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
+ llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
+ CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");
+
+ Address RegAddr = Address::invalid();
+ if (neededInt && neededSSE) {
+ // FIXME: Cleanup.
+ assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
+ llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
+ Address Tmp = CGF.CreateMemTemp(Ty);
+ Tmp = Tmp.withElementType(ST);
+ assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
+ llvm::Type *TyLo = ST->getElementType(0);
+ llvm::Type *TyHi = ST->getElementType(1);
+ assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
+ "Unexpected ABI info for mixed regs");
+ llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
+ llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
+ llvm::Value *GPAddr =
+ CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
+ llvm::Value *FPAddr =
+ CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
+ llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
+ llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
+
+ // Copy the first element.
+ // FIXME: Our choice of alignment here and below is probably pessimistic.
+ llvm::Value *V = CGF.Builder.CreateAlignedLoad(
+ TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
+ CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
+
+ // Copy the second element.
+ V = CGF.Builder.CreateAlignedLoad(
+ TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
+ CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
+ CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
+
+ RegAddr = Tmp.withElementType(LTy);
+ } else if (neededInt) {
+ RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
+ LTy, CharUnits::fromQuantity(8));
+
+ // Copy to a temporary if necessary to ensure the appropriate alignment.
+ auto TInfo = getContext().getTypeInfoInChars(Ty);
+ uint64_t TySize = TInfo.Width.getQuantity();
+ CharUnits TyAlign = TInfo.Align;
+
+ // Copy into a temporary if the type is more aligned than the
+ // register save area.
+ if (TyAlign.getQuantity() > 8) {
+ Address Tmp = CGF.CreateMemTemp(Ty);
+ CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
+ RegAddr = Tmp;
+ }
+
+ } else if (neededSSE == 1) {
+ RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
+ LTy, CharUnits::fromQuantity(16));
+ } else {
+ assert(neededSSE == 2 && "Invalid number of needed registers!");
+ // SSE registers are spaced 16 bytes apart in the register save
+ // area, so we need to collect the two eightbytes together.
+ // The ABI isn't explicit about this, but it seems reasonable + // to assume that the slots are 16-byte aligned, since the stack is + // naturally 16-byte aligned and the prologue is expected to store + // all the SSE registers to the RSA. + Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, + fp_offset), + CGF.Int8Ty, CharUnits::fromQuantity(16)); + Address RegAddrHi = + CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, + CharUnits::fromQuantity(16)); + llvm::Type *ST = AI.canHaveCoerceToType() + ? AI.getCoerceToType() + : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy); + llvm::Value *V; + Address Tmp = CGF.CreateMemTemp(Ty); + Tmp = Tmp.withElementType(ST); + V = CGF.Builder.CreateLoad( + RegAddrLo.withElementType(ST->getStructElementType(0))); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); + V = CGF.Builder.CreateLoad( + RegAddrHi.withElementType(ST->getStructElementType(1))); + CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); + + RegAddr = Tmp.withElementType(LTy); + } + + // AMD64-ABI 3.5.7p5: Step 5. Set: + // l->gp_offset = l->gp_offset + num_gp * 8 + // l->fp_offset = l->fp_offset + num_fp * 16. + if (neededInt) { + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8); + CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset), + gp_offset_p); + } + if (neededSSE) { + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16); + CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset), + fp_offset_p); + } + CGF.EmitBranch(ContBlock); + + // Emit code to load the value if it was passed in memory. + + CGF.EmitBlock(InMemBlock); + Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); + + // Return the appropriate result. + + CGF.EmitBlock(ContBlock); + Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, + "vaarg.addr"); + return ResAddr; +} + +Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is + // not 1, 2, 4, or 8 bytes, must be passed by reference." 
+  uint64_t Width = getContext().getTypeSize(Ty);
+  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
+
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
+                          CGF.getContext().getTypeInfoInChars(Ty),
+                          CharUnits::fromQuantity(8),
+                          /*allowHigherAlign*/ false);
+}
+
+ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
+    QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
+  const Type *Base = nullptr;
+  uint64_t NumElts = 0;
+
+  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
+    FreeSSERegs -= NumElts;
+    return getDirectX86Hva();
+  }
+  return current;
+}
+
+ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
+                                      bool IsReturnType, bool IsVectorCall,
+                                      bool IsRegCall) const {
+
+  if (Ty->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+    Ty = EnumTy->getDecl()->getIntegerType();
+
+  TypeInfo Info = getContext().getTypeInfo(Ty);
+  uint64_t Width = Info.Width;
+  CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);
+
+  const RecordType *RT = Ty->getAs<RecordType>();
+  if (RT) {
+    if (!IsReturnType) {
+      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
+        return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+    }
+
+    if (RT->getDecl()->hasFlexibleArrayMember())
+      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+
+  }
+
+  const Type *Base = nullptr;
+  uint64_t NumElts = 0;
+  // vectorcall adds the concept of a homogeneous vector aggregate, similar to
+  // other targets.
+  if ((IsVectorCall || IsRegCall) &&
+      isHomogeneousAggregate(Ty, Base, NumElts)) {
+    if (IsRegCall) {
+      if (FreeSSERegs >= NumElts) {
+        FreeSSERegs -= NumElts;
+        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+          return ABIArgInfo::getDirect();
+        return ABIArgInfo::getExpand();
+      }
+      return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+    } else if (IsVectorCall) {
+      if (FreeSSERegs >= NumElts &&
+          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
+        FreeSSERegs -= NumElts;
+        return ABIArgInfo::getDirect();
+      } else if (IsReturnType) {
+        return ABIArgInfo::getExpand();
+      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+        // HVAs are delayed and reclassified in the 2nd step.
+        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+      }
+    }
+  }
+
+  if (Ty->isMemberPointerType()) {
+    // If the member pointer is represented by an LLVM int or ptr, pass it
+    // directly.
+    llvm::Type *LLTy = CGT.ConvertType(Ty);
+    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
+      return ABIArgInfo::getDirect();
+  }
+
+  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
+    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+    // not 1, 2, 4, or 8 bytes, must be passed by reference."
+    if (Width > 64 || !llvm::isPowerOf2_64(Width))
+      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+
+    // Otherwise, coerce it to a small integer.
+    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
+  }
+
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    switch (BT->getKind()) {
+    case BuiltinType::Bool:
+      // Bool type is always extended to the ABI; other builtin types are not
+      // extended.
+      return ABIArgInfo::getExtend(Ty);
+
+    case BuiltinType::LongDouble:
+      // Mingw64 GCC uses the old 80 bit extended precision floating point
+      // unit. It passes them indirectly through memory.
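+      // For illustration: with the MSVC environment long double is a plain
+      // 64-bit double and falls through to the default direct handling below;
+      // only the 80-bit x87 format used by MinGW takes the indirect path.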
+      if (IsMingw64) {
+        const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
+        if (LDF == &llvm::APFloat::x87DoubleExtended())
+          return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+      }
+      break;
+
+    case BuiltinType::Int128:
+    case BuiltinType::UInt128:
+      // If it's a parameter type, the normal ABI rule is that arguments larger
+      // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
+      // even though it isn't particularly efficient.
+      if (!IsReturnType)
+        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+
+      // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
+      // Clang matches them for compatibility.
+      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+          llvm::Type::getInt64Ty(getVMContext()), 2));
+
+    default:
+      break;
+    }
+  }
+
+  if (Ty->isBitIntType()) {
+    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+    // not 1, 2, 4, or 8 bytes, must be passed by reference."
+    // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
+    // or 8 bytes anyway as long as it fits in them, so we don't have to check
+    // the power of 2.
+    if (Width <= 64)
+      return ABIArgInfo::getDirect();
+    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+  }
+
+  return ABIArgInfo::getDirect();
+}
+
+void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  const unsigned CC = FI.getCallingConvention();
+  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
+  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
+
+  // If __attribute__((sysv_abi)) is in use, use the SysV argument
+  // classification rules.
+  if (CC == llvm::CallingConv::X86_64_SysV) {
+    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
+    SysVABIInfo.computeInfo(FI);
+    return;
+  }
+
+  unsigned FreeSSERegs = 0;
+  if (IsVectorCall) {
+    // We can use up to 4 SSE return registers with vectorcall.
+    FreeSSERegs = 4;
+  } else if (IsRegCall) {
+    // RegCall gives us 16 SSE registers.
+    FreeSSERegs = 16;
+  }
+
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
+                                  IsVectorCall, IsRegCall);
+
+  if (IsVectorCall) {
+    // We can use up to 6 SSE register parameters with vectorcall.
+    FreeSSERegs = 6;
+  } else if (IsRegCall) {
+    // RegCall gives us 16 SSE registers; we can reuse the return registers.
+    FreeSSERegs = 16;
+  }
+
+  unsigned ArgNum = 0;
+  unsigned ZeroSSERegs = 0;
+  for (auto &I : FI.arguments()) {
+    // Vectorcall in x64 only permits the first 6 arguments to be passed as
+    // XMM/YMM registers. After the sixth argument, pretend no vector
+    // registers are left.
+    unsigned *MaybeFreeSSERegs =
+        (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
+    I.info =
+        classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
+    ++ArgNum;
+  }
+
+  if (IsVectorCall) {
+    // For vectorcall, assign aggregate HVAs to any free vector registers in a
+    // second pass.
+    for (auto &I : FI.arguments())
+      I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
+  }
+}
+
+Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                    QualType Ty) const {
+  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+  // not 1, 2, 4, or 8 bytes, must be passed by reference."
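+  // For illustration: every va_list slot here is 8 bytes and, because
+  // allowHigherAlign is false below, an over-aligned type is still read from
+  // a plain 8-byte slot (by reference if it is large or oddly sized).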
+  uint64_t Width = getContext().getTypeSize(Ty);
+  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
+
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
+                          CGF.getContext().getTypeInfoInChars(Ty),
+                          CharUnits::fromQuantity(8),
+                          /*allowHigherAlign*/ false);
+}
+
+std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
+    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
+    unsigned NumRegisterParameters, bool SoftFloatABI) {
+  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
+      CGM.getTriple(), CGM.getCodeGenOpts());
+  return std::make_unique<X86_32TargetCodeGenInfo>(
+      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
+      NumRegisterParameters, SoftFloatABI);
+}
+
+std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
+    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
+    unsigned NumRegisterParameters) {
+  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
+      CGM.getTriple(), CGM.getCodeGenOpts());
+  return std::make_unique<WinX86_32TargetCodeGenInfo>(
+      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
+      NumRegisterParameters);
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
+                                       X86AVXABILevel AVXLevel) {
+  return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
+                                          X86AVXABILevel AVXLevel) {
+  return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
+}
diff --git a/clang/lib/CodeGen/Targets/XCore.cpp b/clang/lib/CodeGen/Targets/XCore.cpp
new file mode 100644
index 000000000000..8be240c018d0
--- /dev/null
+++ b/clang/lib/CodeGen/Targets/XCore.cpp
@@ -0,0 +1,662 @@
+//===- XCore.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "TargetInfo.h"
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+//===----------------------------------------------------------------------===//
+// XCore ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// A SmallStringEnc instance is used to build up the TypeString by passing
+/// it by reference between functions that append to it.
+typedef llvm::SmallString<128> SmallStringEnc;
+
+/// TypeStringCache caches the meta encodings of Types.
+///
+/// The reason for caching TypeStrings is twofold:
+/// 1. To cache a type's encoding for later uses;
+/// 2. As a means to break recursive member type inclusion.
+///
+/// A cache Entry can have a Status of:
+/// NonRecursive: The type encoding is not recursive;
+/// Recursive: The type encoding is recursive;
+/// Incomplete: An incomplete TypeString;
+/// IncompleteUsed: An incomplete TypeString that has been used in a
+/// Recursive type encoding.
+///
+/// A NonRecursive entry will have all of its sub-members expanded as fully
+/// as possible. Whilst it may contain types which are recursive, the type
+/// itself is not recursive and thus its encoding may be safely used whenever
+/// the type is encountered.
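+///
+/// For illustration, 'struct S { int x; };' yields the NonRecursive encoding
+/// "s(S){m(x){si}}" (see the append* helpers below for the exact format).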
+///
+/// A Recursive entry will have all of its sub-members expanded as fully as
+/// possible. The type itself is recursive and it may contain other types which
+/// are recursive. The Recursive encoding must not be used during the expansion
+/// of a recursive type's recursive branch. For simplicity the code uses
+/// IncompleteCount to reject all usage of Recursive encodings for member types.
+///
+/// An Incomplete entry is always a RecordType and only encodes its
+/// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and
+/// are placed into the cache during type expansion as a means to identify and
+/// handle recursive inclusion of types as sub-members. If there is recursion
+/// the entry becomes IncompleteUsed.
+///
+/// During the expansion of a RecordType's members:
+///
+/// If the cache contains a NonRecursive encoding for the member type, the
+/// cached encoding is used;
+///
+/// If the cache contains a Recursive encoding for the member type, the
+/// cached encoding is 'Swapped' out, as it may be incorrect, and...
+///
+/// If the member is a RecordType, an Incomplete encoding is placed into the
+/// cache to break potential recursive inclusion of itself as a sub-member;
+///
+/// Once a member RecordType has been expanded, its temporary incomplete
+/// entry is removed from the cache. If a Recursive encoding was swapped out
+/// it is swapped back in;
+///
+/// If an incomplete entry is used to expand a sub-member, the incomplete
+/// entry is marked as IncompleteUsed. The cache keeps count of how many
+/// IncompleteUsed entries it currently contains in IncompleteUsedCount;
+///
+/// If a member's encoding is found to be a NonRecursive or Recursive viz:
+/// IncompleteUsedCount==0, the member's encoding is added to the cache.
+/// Else the member is part of a recursive type and thus the recursion has
+/// been exited too soon for the encoding to be correct for the member.
+///
+class TypeStringCache {
+  enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed};
+  struct Entry {
+    std::string Str;     // The encoded TypeString for the type.
+    enum Status State;   // Information about the encoding in 'Str'.
+    std::string Swapped; // A temporary place holder for a Recursive encoding
+                         // during the expansion of RecordType's members.
+  };
+  std::map<const IdentifierInfo *, struct Entry> Map;
+  unsigned IncompleteCount;     // Number of Incomplete entries in the Map.
+  unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map.
+public:
+  TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}
+  void addIncomplete(const IdentifierInfo *ID, std::string StubEnc);
+  bool removeIncomplete(const IdentifierInfo *ID);
+  void addIfComplete(const IdentifierInfo *ID, StringRef Str,
+                     bool IsRecursive);
+  StringRef lookupStr(const IdentifierInfo *ID);
+};
+
+/// TypeString encodings for enum & union fields must be ordered.
+/// FieldEncoding is a helper for this ordering process.
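+/// For illustration, a union's fields are sorted by their encoded strings,
+/// with named fields ordering before unnamed ones (see operator< below).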
+class FieldEncoding { + bool HasName; + std::string Enc; +public: + FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} + StringRef str() { return Enc; } + bool operator<(const FieldEncoding &rhs) const { + if (HasName != rhs.HasName) return HasName; + return Enc < rhs.Enc; + } +}; + +class XCoreABIInfo : public DefaultABIInfo { +public: + XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { + mutable TypeStringCache TSC; + void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + const CodeGen::CodeGenModule &M) const; + +public: + XCoreTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {} + void emitTargetMetadata(CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> + &MangledDeclNames) const override; +}; + +} // End anonymous namespace. + +// TODO: this implementation is likely now redundant with the default +// EmitVAArg. +Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + CGBuilderTy &Builder = CGF.Builder; + + // Get the VAList. + CharUnits SlotSize = CharUnits::fromQuantity(4); + Address AP = Address(Builder.CreateLoad(VAListAddr), + getVAListElementType(CGF), SlotSize); + + // Handle the argument. + ABIArgInfo AI = classifyArgumentType(Ty); + CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty); + llvm::Type *ArgTy = CGT.ConvertType(Ty); + if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) + AI.setCoerceToType(ArgTy); + llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); + + Address Val = Address::invalid(); + CharUnits ArgSize = CharUnits::Zero(); + switch (AI.getKind()) { + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + case ABIArgInfo::InAlloca: + llvm_unreachable("Unsupported ABI kind for va_arg"); + case ABIArgInfo::Ignore: + Val = Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeAlign); + ArgSize = CharUnits::Zero(); + break; + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: + Val = AP.withElementType(ArgTy); + ArgSize = CharUnits::fromQuantity( + getDataLayout().getTypeAllocSize(AI.getCoerceToType())); + ArgSize = ArgSize.alignTo(SlotSize); + break; + case ABIArgInfo::Indirect: + case ABIArgInfo::IndirectAliased: + Val = AP.withElementType(ArgPtrTy); + Val = Address(Builder.CreateLoad(Val), ArgTy, TypeAlign); + ArgSize = SlotSize; + break; + } + + // Increment the VAList. + if (!ArgSize.isZero()) { + Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize); + Builder.CreateStore(APN.getPointer(), VAListAddr); + } + + return Val; +} + +/// During the expansion of a RecordType, an incomplete TypeString is placed +/// into the cache as a means to identify and break recursion. +/// If there is a Recursive encoding in the cache, it is swapped out and will +/// be reinserted by removeIncomplete(). +/// All other types of encoding should have been used rather than arriving here. 
+void TypeStringCache::addIncomplete(const IdentifierInfo *ID,
+                                    std::string StubEnc) {
+  if (!ID)
+    return;
+  Entry &E = Map[ID];
+  assert((E.Str.empty() || E.State == Recursive) &&
+         "Incorrect use of addIncomplete");
+  assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()");
+  E.Swapped.swap(E.Str); // swap out the Recursive
+  E.Str.swap(StubEnc);
+  E.State = Incomplete;
+  ++IncompleteCount;
+}
+
+/// Once the RecordType has been expanded, the temporary incomplete TypeString
+/// must be removed from the cache.
+/// If a Recursive was swapped out by addIncomplete(), it will be replaced.
+/// Returns true if the RecordType was defined recursively.
+bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
+  if (!ID)
+    return false;
+  auto I = Map.find(ID);
+  assert(I != Map.end() && "Entry not present");
+  Entry &E = I->second;
+  assert((E.State == Incomplete ||
+          E.State == IncompleteUsed) &&
+         "Entry must be an incomplete type");
+  bool IsRecursive = false;
+  if (E.State == IncompleteUsed) {
+    // We made use of our Incomplete encoding, thus we are recursive.
+    IsRecursive = true;
+    --IncompleteUsedCount;
+  }
+  if (E.Swapped.empty())
+    Map.erase(I);
+  else {
+    // Swap the Recursive back.
+    E.Swapped.swap(E.Str);
+    E.Swapped.clear();
+    E.State = Recursive;
+  }
+  --IncompleteCount;
+  return IsRecursive;
+}
+
+/// Add the encoded TypeString to the cache only if it is NonRecursive or
+/// Recursive (viz: all sub-members were expanded as fully as possible).
+void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
+                                    bool IsRecursive) {
+  if (!ID || IncompleteUsedCount)
+    return; // No key or it is an incomplete sub-type so don't add.
+  Entry &E = Map[ID];
+  if (IsRecursive && !E.Str.empty()) {
+    assert(E.State==Recursive && E.Str.size() == Str.size() &&
+           "This is not the same Recursive entry");
+    // The parent container was not recursive after all, so we could have used
+    // this Recursive sub-member entry after all, but we assumed the worst when
+    // we started viz: IncompleteCount!=0.
+    return;
+  }
+  assert(E.Str.empty() && "Entry already present");
+  E.Str = Str.str();
+  E.State = IsRecursive? Recursive : NonRecursive;
+}
+
+/// Return a cached TypeString encoding for the ID. If there isn't one, or we
+/// are recursively expanding a type (IncompleteCount != 0) and the cached
+/// encoding is Recursive, return an empty StringRef.
+StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
+  if (!ID)
+    return StringRef(); // We have no key.
+  auto I = Map.find(ID);
+  if (I == Map.end())
+    return StringRef(); // We have no encoding.
+  Entry &E = I->second;
+  if (E.State == Recursive && IncompleteCount)
+    return StringRef(); // We don't use Recursive encodings for member types.
+
+  if (E.State == Incomplete) {
+    // The incomplete type is being used to break out of recursion.
+    E.State = IncompleteUsed;
+    ++IncompleteUsedCount;
+  }
+  return E.Str;
+}
+
+/// The XCore ABI includes a type information section that communicates symbol
+/// type information to the linker. The linker uses this information to verify
+/// safety/correctness of things such as array bounds and pointers.
+/// The ABI only requires C (and XC) language modules to emit TypeStrings.
+/// This type information (TypeString) is emitted into metadata for all global
+/// symbols: definitions, declarations, functions & variables.
+///
+/// The TypeString carries type, qualifier, name, size & value details.
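+/// For illustration, a C global declared as 'const int g;' is paired with the
+/// string "c:si" in the xcore.typestrings metadata emitted below.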
+/// Please see 'Tools Development Guide' section 2.16.2 for format details: +/// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf +/// The output is tested by test/CodeGen/xcore-stringtype.c. +/// +static bool getTypeString(SmallStringEnc &Enc, const Decl *D, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC); + +/// XCore uses emitTargetMD to emit TypeString metadata for global symbols. +void XCoreTargetCodeGenInfo::emitTargetMD( + const Decl *D, llvm::GlobalValue *GV, + const CodeGen::CodeGenModule &CGM) const { + SmallStringEnc Enc; + if (getTypeString(Enc, D, CGM, TSC)) { + llvm::LLVMContext &Ctx = CGM.getModule().getContext(); + llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), + llvm::MDString::get(Ctx, Enc.str())}; + llvm::NamedMDNode *MD = + CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); + } +} + +void XCoreTargetCodeGenInfo::emitTargetMetadata( + CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const { + // Warning, new MangledDeclNames may be appended within this loop. + // We rely on MapVector insertions adding new elements to the end + // of the container. + for (unsigned I = 0; I != MangledDeclNames.size(); ++I) { + auto Val = *(MangledDeclNames.begin() + I); + llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second); + if (GV) { + const Decl *D = Val.first.getDecl()->getMostRecentDecl(); + emitTargetMD(D, GV, CGM); + } + } +} + +static bool appendType(SmallStringEnc &Enc, QualType QType, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC); + +/// Helper function for appendRecordType(). +/// Builds a SmallVector containing the encoded field types in declaration +/// order. +static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE, + const RecordDecl *RD, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + for (const auto *Field : RD->fields()) { + SmallStringEnc Enc; + Enc += "m("; + Enc += Field->getName(); + Enc += "){"; + if (Field->isBitField()) { + Enc += "b("; + llvm::raw_svector_ostream OS(Enc); + OS << Field->getBitWidthValue(CGM.getContext()); + Enc += ':'; + } + if (!appendType(Enc, Field->getType(), CGM, TSC)) + return false; + if (Field->isBitField()) + Enc += ')'; + Enc += '}'; + FE.emplace_back(!Field->getName().empty(), Enc); + } + return true; +} + +/// Appends structure and union types to Enc and adds encoding to cache. +/// Recursively calls appendType (via extractFieldType) for each field. +/// Union types have their fields ordered according to the ABI. +static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC, const IdentifierInfo *ID) { + // Append the cached TypeString if we have one. + StringRef TypeString = TSC.lookupStr(ID); + if (!TypeString.empty()) { + Enc += TypeString; + return true; + } + + // Start to emit an incomplete TypeString. + size_t Start = Enc.size(); + Enc += (RT->isUnionType()? 'u' : 's'); + Enc += '('; + if (ID) + Enc += ID->getName(); + Enc += "){"; + + // We collect all encoded fields and order as necessary. + bool IsRecursive = false; + const RecordDecl *RD = RT->getDecl()->getDefinition(); + if (RD && !RD->field_empty()) { + // An incomplete TypeString stub is placed in the cache for this RecordType + // so that recursive calls to this RecordType will use it whilst building a + // complete TypeString for this RecordType. 
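+    // For illustration, while encoding 'struct T { struct T *next; };' the
+    // stub "s(T){}" is what the recursive reference through 'next' sees, so
+    // that member encodes as "m(next){p(s(T){})}" instead of recursing.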
+ SmallVector<FieldEncoding, 16> FE; + std::string StubEnc(Enc.substr(Start).str()); + StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. + TSC.addIncomplete(ID, std::move(StubEnc)); + if (!extractFieldType(FE, RD, CGM, TSC)) { + (void) TSC.removeIncomplete(ID); + return false; + } + IsRecursive = TSC.removeIncomplete(ID); + // The ABI requires unions to be sorted but not structures. + // See FieldEncoding::operator< for sort algorithm. + if (RT->isUnionType()) + llvm::sort(FE); + // We can now complete the TypeString. + unsigned E = FE.size(); + for (unsigned I = 0; I != E; ++I) { + if (I) + Enc += ','; + Enc += FE[I].str(); + } + } + Enc += '}'; + TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); + return true; +} + +/// Appends enum types to Enc and adds the encoding to the cache. +static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, + TypeStringCache &TSC, + const IdentifierInfo *ID) { + // Append the cached TypeString if we have one. + StringRef TypeString = TSC.lookupStr(ID); + if (!TypeString.empty()) { + Enc += TypeString; + return true; + } + + size_t Start = Enc.size(); + Enc += "e("; + if (ID) + Enc += ID->getName(); + Enc += "){"; + + // We collect all encoded enumerations and order them alphanumerically. + if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { + SmallVector<FieldEncoding, 16> FE; + for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; + ++I) { + SmallStringEnc EnumEnc; + EnumEnc += "m("; + EnumEnc += I->getName(); + EnumEnc += "){"; + I->getInitVal().toString(EnumEnc); + EnumEnc += '}'; + FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); + } + llvm::sort(FE); + unsigned E = FE.size(); + for (unsigned I = 0; I != E; ++I) { + if (I) + Enc += ','; + Enc += FE[I].str(); + } + } + Enc += '}'; + TSC.addIfComplete(ID, Enc.substr(Start), false); + return true; +} + +/// Appends type's qualifier to Enc. +/// This is done prior to appending the type's encoding. +static void appendQualifier(SmallStringEnc &Enc, QualType QT) { + // Qualifiers are emitted in alphabetical order. + static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"}; + int Lookup = 0; + if (QT.isConstQualified()) + Lookup += 1<<0; + if (QT.isRestrictQualified()) + Lookup += 1<<1; + if (QT.isVolatileQualified()) + Lookup += 1<<2; + Enc += Table[Lookup]; +} + +/// Appends built-in types to Enc. 
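+/// For illustration: 'int' maps to "si", 'unsigned char' to "uc" and 'double'
+/// to "d"; any kind outside the table below reports failure.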
+static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { + const char *EncType; + switch (BT->getKind()) { + case BuiltinType::Void: + EncType = "0"; + break; + case BuiltinType::Bool: + EncType = "b"; + break; + case BuiltinType::Char_U: + EncType = "uc"; + break; + case BuiltinType::UChar: + EncType = "uc"; + break; + case BuiltinType::SChar: + EncType = "sc"; + break; + case BuiltinType::UShort: + EncType = "us"; + break; + case BuiltinType::Short: + EncType = "ss"; + break; + case BuiltinType::UInt: + EncType = "ui"; + break; + case BuiltinType::Int: + EncType = "si"; + break; + case BuiltinType::ULong: + EncType = "ul"; + break; + case BuiltinType::Long: + EncType = "sl"; + break; + case BuiltinType::ULongLong: + EncType = "ull"; + break; + case BuiltinType::LongLong: + EncType = "sll"; + break; + case BuiltinType::Float: + EncType = "ft"; + break; + case BuiltinType::Double: + EncType = "d"; + break; + case BuiltinType::LongDouble: + EncType = "ld"; + break; + default: + return false; + } + Enc += EncType; + return true; +} + +/// Appends a pointer encoding to Enc before calling appendType for the pointee. +static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + Enc += "p("; + if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) + return false; + Enc += ')'; + return true; +} + +/// Appends array encoding to Enc before calling appendType for the element. +static bool appendArrayType(SmallStringEnc &Enc, QualType QT, + const ArrayType *AT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC, StringRef NoSizeEnc) { + if (AT->getSizeModifier() != ArrayType::Normal) + return false; + Enc += "a("; + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) + CAT->getSize().toStringUnsigned(Enc); + else + Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". + Enc += ':'; + // The Qualifiers should be attached to the type rather than the array. + appendQualifier(Enc, QT); + if (!appendType(Enc, AT->getElementType(), CGM, TSC)) + return false; + Enc += ')'; + return true; +} + +/// Appends a function encoding to Enc, calling appendType for the return type +/// and the arguments. +static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + Enc += "f{"; + if (!appendType(Enc, FT->getReturnType(), CGM, TSC)) + return false; + Enc += "}("; + if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) { + // N.B. we are only interested in the adjusted param types. + auto I = FPT->param_type_begin(); + auto E = FPT->param_type_end(); + if (I != E) { + do { + if (!appendType(Enc, *I, CGM, TSC)) + return false; + ++I; + if (I != E) + Enc += ','; + } while (I != E); + if (FPT->isVariadic()) + Enc += ",va"; + } else { + if (FPT->isVariadic()) + Enc += "va"; + else + Enc += '0'; + } + } + Enc += ')'; + return true; +} + +/// Handles the type's qualifier before dispatching a call to handle specific +/// type encodings. +static bool appendType(SmallStringEnc &Enc, QualType QType, + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { + + QualType QT = QType.getCanonicalType(); + + if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) + // The Qualifiers should be attached to the type rather than the array. + // Thus we don't call appendQualifier() here. 
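+    // For illustration, 'const int a[4]' would encode roughly as "a(4:c:si)":
+    // the size, then the element's qualifier and type, inside the array
+    // encoding (assuming the qualifier is recoverable from the array type).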
+    return appendArrayType(Enc, QT, AT, CGM, TSC, "");
+
+  appendQualifier(Enc, QT);
+
+  if (const BuiltinType *BT = QT->getAs<BuiltinType>())
+    return appendBuiltinType(Enc, BT);
+
+  if (const PointerType *PT = QT->getAs<PointerType>())
+    return appendPointerType(Enc, PT, CGM, TSC);
+
+  if (const EnumType *ET = QT->getAs<EnumType>())
+    return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());
+
+  if (const RecordType *RT = QT->getAsStructureType())
+    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
+
+  if (const RecordType *RT = QT->getAsUnionType())
+    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
+
+  if (const FunctionType *FT = QT->getAs<FunctionType>())
+    return appendFunctionType(Enc, FT, CGM, TSC);
+
+  return false;
+}
+
+static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
+                          const CodeGen::CodeGenModule &CGM,
+                          TypeStringCache &TSC) {
+  if (!D)
+    return false;
+
+  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    if (FD->getLanguageLinkage() != CLanguageLinkage)
+      return false;
+    return appendType(Enc, FD->getType(), CGM, TSC);
+  }
+
+  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
+    if (VD->getLanguageLinkage() != CLanguageLinkage)
+      return false;
+    QualType QT = VD->getType().getCanonicalType();
+    if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
+      // Global ArrayTypes are given a size of '*' if the size is unknown.
+      // The Qualifiers should be attached to the type rather than the array.
+      // Thus we don't call appendQualifier() here.
+      return appendArrayType(Enc, QT, AT, CGM, TSC, "*");
+    }
+    return appendType(Enc, QT, CGM, TSC);
+  }
+  return false;
+}
+
+std::unique_ptr<TargetCodeGenInfo>
+CodeGen::createXCoreTargetCodeGenInfo(CodeGenModule &CGM) {
+  return std::make_unique<XCoreTargetCodeGenInfo>(CGM.getTypes());
+}
