Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp  76
1 file changed, 62 insertions, 14 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp
index 81f40011f11c..c2c7b8bf653b 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/TargetInfo.cpp
@@ -833,10 +833,13 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
 
 Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                       QualType Ty) const {
-  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false,
+  bool IsIndirect = isAggregateTypeForABI(Ty) &&
+                    !isEmptyRecord(getContext(), Ty, true) &&
+                    !isSingleElementStruct(Ty, getContext());
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                           getContext().getTypeInfoInChars(Ty),
                           CharUnits::fromQuantity(4),
-                          /*AllowHigherAlign=*/ true);
+                          /*AllowHigherAlign=*/true);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2177,6 +2180,17 @@ class X86_64ABIInfo : public SwiftABIInfo {
     return true;
   }
 
+  // GCC classifies vectors of __int128 as memory.
+  bool passInt128VectorsInMem() const {
+    // Clang <= 9.0 did not do this.
+    if (getContext().getLangOpts().getClangABICompat() <=
+        LangOptions::ClangABI::Ver9)
+      return false;
+
+    const llvm::Triple &T = getTarget().getTriple();
+    return T.isOSLinux() || T.isOSNetBSD();
+  }
+
   X86AVXABILevel AVXLevel;
   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
   // 64-bit hardware.
@@ -2657,6 +2671,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       Hi = Lo;
     } else if (Size == 128 ||
                (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
+      QualType ElementType = VT->getElementType();
+
+      // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
+      if (passInt128VectorsInMem() && Size != 128 &&
+          (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
+           ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
+        return;
+
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -2787,8 +2809,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
     for (const auto &I : CXXRD->bases()) {
       assert(!I.isVirtual() && !I.getType()->isDependentType() &&
              "Unexpected base class!");
-      const CXXRecordDecl *Base =
-          cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+      const auto *Base =
+          cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
 
       // Classify this field.
       //
@@ -2899,6 +2921,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
     unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
     if (Size <= 64 || Size > LargestVector)
       return true;
+    QualType EltTy = VecTy->getElementType();
+    if (passInt128VectorsInMem() &&
+        (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
+         EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
+      return true;
   }
 
   return false;
@@ -2973,14 +3000,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
       Ty = QualType(InnerTy, 0);
 
   llvm::Type *IRType = CGT.ConvertType(Ty);
-  if (isa<llvm::VectorType>(IRType) ||
-      IRType->getTypeID() == llvm::Type::FP128TyID)
+  if (isa<llvm::VectorType>(IRType)) {
+    // Don't pass vXi128 vectors in their native type, the backend can't
+    // legalize them.
+    if (passInt128VectorsInMem() &&
+        IRType->getVectorElementType()->isIntegerTy(128)) {
+      // Use a vXi64 vector.
+      uint64_t Size = getContext().getTypeSize(Ty);
+      return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
+                                   Size / 64);
+    }
+
+    return IRType;
+  }
+
+  if (IRType->getTypeID() == llvm::Type::FP128TyID)
     return IRType;
 
   // We couldn't find the preferred IR vector type for 'Ty'.
   uint64_t Size = getContext().getTypeSize(Ty);
   assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
 
+  // Return a LLVM IR vector type based on the size of 'Ty'.
   return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
                                Size / 64);
 }
@@ -3030,8 +3071,8 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
     for (const auto &I : CXXRD->bases()) {
       assert(!I.isVirtual() && !I.getType()->isDependentType() &&
              "Unexpected base class!");
-      const CXXRecordDecl *Base =
-          cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+      const auto *Base =
+          cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
 
       // If the base is after the span we care about, ignore it.
       unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
@@ -7909,8 +7950,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   const auto *ReqdWGS = M.getLangOpts().OpenCL ?
     FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
-  if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
-       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+
+  const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
+                              FD->hasAttr<OpenCLKernelAttr>();
+  const bool IsHIPKernel = M.getLangOpts().HIP &&
+                           FD->hasAttr<CUDAGlobalAttr>();
+  if ((IsOpenCLKernel || IsHIPKernel) &&
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7936,6 +7981,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
     } else
       assert(Max == 0 && "Max must be zero");
+  } else if (IsOpenCLKernel || IsHIPKernel) {
+    // By default, restrict the maximum size to 256.
+    F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
   }
 
   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
@@ -9329,7 +9377,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
   if (const auto *RTy = Ty->getAs<RecordType>()) {
     // Structures with either a non-trivial destructor or a non-trivial
     // copy constructor are not eligible for the FP calling convention.
-    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, CGT.getCXXABI()))
+    if (getRecordArgABI(Ty, CGT.getCXXABI()))
       return false;
     if (isEmptyRecord(getContext(), Ty, true))
       return true;
@@ -9390,7 +9438,7 @@ bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
       Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
   // Not really a candidate if we have a single int but no float.
   if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
-    return IsCandidate = false;
+    return false;
   if (!IsCandidate)
     return false;
   if (Field1Ty && Field1Ty->isFloatingPointTy())
@@ -9484,7 +9532,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
   // Complex types for the hard float ABI must be passed direct rather than
   // using CoerceAndExpand.
   if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
-    QualType EltTy = Ty->getAs<ComplexType>()->getElementType();
+    QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
     if (getContext().getTypeSize(EltTy) <= FLen) {
      ArgFPRsLeft -= 2;
       return ABIArgInfo::getDirect();
@@ -9906,7 +9954,7 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
   Builder.SetInsertPoint(BB);
   unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
   auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
-  BlockPtr->setAlignment(BlockAlign);
+  BlockPtr->setAlignment(llvm::MaybeAlign(BlockAlign));
   Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
   auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
   llvm::SmallVector<llvm::Value *, 2> Args;
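Note on the x86-64 hunks above: with passInt128VectorsInMem() returning true (Linux and NetBSD, unless -fclang-abi-compat=9 or lower is in effect), 256-bit and 512-bit vectors of __int128 are classified MEMORY and passed on the stack, matching GCC. A minimal sketch of an affected declaration follows; the typedef and function name are illustrative, not part of the patch:

    // Hypothetical example of a type the x86-64 change reclassifies:
    // a 256-bit vector with two __int128 elements.
    typedef __int128 v2i128 __attribute__((vector_size(32)));

    // Under the new classification 'a', 'b', and the return value go
    // through memory rather than being split into SSE/SSEUP eightbytes
    // (YMM registers) as Clang 9 did.
    v2i128 add_v2i128(v2i128 a, v2i128 b) { return a + b; }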