Diffstat (limited to 'clang/lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | clang/lib/CodeGen/TargetInfo.cpp | 300 |
1 file changed, 195 insertions, 105 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index c2c7b8bf653b..682ef18da73b 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -17,10 +17,12 @@
 #include "CGCXXABI.h"
 #include "CGValue.h"
 #include "CodeGenFunction.h"
+#include "clang/AST/Attr.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/CodeGen/SwiftCallingConv.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
@@ -28,7 +30,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>      // std::sort
+#include <algorithm> // std::sort

 using namespace clang;
 using namespace CodeGen;
@@ -778,6 +780,12 @@ public:
       B.addAttribute("wasm-import-name", Attr->getImportName());
       Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
     }
+    if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) {
+      llvm::Function *Fn = cast<llvm::Function>(GV);
+      llvm::AttrBuilder B;
+      B.addAttribute("wasm-export-name", Attr->getExportName());
+      Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+    }
   }

   if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
@@ -989,11 +997,13 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {

 /// Similar to llvm::CCState, but for Clang.
 struct CCState {
-  CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
+  CCState(CGFunctionInfo &FI)
+      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}

-  unsigned CC;
-  unsigned FreeRegs;
-  unsigned FreeSSERegs;
+  llvm::SmallBitVector IsPreassigned;
+  unsigned CC = CallingConv::CC_C;
+  unsigned FreeRegs = 0;
+  unsigned FreeSSERegs = 0;
 };

 enum {
@@ -1064,8 +1074,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
   void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                            CharUnits &StackOffset, ABIArgInfo &Info,
                            QualType Type) const;
-  void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
-                             bool &UsedInAlloca) const;
+  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;

 public:
@@ -1180,6 +1189,10 @@ static void rewriteInputConstraintReferences(unsigned FirstIn,
     if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
       // We have an operand reference.
       size_t DigitStart = Pos;
+      if (AsmString[DigitStart] == '{') {
+        OS << '{';
+        ++DigitStart;
+      }
       size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
       if (DigitEnd == std::string::npos)
         DigitEnd = AsmString.size();
@@ -1225,7 +1238,7 @@ void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
     ResultTruncRegTypes.push_back(CoerceTy);

     // Coerce the integer by bitcasting the return slot pointer.
-    ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(),
+    ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(CGF),
                                                     CoerceTy->getPointerTo()));
     ResultRegDests.push_back(ReturnSlot);

@@ -1629,9 +1642,38 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
   return true;
 }

+void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
+  // Vectorcall x86 works subtly different than in x64, so the format is
+  // a bit different than the x64 version. First, all vector types (not HVAs)
+  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
+  // This differs from the x64 implementation, where the first 6 by INDEX get
+  // registers.
+  // In the second pass over the arguments, HVAs are passed in the remaining
+  // vector registers if possible, or indirectly by address. The address will be
+  // passed in ECX/EDX if available. Any other arguments are passed according to
+  // the usual fastcall rules.
+  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
+  for (int I = 0, E = Args.size(); I < E; ++I) {
+    const Type *Base = nullptr;
+    uint64_t NumElts = 0;
+    const QualType &Ty = Args[I].type;
+    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+        isHomogeneousAggregate(Ty, Base, NumElts)) {
+      if (State.FreeSSERegs >= NumElts) {
+        State.FreeSSERegs -= NumElts;
+        Args[I].info = ABIArgInfo::getDirect();
+        State.IsPreassigned.set(I);
+      }
+    }
+  }
+}
+
 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
                                                CCState &State) const {
   // FIXME: Set alignment on indirect arguments.
+  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
+  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
+  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

   Ty = useFirstFieldIfTransparentUnion(Ty);

@@ -1651,11 +1693,16 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
   // to other targets.
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
-  if (State.CC == llvm::CallingConv::X86_RegCall &&
+  if ((IsRegCall || IsVectorCall) &&
       isHomogeneousAggregate(Ty, Base, NumElts)) {
-
     if (State.FreeSSERegs >= NumElts) {
       State.FreeSSERegs -= NumElts;
+
+      // Vectorcall passes HVAs directly and does not flatten them, but regcall
+      // does.
+      if (IsVectorCall)
+        return getDirectX86Hva();
+
       if (Ty->isBuiltinType() || Ty->isVectorType())
         return ABIArgInfo::getDirect();
       return ABIArgInfo::getExpand();
@@ -1697,10 +1744,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
     if (getContext().getTypeSize(Ty) <= 4 * 32 &&
         (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
       return ABIArgInfo::getExpandWithPadding(
-          State.CC == llvm::CallingConv::X86_FastCall ||
-              State.CC == llvm::CallingConv::X86_VectorCall ||
-              State.CC == llvm::CallingConv::X86_RegCall,
-          PaddingType);
+          IsFastCall || IsVectorCall || IsRegCall, PaddingType);

     return getIndirectResult(Ty, true, State);
   }
@@ -1739,60 +1783,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
   return ABIArgInfo::getDirect();
 }

-void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
-                                          bool &UsedInAlloca) const {
-  // Vectorcall x86 works subtly different than in x64, so the format is
-  // a bit different than the x64 version. First, all vector types (not HVAs)
-  // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
-  // This differs from the x64 implementation, where the first 6 by INDEX get
-  // registers.
-  // After that, integers AND HVAs are assigned Left to Right in the same pass.
-  // Integers are passed as ECX/EDX if one is available (in order). HVAs will
-  // first take up the remaining YMM/XMM registers. If insufficient registers
-  // remain but an integer register (ECX/EDX) is available, it will be passed
-  // in that, else, on the stack.
-  for (auto &I : FI.arguments()) {
-    // First pass do all the vector types.
-    const Type *Base = nullptr;
-    uint64_t NumElts = 0;
-    const QualType& Ty = I.type;
-    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
-        isHomogeneousAggregate(Ty, Base, NumElts)) {
-      if (State.FreeSSERegs >= NumElts) {
-        State.FreeSSERegs -= NumElts;
-        I.info = ABIArgInfo::getDirect();
-      } else {
-        I.info = classifyArgumentType(Ty, State);
-      }
-      UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
-    }
-  }
-
-  for (auto &I : FI.arguments()) {
-    // Second pass, do the rest!
-    const Type *Base = nullptr;
-    uint64_t NumElts = 0;
-    const QualType& Ty = I.type;
-    bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
-
-    if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
-      // Assign true HVAs (non vector/native FP types).
-      if (State.FreeSSERegs >= NumElts) {
-        State.FreeSSERegs -= NumElts;
-        I.info = getDirectX86Hva();
-      } else {
-        I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
-      }
-    } else if (!IsHva) {
-      // Assign all Non-HVAs, so this will exclude Vector/FP args.
-      I.info = classifyArgumentType(Ty, State);
-      UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
-    }
-  }
-}
-
 void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  CCState State(FI.getCallingConvention());
+  CCState State(FI);
   if (IsMCUABI)
     State.FreeRegs = 3;
   else if (State.CC == llvm::CallingConv::X86_FastCall)
@@ -1824,15 +1816,20 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (FI.isChainCall())
     ++State.FreeRegs;

+  // For vectorcall, do a first pass over the arguments, assigning FP and vector
+  // arguments to XMM registers as available.
+  if (State.CC == llvm::CallingConv::X86_VectorCall)
+    runVectorCallFirstPass(FI, State);
+
   bool UsedInAlloca = false;
-  if (State.CC == llvm::CallingConv::X86_VectorCall) {
-    computeVectorCallArgs(FI, State, UsedInAlloca);
-  } else {
-    // If not vectorcall, revert to normal behavior.
-    for (auto &I : FI.arguments()) {
-      I.info = classifyArgumentType(I.type, State);
-      UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
-    }
+  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
+  for (int I = 0, E = Args.size(); I < E; ++I) {
+    // Skip arguments that have already been assigned.
+    if (State.IsPreassigned.test(I))
+      continue;
+
+    Args[I].info = classifyArgumentType(Args[I].type, State);
+    UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
   }

   // If we needed to use inalloca for any argument, do a second pass and rewrite
@@ -4991,7 +4988,7 @@ private:
   ABIKind getABIKind() const { return Kind; }
   bool isDarwinPCS() const { return Kind == DarwinPCS; }

-  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
   ABIArgInfo classifyArgumentType(QualType RetTy) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
@@ -5001,7 +4998,8 @@ private:

   void computeInfo(CGFunctionInfo &FI) const override {
     if (!::classifyReturnType(getCXXABI(), FI, *this))
-      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+      FI.getReturnInfo() =
+          classifyReturnType(FI.getReturnType(), FI.isVariadic());

     for (auto &it : FI.arguments())
       it.info = classifyArgumentType(it.type);
@@ -5055,23 +5053,38 @@ public:
     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
     if (!FD)
       return;
-    llvm::Function *Fn = cast<llvm::Function>(GV);
-    auto Kind = CGM.getCodeGenOpts().getSignReturnAddress();
-    if (Kind != CodeGenOptions::SignReturnAddressScope::None) {
+    CodeGenOptions::SignReturnAddressScope Scope = CGM.getCodeGenOpts().getSignReturnAddress();
+    CodeGenOptions::SignReturnAddressKeyValue Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
+    bool BranchTargetEnforcement = CGM.getCodeGenOpts().BranchTargetEnforcement;
+    if (const auto *TA = FD->getAttr<TargetAttr>()) {
+      ParsedTargetAttr Attr = TA->parse();
+      if (!Attr.BranchProtection.empty()) {
+        TargetInfo::BranchProtectionInfo BPI;
+        StringRef Error;
+        (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
+                                                       BPI, Error);
+        assert(Error.empty());
+        Scope = BPI.SignReturnAddr;
+        Key = BPI.SignKey;
+        BranchTargetEnforcement = BPI.BranchTargetEnforcement;
+      }
+    }
+
+    auto *Fn = cast<llvm::Function>(GV);
+    if (Scope != CodeGenOptions::SignReturnAddressScope::None) {
       Fn->addFnAttr("sign-return-address",
-                    Kind == CodeGenOptions::SignReturnAddressScope::All
+                    Scope == CodeGenOptions::SignReturnAddressScope::All
                         ? "all" : "non-leaf");
-      auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
       Fn->addFnAttr("sign-return-address-key",
                     Key == CodeGenOptions::SignReturnAddressKeyValue::AKey
                         ? "a_key" : "b_key");
     }
-    if (CGM.getCodeGenOpts().BranchTargetEnforcement)
+    if (BranchTargetEnforcement)
       Fn->addFnAttr("branch-target-enforcement");
   }
 };
@@ -5184,23 +5197,24 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
       Alignment = getContext().getTypeUnadjustedAlign(Ty);
       Alignment = Alignment < 128 ? 64 : 128;
     } else {
-      Alignment = getContext().getTypeAlign(Ty);
+      Alignment = std::max(getContext().getTypeAlign(Ty),
+                           (unsigned)getTarget().getPointerWidth(0));
     }
-    Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
+    Size = llvm::alignTo(Size, Alignment);

     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
     // For aggregates with 16-byte alignment, we use i128.
-    if (Alignment < 128 && Size == 128) {
-      llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
-    }
-    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+    llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
+    return ABIArgInfo::getDirect(
+        Size == Alignment ? BaseTy
+                          : llvm::ArrayType::get(BaseTy, Size / Alignment));
   }

   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
 }

-ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
+ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
+                                              bool IsVariadic) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
@@ -5224,7 +5238,9 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {

   const Type *Base = nullptr;
   uint64_t Members = 0;
-  if (isHomogeneousAggregate(RetTy, Base, Members))
+  if (isHomogeneousAggregate(RetTy, Base, Members) &&
+      !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
+        IsVariadic))
     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
     return ABIArgInfo::getDirect();
@@ -5259,6 +5275,14 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
     // NumElements should be power of 2.
     if (!llvm::isPowerOf2_32(NumElements))
       return true;
+
+    // arm64_32 has to be compatible with the ARM logic here, which allows huge
+    // vectors for some reason.
+    llvm::Triple Triple = getTarget().getTriple();
+    if (Triple.getArch() == llvm::Triple::aarch64_32 &&
+        Triple.isOSBinFormatMachO())
+      return Size <= 32;
+
     return Size != 64 && (Size != 128 || NumElements == 1);
   }
   return false;
@@ -5550,7 +5574,8 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
   if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
     return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());

-  CharUnits SlotSize = CharUnits::fromQuantity(8);
+  uint64_t PointerSize = getTarget().getPointerWidth(0) / 8;
+  CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);

   // Empty records are ignored for parameter passing purposes.
   if (isEmptyRecord(getContext(), Ty, true)) {
@@ -7555,7 +7580,7 @@ public:
   bool shouldUseInReg(QualType Ty, CCState &State) const;

   void computeInfo(CGFunctionInfo &FI) const override {
-    CCState State(FI.getCallingConvention());
+    CCState State(FI);
     // Lanai uses 4 registers to pass arguments unless the function has the
     // regparm attribute set.
     if (FI.getHasRegParm()) {
@@ -7685,6 +7710,42 @@ private:
   bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                          uint64_t Members) const override;

+  // Coerce HIP pointer arguments from generic pointers to global ones.
+  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
+                                       unsigned ToAS) const {
+    // Structure types.
+    if (auto STy = dyn_cast<llvm::StructType>(Ty)) {
+      SmallVector<llvm::Type *, 8> EltTys;
+      bool Changed = false;
+      for (auto T : STy->elements()) {
+        auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
+        EltTys.push_back(NT);
+        Changed |= (NT != T);
+      }
+      // Skip if there is no change in element types.
+      if (!Changed)
+        return STy;
+      if (STy->hasName())
+        return llvm::StructType::create(
+            EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
+      return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked());
+    }
+    // Array types.
+    if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) {
+      auto T = ATy->getElementType();
+      auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
+      // Skip if there is no change in that element type.
+      if (NT == T)
+        return ATy;
+      return llvm::ArrayType::get(NT, ATy->getNumElements());
+    }
+    // Single value types.
+    if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
+      return llvm::PointerType::get(
+          cast<llvm::PointerType>(Ty)->getElementType(), ToAS);
+    return Ty;
+  }
+
 public:
   explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
@@ -7694,6 +7755,8 @@ public:
   ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;

   void computeInfo(CGFunctionInfo &FI) const override;
+  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                    QualType Ty) const override;
 };

 bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
@@ -7757,6 +7820,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
   }
 }

+Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                 QualType Ty) const {
+  llvm_unreachable("AMDGPU does not support varargs");
+}
+
 ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
   if (isAggregateTypeForABI(RetTy)) {
     // Records with non-trivial destructors/copy-constructors should not be
@@ -7805,14 +7873,22 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {

   // TODO: Can we omit empty structs?

-  // Coerce single element structs to its element.
+  llvm::Type *LTy = nullptr;
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
-    return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+    LTy = CGT.ConvertType(QualType(SeltTy, 0));
+
+  if (getContext().getLangOpts().HIP) {
+    if (!LTy)
+      LTy = CGT.ConvertType(Ty);
+    LTy = coerceKernelArgumentType(
+        LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
+  }

   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
   // individual elements, which confuses the Clover OpenCL backend; therefore we
   // have to set it to false here. Other args of getDirect() are just defaults.
-  return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }

 ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
@@ -7982,8 +8058,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     } else
       assert(Max == 0 && "Max must be zero");
   } else if (IsOpenCLKernel || IsHIPKernel) {
-    // By default, restrict the maximum size to 256.
-    F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
+    // By default, restrict the maximum size to a value specified by
+    // --gpu-max-threads-per-block=n or its default value.
+    std::string AttrVal =
+        std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
+    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
   }

   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
@@ -8477,7 +8556,7 @@ private:
   }

   void computeInfo(CGFunctionInfo &FI) const override {
-    CCState State(FI.getCallingConvention());
+    CCState State(FI);
     // ARC uses 8 registers to pass arguments.
     State.FreeRegs = 8;
@@ -9284,11 +9363,21 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
     FI.getReturnInfo() = classifyReturnType(RetTy);

   // IsRetIndirect is true if classifyArgumentType indicated the value should
-  // be passed indirect or if the type size is greater than 2*xlen. e.g. fp128
-  // is passed direct in LLVM IR, relying on the backend lowering code to
-  // rewrite the argument list and pass indirectly on RV32.
-  bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect ||
-                       getContext().getTypeSize(RetTy) > (2 * XLen);
+  // be passed indirect, or if the type size is a scalar greater than 2*XLen
+  // and not a complex type with elements <= FLen. e.g. fp128 is passed direct
+  // in LLVM IR, relying on the backend lowering code to rewrite the argument
+  // list and pass indirectly on RV32.
+  bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
+  if (!IsRetIndirect && RetTy->isScalarType() &&
+      getContext().getTypeSize(RetTy) > (2 * XLen)) {
+    if (RetTy->isComplexType() && FLen) {
+      QualType EltTy = RetTy->getAs<ComplexType>()->getElementType();
+      IsRetIndirect = getContext().getTypeSize(EltTy) > FLen;
+    } else {
+      // This is a normal scalar > 2*XLen, such as fp128 on RV32.
+      IsRetIndirect = true;
+    }
+  }

   // We must track the number of GPRs used in order to conform to the RISC-V
   // ABI, as integer scalars passed in registers should have signext/zeroext
@@ -9722,6 +9811,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     return SetCGInfo(new AVRTargetCodeGenInfo(Types));

   case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_32:
   case llvm::Triple::aarch64_be: {
     AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
     if (getTarget().getABI() == "darwinpcs")
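
For readers unfamiliar with the two-pass vectorcall classification reworked above, the following sketch (not part of the patch; the struct and function names are made up for illustration) shows the kind of 32-bit x86 __vectorcall signature the new code paths classify: the plain __m128 parameters are preassigned to XMM registers in runVectorCallFirstPass, while the homogeneous vector aggregate is handled in the later pass, taking whatever XMM registers remain or falling back to an indirect pass. It assumes a Windows x86 target where __vectorcall is accepted.

// Illustrative sketch only -- not from the patch; 'HVA4' and 'accumulate'
// are hypothetical names.
#include <xmmintrin.h>

// Four identical vector fields make this a homogeneous vector aggregate (HVA).
struct HVA4 {
  __m128 x, y, z, w;
};

// Under __vectorcall on 32-bit x86, 'a' and 'b' are plain vector arguments
// assigned to XMM registers in the first pass; 'hva' is classified in the
// second pass and uses the remaining XMM registers, or is passed indirectly
// if too few remain.
__m128 __vectorcall accumulate(__m128 a, __m128 b, HVA4 hva) {
  __m128 sum = _mm_add_ps(a, b);
  sum = _mm_add_ps(sum, _mm_add_ps(hva.x, hva.y));
  return _mm_add_ps(sum, _mm_add_ps(hva.z, hva.w));
}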