Diffstat (limited to 'clang/lib/CodeGen/TargetInfo.cpp')
-rw-r--r--  clang/lib/CodeGen/TargetInfo.cpp | 300
1 file changed, 195 insertions, 105 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index c2c7b8bf653b..682ef18da73b 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -17,10 +17,12 @@
#include "CGCXXABI.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
+#include "clang/AST/Attr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -28,7 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm> // std::sort
+#include <algorithm> // std::sort
using namespace clang;
using namespace CodeGen;
@@ -778,6 +780,12 @@ public:
B.addAttribute("wasm-import-name", Attr->getImportName());
Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
}
+ if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) {
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ llvm::AttrBuilder B;
+ B.addAttribute("wasm-export-name", Attr->getExportName());
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
}
if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
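For concreteness, a rough source-level sketch of where these IR attributes come from; the attribute spellings below are the Clang WebAssembly attributes as I understand them (import_name for the import path already handled above, export_name for the path this hunk adds), so treat the mapping as illustrative rather than verified compiler output.

    // Hypothetical declarations, compiled for a wasm32 target.
    __attribute__((import_name("host_log")))
    void host_log(int level);      // expected to yield "wasm-import-name"="host_log"

    __attribute__((export_name("run")))
    int run(void) { return 0; }    // expected to yield "wasm-export-name"="run"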
@@ -989,11 +997,13 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
/// Similar to llvm::CCState, but for Clang.
struct CCState {
- CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
+ CCState(CGFunctionInfo &FI)
+ : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
- unsigned CC;
- unsigned FreeRegs;
- unsigned FreeSSERegs;
+ llvm::SmallBitVector IsPreassigned;
+ unsigned CC = CallingConv::CC_C;
+ unsigned FreeRegs = 0;
+ unsigned FreeSSERegs = 0;
};
enum {
@@ -1064,8 +1074,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const;
- void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
- bool &UsedInAlloca) const;
+ void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
public:
@@ -1180,6 +1189,10 @@ static void rewriteInputConstraintReferences(unsigned FirstIn,
if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
// We have an operand reference.
size_t DigitStart = Pos;
+ if (AsmString[DigitStart] == '{') {
+ OS << '{';
+ ++DigitStart;
+ }
size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
if (DigitEnd == std::string::npos)
DigitEnd = AsmString.size();
@@ -1225,7 +1238,7 @@ void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
ResultTruncRegTypes.push_back(CoerceTy);
// Coerce the integer by bitcasting the return slot pointer.
- ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(),
+ ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(CGF),
CoerceTy->getPointerTo()));
ResultRegDests.push_back(ReturnSlot);
@@ -1629,9 +1642,38 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
return true;
}
+void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
+ // Vectorcall x86 works subtly different than in x64, so the format is
+ // a bit different than the x64 version. First, all vector types (not HVAs)
+ // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
+ // This differs from the x64 implementation, where the first 6 by INDEX get
+ // registers.
+ // In the second pass over the arguments, HVAs are passed in the remaining
+ // vector registers if possible, or indirectly by address. The address will be
+ // passed in ECX/EDX if available. Any other arguments are passed according to
+ // the usual fastcall rules.
+ MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
+ for (int I = 0, E = Args.size(); I < E; ++I) {
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType &Ty = Args[I].type;
+ if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ Args[I].info = ABIArgInfo::getDirect();
+ State.IsPreassigned.set(I);
+ }
+ }
+ }
+}
+
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
+ bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
+ bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
+ bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
Ty = useFirstFieldIfTransparentUnion(Ty);
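The vectorcall comment block above is easier to follow against a concrete signature. A minimal sketch, assuming a 32-bit Windows target where __vectorcall and __m128 are available; the register assignments in the comments are an editorial reading of the rules described above, not verified compiler output.

    #include <immintrin.h>

    struct HVA2 { __m128 a, b; };   // homogeneous vector aggregate of two XMM values

    // First pass: the plain vector arguments x and y are assigned XMM registers,
    // regardless of their position relative to the non-vector arguments.
    // Second pass: the HVA h takes two of the remaining XMM registers if enough
    // are free, otherwise it is passed indirectly by address; the integer i is
    // assigned ECX/EDX under the usual fastcall-style rules.
    int __vectorcall f(int i, __m128 x, HVA2 h, __m128 y);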
@@ -1651,11 +1693,16 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
// to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if (State.CC == llvm::CallingConv::X86_RegCall &&
+ if ((IsRegCall || IsVectorCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
-
if (State.FreeSSERegs >= NumElts) {
State.FreeSSERegs -= NumElts;
+
+ // Vectorcall passes HVAs directly and does not flatten them, but regcall
+ // does.
+ if (IsVectorCall)
+ return getDirectX86Hva();
+
if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
return ABIArgInfo::getExpand();
@@ -1697,10 +1744,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
if (getContext().getTypeSize(Ty) <= 4 * 32 &&
(!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
return ABIArgInfo::getExpandWithPadding(
- State.CC == llvm::CallingConv::X86_FastCall ||
- State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall,
- PaddingType);
+ IsFastCall || IsVectorCall || IsRegCall, PaddingType);
return getIndirectResult(Ty, true, State);
}
@@ -1739,60 +1783,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
return ABIArgInfo::getDirect();
}
-void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
- bool &UsedInAlloca) const {
- // Vectorcall x86 works subtly different than in x64, so the format is
- // a bit different than the x64 version. First, all vector types (not HVAs)
- // are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5 registers.
- // This differs from the x64 implementation, where the first 6 by INDEX get
- // registers.
- // After that, integers AND HVAs are assigned Left to Right in the same pass.
- // Integers are passed as ECX/EDX if one is available (in order). HVAs will
- // first take up the remaining YMM/XMM registers. If insufficient registers
- // remain but an integer register (ECX/EDX) is available, it will be passed
- // in that, else, on the stack.
- for (auto &I : FI.arguments()) {
- // First pass do all the vector types.
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
- const QualType& Ty = I.type;
- if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
- isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- I.info = ABIArgInfo::getDirect();
- } else {
- I.info = classifyArgumentType(Ty, State);
- }
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- }
- }
-
- for (auto &I : FI.arguments()) {
- // Second pass, do the rest!
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
- const QualType& Ty = I.type;
- bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
-
- if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
- // Assign true HVAs (non vector/native FP types).
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- I.info = getDirectX86Hva();
- } else {
- I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
- }
- } else if (!IsHva) {
- // Assign all Non-HVAs, so this will exclude Vector/FP args.
- I.info = classifyArgumentType(Ty, State);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- }
- }
-}
-
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
- CCState State(FI.getCallingConvention());
+ CCState State(FI);
if (IsMCUABI)
State.FreeRegs = 3;
else if (State.CC == llvm::CallingConv::X86_FastCall)
@@ -1824,15 +1816,20 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (FI.isChainCall())
++State.FreeRegs;
+ // For vectorcall, do a first pass over the arguments, assigning FP and vector
+ // arguments to XMM registers as available.
+ if (State.CC == llvm::CallingConv::X86_VectorCall)
+ runVectorCallFirstPass(FI, State);
+
bool UsedInAlloca = false;
- if (State.CC == llvm::CallingConv::X86_VectorCall) {
- computeVectorCallArgs(FI, State, UsedInAlloca);
- } else {
- // If not vectorcall, revert to normal behavior.
- for (auto &I : FI.arguments()) {
- I.info = classifyArgumentType(I.type, State);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- }
+ MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
+ for (int I = 0, E = Args.size(); I < E; ++I) {
+ // Skip arguments that have already been assigned.
+ if (State.IsPreassigned.test(I))
+ continue;
+
+ Args[I].info = classifyArgumentType(Args[I].type, State);
+ UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
}
// If we needed to use inalloca for any argument, do a second pass and rewrite
@@ -4991,7 +4988,7 @@ private:
ABIKind getABIKind() const { return Kind; }
bool isDarwinPCS() const { return Kind == DarwinPCS; }
- ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
@@ -5001,7 +4998,8 @@ private:
void computeInfo(CGFunctionInfo &FI) const override {
if (!::classifyReturnType(getCXXABI(), FI, *this))
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ FI.getReturnInfo() =
+ classifyReturnType(FI.getReturnType(), FI.isVariadic());
for (auto &it : FI.arguments())
it.info = classifyArgumentType(it.type);
@@ -5055,23 +5053,38 @@ public:
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
- llvm::Function *Fn = cast<llvm::Function>(GV);
- auto Kind = CGM.getCodeGenOpts().getSignReturnAddress();
- if (Kind != CodeGenOptions::SignReturnAddressScope::None) {
+ CodeGenOptions::SignReturnAddressScope Scope = CGM.getCodeGenOpts().getSignReturnAddress();
+ CodeGenOptions::SignReturnAddressKeyValue Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
+ bool BranchTargetEnforcement = CGM.getCodeGenOpts().BranchTargetEnforcement;
+ if (const auto *TA = FD->getAttr<TargetAttr>()) {
+ ParsedTargetAttr Attr = TA->parse();
+ if (!Attr.BranchProtection.empty()) {
+ TargetInfo::BranchProtectionInfo BPI;
+ StringRef Error;
+ (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
+ BPI, Error);
+ assert(Error.empty());
+ Scope = BPI.SignReturnAddr;
+ Key = BPI.SignKey;
+ BranchTargetEnforcement = BPI.BranchTargetEnforcement;
+ }
+ }
+
+ auto *Fn = cast<llvm::Function>(GV);
+ if (Scope != CodeGenOptions::SignReturnAddressScope::None) {
Fn->addFnAttr("sign-return-address",
- Kind == CodeGenOptions::SignReturnAddressScope::All
+ Scope == CodeGenOptions::SignReturnAddressScope::All
? "all"
: "non-leaf");
- auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
Fn->addFnAttr("sign-return-address-key",
Key == CodeGenOptions::SignReturnAddressKeyValue::AKey
? "a_key"
: "b_key");
}
- if (CGM.getCodeGenOpts().BranchTargetEnforcement)
+ if (BranchTargetEnforcement)
Fn->addFnAttr("branch-target-enforcement");
}
};
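A sketch of the per-function override this enables. The branch-protection string follows the AArch64 -mbranch-protection syntax that validateBranchProtection parses; the resulting attributes listed in the comment are an editorial reading of the code above, not verified output.

    // Hypothetical function opting into return-address signing and BTI locally.
    __attribute__((target("branch-protection=pac-ret+leaf+bti")))
    void handle_request(void) {}

    // Expected effect: the function is emitted with
    //   "sign-return-address"="all", "sign-return-address-key"="a_key",
    //   "branch-target-enforcement"
    // overriding whatever module-wide -mbranch-protection default is in effect.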
@@ -5184,23 +5197,24 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
Alignment = getContext().getTypeUnadjustedAlign(Ty);
Alignment = Alignment < 128 ? 64 : 128;
} else {
- Alignment = getContext().getTypeAlign(Ty);
+ Alignment = std::max(getContext().getTypeAlign(Ty),
+ (unsigned)getTarget().getPointerWidth(0));
}
- Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
+ Size = llvm::alignTo(Size, Alignment);
// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
- if (Alignment < 128 && Size == 128) {
- llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
- return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
- }
- return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+ llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
+ return ABIArgInfo::getDirect(
+ Size == Alignment ? BaseTy
+ : llvm::ArrayType::get(BaseTy, Size / Alignment));
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
-ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
+ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
+ bool IsVariadic) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
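A few concrete cases may make the generalized coercion easier to read; the IR types in the comments are an editorial reading of the code above, not compiler-verified output.

    struct Pair64 { long long a, b; };  // 16 bytes, 8-byte aligned: [2 x i64] (as before)
    struct Int128 { __int128 v; };      // 16 bytes, 16-byte aligned: i128 (as before)
    // On arm64_32 (DarwinPCS with 32-bit pointers), a 12-byte aggregate such as
    //   struct Triple32 { int a, b, c; };
    // can now be coerced to [3 x i32] instead of being rounded up to i64 units.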
@@ -5224,7 +5238,9 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
const Type *Base = nullptr;
uint64_t Members = 0;
- if (isHomogeneousAggregate(RetTy, Base, Members))
+ if (isHomogeneousAggregate(RetTy, Base, Members) &&
+ !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
+ IsVariadic))
// Homogeneous Floating-point Aggregates (HFAs) are returned directly.
return ABIArgInfo::getDirect();
@@ -5259,6 +5275,14 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
// NumElements should be power of 2.
if (!llvm::isPowerOf2_32(NumElements))
return true;
+
+ // arm64_32 has to be compatible with the ARM logic here, which allows huge
+ // vectors for some reason.
+ llvm::Triple Triple = getTarget().getTriple();
+ if (Triple.getArch() == llvm::Triple::aarch64_32 &&
+ Triple.isOSBinFormatMachO())
+ return Size <= 32;
+
return Size != 64 && (Size != 128 || NumElements == 1);
}
return false;
@@ -5550,7 +5574,8 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
- CharUnits SlotSize = CharUnits::fromQuantity(8);
+ uint64_t PointerSize = getTarget().getPointerWidth(0) / 8;
+ CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
@@ -7555,7 +7580,7 @@ public:
bool shouldUseInReg(QualType Ty, CCState &State) const;
void computeInfo(CGFunctionInfo &FI) const override {
- CCState State(FI.getCallingConvention());
+ CCState State(FI);
// Lanai uses 4 registers to pass arguments unless the function has the
// regparm attribute set.
if (FI.getHasRegParm()) {
@@ -7685,6 +7710,42 @@ private:
bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const override;
+ // Coerce HIP pointer arguments from generic pointers to global ones.
+ llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
+ unsigned ToAS) const {
+ // Structure types.
+ if (auto STy = dyn_cast<llvm::StructType>(Ty)) {
+ SmallVector<llvm::Type *, 8> EltTys;
+ bool Changed = false;
+ for (auto T : STy->elements()) {
+ auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
+ EltTys.push_back(NT);
+ Changed |= (NT != T);
+ }
+ // Skip if there is no change in element types.
+ if (!Changed)
+ return STy;
+ if (STy->hasName())
+ return llvm::StructType::create(
+ EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
+ return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked());
+ }
+ // Array types.
+ if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) {
+ auto T = ATy->getElementType();
+ auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
+ // Skip if there is no change in that element type.
+ if (NT == T)
+ return ATy;
+ return llvm::ArrayType::get(NT, ATy->getNumElements());
+ }
+ // Single value types.
+ if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
+ return llvm::PointerType::get(
+ cast<llvm::PointerType>(Ty)->getElementType(), ToAS);
+ return Ty;
+ }
+
public:
explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
DefaultABIInfo(CGT) {}
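For the HIP coercion above, a concrete kernel may help. The kernel below is hypothetical, and the address-space numbers in the comment assume the usual amdgcn mapping (generic = 0, global = 1); the resulting IR types are an editorial reading of coerceKernelArgumentType, not verified output.

    struct Pair { float *a; float *b; };
    __global__ void combine(float *out, Pair in, int n);

    // Expected kernel signature after coercion: "float *out" becomes
    // "float addrspace(1)*", and Pair is rewritten to an equivalent struct
    // (named with a ".coerce" suffix) whose pointer fields are likewise
    // addrspace(1)*; the plain int is left untouched.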
@@ -7694,6 +7755,8 @@ public:
ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
};
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
@@ -7757,6 +7820,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
}
}
+Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ llvm_unreachable("AMDGPU does not support varargs");
+}
+
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (isAggregateTypeForABI(RetTy)) {
// Records with non-trivial destructors/copy-constructors should not be
@@ -7805,14 +7873,22 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
// TODO: Can we omit empty structs?
- // Coerce single element structs to its element.
+ llvm::Type *LTy = nullptr;
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
- return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+ LTy = CGT.ConvertType(QualType(SeltTy, 0));
+
+ if (getContext().getLangOpts().HIP) {
+ if (!LTy)
+ LTy = CGT.ConvertType(Ty);
+ LTy = coerceKernelArgumentType(
+ LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+ /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
+ }
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+ return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
@@ -7982,8 +8058,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
} else
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel || IsHIPKernel) {
- // By default, restrict the maximum size to 256.
- F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
+ // By default, restrict the maximum size to a value specified by
+ // --gpu-max-threads-per-block=n or its default value.
+ std::string AttrVal =
+ std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
+ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}
if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
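A sketch of how the new default is meant to behave; the driver invocation is an assumed example, not taken from this patch.

    // Hypothetical HIP kernel with no explicit launch-bounds attribute.
    __global__ void scale(float *out, const float *in, int n);

    // With something like: clang++ -x hip --gpu-max-threads-per-block=1024 ...
    // the kernel is expected to get "amdgpu-flat-work-group-size"="1,1024"
    // rather than the previously hard-coded "1,256".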
@@ -8477,7 +8556,7 @@ private:
}
void computeInfo(CGFunctionInfo &FI) const override {
- CCState State(FI.getCallingConvention());
+ CCState State(FI);
// ARC uses 8 registers to pass arguments.
State.FreeRegs = 8;
@@ -9284,11 +9363,21 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(RetTy);
// IsRetIndirect is true if classifyArgumentType indicated the value should
- // be passed indirect or if the type size is greater than 2*xlen. e.g. fp128
- // is passed direct in LLVM IR, relying on the backend lowering code to
- // rewrite the argument list and pass indirectly on RV32.
- bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect ||
- getContext().getTypeSize(RetTy) > (2 * XLen);
+ // be passed indirect, or if the type size is a scalar greater than 2*XLen
+ // and not a complex type with elements <= FLen. e.g. fp128 is passed direct
+ // in LLVM IR, relying on the backend lowering code to rewrite the argument
+ // list and pass indirectly on RV32.
+ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
+ if (!IsRetIndirect && RetTy->isScalarType() &&
+ getContext().getTypeSize(RetTy) > (2 * XLen)) {
+ if (RetTy->isComplexType() && FLen) {
+ QualType EltTy = RetTy->getAs<ComplexType>()->getElementType();
+ IsRetIndirect = getContext().getTypeSize(EltTy) > FLen;
+ } else {
+ // This is a normal scalar > 2*XLen, such as fp128 on RV32.
+ IsRetIndirect = true;
+ }
+ }
// We must track the number of GPRs used in order to conform to the RISC-V
// ABI, as integer scalars passed in registers should have signext/zeroext
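A couple of return types make the refined rule concrete, assuming RV32 with the D extension (XLen = 32, FLen = 64); the classifications in the comments are an editorial reading of the logic above, not verified output.

    _Complex double ret_cd(void);  // 128 bits > 2*XLen, but each element fits in
                                   // FLen, so no GPR is reserved for a hidden
                                   // return pointer.
    long double ret_ld(void);      // fp128: a scalar > 2*XLen with no FP-register
                                   // match, so it is treated as returned
                                   // indirectly and a GPR is reserved.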
@@ -9722,6 +9811,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new AVRTargetCodeGenInfo(Types));
case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be: {
AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
if (getTarget().getABI() == "darwinpcs")