path: root/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
author     Dimitry Andric <dim@FreeBSD.org>    2023-09-02 21:17:18 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2023-12-08 17:34:50 +0000
commit     06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree       62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
parent     cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent     7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp  401
1 file changed, 265 insertions, 136 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
index ee5b76ab2120..bd272e016e92 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
@@ -25,13 +25,13 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/Basic/CodeGenOptions.h"
-#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
@@ -1286,7 +1286,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
//
// FIXME: Assert that we aren't truncating non-padding bits when we have
// access to that information.
- Src = CGF.Builder.CreateElementBitCast(Src, Ty);
+ Src = Src.withElementType(Ty);
return CGF.Builder.CreateLoad(Src);
}
@@ -1311,7 +1311,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
auto *UndefVec = llvm::UndefValue::get(ScalableDst);
auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
llvm::Value *Result = CGF.Builder.CreateInsertVector(
- ScalableDst, UndefVec, Load, Zero, "castScalableSve");
+ ScalableDst, UndefVec, Load, Zero, "cast.scalable");
if (NeedsBitcast)
Result = CGF.Builder.CreateBitCast(Result, OrigType);
return Result;
@@ -1396,7 +1396,7 @@ static void CreateCoercedStore(llvm::Value *Src,
if (isa<llvm::ScalableVectorType>(SrcTy) ||
isa<llvm::ScalableVectorType>(DstTy) ||
SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
- Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
+ Dst = Dst.withElementType(SrcTy);
CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
@@ -1420,10 +1420,10 @@ static void CreateCoercedStore(llvm::Value *Src,
static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr,
const ABIArgInfo &info) {
if (unsigned offset = info.getDirectOffset()) {
- addr = CGF.Builder.CreateElementBitCast(addr, CGF.Int8Ty);
+ addr = addr.withElementType(CGF.Int8Ty);
addr = CGF.Builder.CreateConstInBoundsByteGEP(addr,
CharUnits::fromQuantity(offset));
- addr = CGF.Builder.CreateElementBitCast(addr, info.getCoerceToType());
+ addr = addr.withElementType(info.getCoerceToType());
}
return addr;
}
@@ -1638,9 +1638,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
if (retAI.getInAllocaSRet()) {
// sret things on win32 aren't void, they return the sret pointer.
QualType ret = FI.getReturnType();
- llvm::Type *ty = ConvertType(ret);
unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret);
- resultType = llvm::PointerType::get(ty, addressSpace);
+ resultType = llvm::PointerType::get(getLLVMContext(), addressSpace);
} else {
resultType = llvm::Type::getVoidTy(getLLVMContext());
}
@@ -1662,18 +1661,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
// Add type for sret argument.
if (IRFunctionArgs.hasSRetArg()) {
QualType Ret = FI.getReturnType();
- llvm::Type *Ty = ConvertType(Ret);
unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret);
ArgTypes[IRFunctionArgs.getSRetArgNo()] =
- llvm::PointerType::get(Ty, AddressSpace);
+ llvm::PointerType::get(getLLVMContext(), AddressSpace);
}
// Add type for inalloca argument.
- if (IRFunctionArgs.hasInallocaArg()) {
- auto ArgStruct = FI.getArgStruct();
- assert(ArgStruct);
- ArgTypes[IRFunctionArgs.getInallocaArgNo()] = ArgStruct->getPointerTo();
- }
+ if (IRFunctionArgs.hasInallocaArg())
+ ArgTypes[IRFunctionArgs.getInallocaArgNo()] =
+ llvm::PointerType::getUnqual(getLLVMContext());
// Add in all of the required arguments.
unsigned ArgNo = 0;
@@ -1696,20 +1692,17 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
assert(NumIRArgs == 0);
break;
- case ABIArgInfo::Indirect: {
+ case ABIArgInfo::Indirect:
assert(NumIRArgs == 1);
// indirect arguments are always on the stack, which is alloca addr space.
- llvm::Type *LTy = ConvertTypeForMem(it->type);
- ArgTypes[FirstIRArg] = LTy->getPointerTo(
- CGM.getDataLayout().getAllocaAddrSpace());
+ ArgTypes[FirstIRArg] = llvm::PointerType::get(
+ getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace());
break;
- }
- case ABIArgInfo::IndirectAliased: {
+ case ABIArgInfo::IndirectAliased:
assert(NumIRArgs == 1);
- llvm::Type *LTy = ConvertTypeForMem(it->type);
- ArgTypes[FirstIRArg] = LTy->getPointerTo(ArgInfo.getIndirectAddrSpace());
+ ArgTypes[FirstIRArg] = llvm::PointerType::get(
+ getLLVMContext(), ArgInfo.getIndirectAddrSpace());
break;
- }
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
// Fast-isel and the optimizer generally like scalar values better than
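The two cases above belong to the commit's broader opaque-pointer migration: once pointers carry no pointee type, an indirect argument slot is fully described by its address space, which is why the ConvertTypeForMem()/getPointerTo() pairing can be dropped. A minimal sketch of the replacement pattern, assuming LLVM is available as a library (the helper name is made up for illustration):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    // Hypothetical helper mirroring the new ArgTypes[FirstIRArg] assignments:
    // with opaque pointers, the slot is just "ptr" in the right address
    // space; no pointee type is needed or recorded.
    static llvm::Type *indirectArgSlotType(llvm::LLVMContext &Ctx,
                                           unsigned AddrSpace) {
      return llvm::PointerType::get(Ctx, AddrSpace);
    }

Under typed pointers the same slot was built as ConvertTypeForMem(it->type)->getPointerTo(AddrSpace), which is exactly what the removed lines did.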
@@ -1752,7 +1745,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
+ const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (!isFuncTypeConvertible(FPT))
return llvm::StructType::get(getLLVMContext());
@@ -1830,10 +1823,33 @@ static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy,
Module.getLangOpts().Sanitize.has(SanitizerKind::Return);
}
-void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
- bool HasOptnone,
- bool AttrOnCallSite,
- llvm::AttrBuilder &FuncAttrs) {
+/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
+/// requested denormal behavior, accounting for the overriding behavior of the
+/// -f32 case.
+static void addDenormalModeAttrs(llvm::DenormalMode FPDenormalMode,
+ llvm::DenormalMode FP32DenormalMode,
+ llvm::AttrBuilder &FuncAttrs) {
+ if (FPDenormalMode != llvm::DenormalMode::getDefault())
+ FuncAttrs.addAttribute("denormal-fp-math", FPDenormalMode.str());
+
+ if (FP32DenormalMode != FPDenormalMode && FP32DenormalMode.isValid())
+ FuncAttrs.addAttribute("denormal-fp-math-f32", FP32DenormalMode.str());
+}
+
+/// Add default attributes to a function, which have merge semantics under
+/// -mlink-builtin-bitcode and should not simply overwrite any existing
+/// attributes in the linked library.
+static void
+addMergableDefaultFunctionAttributes(const CodeGenOptions &CodeGenOpts,
+ llvm::AttrBuilder &FuncAttrs) {
+ addDenormalModeAttrs(CodeGenOpts.FPDenormalMode, CodeGenOpts.FP32DenormalMode,
+ FuncAttrs);
+}
+
+static void getTrivialDefaultFunctionAttributes(
+ StringRef Name, bool HasOptnone, const CodeGenOptions &CodeGenOpts,
+ const LangOptions &LangOpts, bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs) {
// OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
if (!HasOptnone) {
if (CodeGenOpts.OptimizeSize)
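The new addDenormalModeAttrs() helper centralizes how the string attributes are spelled: the base mode is emitted whenever it differs from the default, and the f32 override only when it is valid and differs from the base mode. A small sketch of the effect, assuming LLVM as a library (the function name is illustrative only):

    #include "llvm/ADT/FloatingPointMode.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"

    // Illustrative only: what the helper produces for a TU built with
    // -fdenormal-fp-math=preserve-sign and the default f32 mode.
    static void demoDenormalAttrs(llvm::LLVMContext &Ctx) {
      llvm::AttrBuilder FuncAttrs(Ctx);
      llvm::DenormalMode FPMode = llvm::DenormalMode::getPreserveSign();
      llvm::DenormalMode FP32Mode = llvm::DenormalMode::getIEEE();

      if (FPMode != llvm::DenormalMode::getDefault())
        FuncAttrs.addAttribute("denormal-fp-math", FPMode.str());
      // "denormal-fp-math"="preserve-sign,preserve-sign"

      if (FP32Mode != FPMode && FP32Mode.isValid())
        FuncAttrs.addAttribute("denormal-fp-math-f32", FP32Mode.str());
      // "denormal-fp-math-f32"="ieee,ieee" (emitted only because it differs)
    }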
@@ -1875,15 +1891,6 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
if (CodeGenOpts.NullPointerIsValid)
FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid);
- if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE())
- FuncAttrs.addAttribute("denormal-fp-math",
- CodeGenOpts.FPDenormalMode.str());
- if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) {
- FuncAttrs.addAttribute(
- "denormal-fp-math-f32",
- CodeGenOpts.FP32DenormalMode.str());
- }
-
if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore)
FuncAttrs.addAttribute("no-trapping-math", "true");
@@ -1962,7 +1969,7 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
}
}
- if (getLangOpts().assumeFunctionsAreConvergent()) {
+ if (LangOpts.assumeFunctionsAreConvergent()) {
// Conservatively, mark all functions and calls in CUDA and OpenCL as
// convergent (meaning, they may call an intrinsically convergent op, such
// as __syncthreads() / barrier(), and so can't have certain optimizations
@@ -1972,10 +1979,9 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
}
// TODO: NoUnwind attribute should be added for other GPU modes HIP,
- // SYCL, OpenMP offload. AFAIK, none of them support exceptions in device
- // code.
- if ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice) ||
- getLangOpts().OpenCL) {
+ // OpenMP offload. AFAIK, neither of them supports exceptions in device code.
+ if ((LangOpts.CUDA && LangOpts.CUDAIsDevice) || LangOpts.OpenCL ||
+ LangOpts.SYCLIsDevice) {
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
@@ -1986,6 +1992,98 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
}
}
+/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
+/// though we had emitted it ourselves. We remove any attributes on F that
+/// conflict with the attributes we add here.
+static void mergeDefaultFunctionDefinitionAttributes(
+ llvm::Function &F, const CodeGenOptions CodeGenOpts,
+ const LangOptions &LangOpts, const TargetOptions &TargetOpts,
+ bool WillInternalize) {
+
+ llvm::AttrBuilder FuncAttrs(F.getContext());
+ // Here we only extract the options that are relevant compared to the version
+ // from GetCPUAndFeaturesAttributes.
+ if (!TargetOpts.CPU.empty())
+ FuncAttrs.addAttribute("target-cpu", TargetOpts.CPU);
+ if (!TargetOpts.TuneCPU.empty())
+ FuncAttrs.addAttribute("tune-cpu", TargetOpts.TuneCPU);
+
+ ::getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
+ CodeGenOpts, LangOpts,
+ /*AttrOnCallSite=*/false, FuncAttrs);
+
+ if (!WillInternalize && F.isInterposable()) {
+ // Do not promote "dynamic" denormal-fp-math to this translation unit's
+ // setting for weak functions that won't be internalized. The user has no
+ // real control for how builtin bitcode is linked, so we shouldn't assume
+ // later copies will use a consistent mode.
+ F.addFnAttrs(FuncAttrs);
+ return;
+ }
+
+ llvm::AttributeMask AttrsToRemove;
+
+ llvm::DenormalMode DenormModeToMerge = F.getDenormalModeRaw();
+ llvm::DenormalMode DenormModeToMergeF32 = F.getDenormalModeF32Raw();
+ llvm::DenormalMode Merged =
+ CodeGenOpts.FPDenormalMode.mergeCalleeMode(DenormModeToMerge);
+ llvm::DenormalMode MergedF32 = CodeGenOpts.FP32DenormalMode;
+
+ if (DenormModeToMergeF32.isValid()) {
+ MergedF32 =
+ CodeGenOpts.FP32DenormalMode.mergeCalleeMode(DenormModeToMergeF32);
+ }
+
+ if (Merged == llvm::DenormalMode::getDefault()) {
+ AttrsToRemove.addAttribute("denormal-fp-math");
+ } else if (Merged != DenormModeToMerge) {
+ // Overwrite existing attribute
+ FuncAttrs.addAttribute("denormal-fp-math",
+ CodeGenOpts.FPDenormalMode.str());
+ }
+
+ if (MergedF32 == llvm::DenormalMode::getDefault()) {
+ AttrsToRemove.addAttribute("denormal-fp-math-f32");
+ } else if (MergedF32 != DenormModeToMergeF32) {
+ // Overwrite existing attribute
+ FuncAttrs.addAttribute("denormal-fp-math-f32",
+ CodeGenOpts.FP32DenormalMode.str());
+ }
+
+ F.removeFnAttrs(AttrsToRemove);
+ addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
+ F.addFnAttrs(FuncAttrs);
+}
+
+void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes(
+ llvm::Function &F, const CodeGenOptions CodeGenOpts,
+ const LangOptions &LangOpts, const TargetOptions &TargetOpts,
+ bool WillInternalize) {
+
+ ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts,
+ TargetOpts, WillInternalize);
+}
+
+void CodeGenModule::getTrivialDefaultFunctionAttributes(
+ StringRef Name, bool HasOptnone, bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs) {
+ ::getTrivialDefaultFunctionAttributes(Name, HasOptnone, getCodeGenOpts(),
+ getLangOpts(), AttrOnCallSite,
+ FuncAttrs);
+}
+
+void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
+ bool HasOptnone,
+ bool AttrOnCallSite,
+ llvm::AttrBuilder &FuncAttrs) {
+ getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite,
+ FuncAttrs);
+ // If we're just getting the default, get the default values for mergeable
+ // attributes.
+ if (!AttrOnCallSite)
+ addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs);
+}
+
void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
llvm::AttrBuilder FuncAttrs(F.getContext());
getDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
@@ -1994,8 +2092,17 @@ void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
F.addFnAttrs(FuncAttrs);
}
+/// Apply default attributes to \p F, accounting for merge semantics of
+/// attributes that should not overwrite existing attributes.
+void CodeGenModule::mergeDefaultFunctionDefinitionAttributes(
+ llvm::Function &F, bool WillInternalize) {
+ ::mergeDefaultFunctionDefinitionAttributes(F, getCodeGenOpts(), getLangOpts(),
+ getTarget().getTargetOpts(),
+ WillInternalize);
+}
+
void CodeGenModule::addDefaultFunctionDefinitionAttributes(
- llvm::AttrBuilder &attrs) {
+ llvm::AttrBuilder &attrs) {
getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false,
/*for call*/ false, attrs);
GetCPUAndFeaturesAttributes(GlobalDecl(), attrs);
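mergeDefaultFunctionDefinitionAttributes() exists for functions this translation unit did not emit itself, typically -mlink-builtin-bitcode libraries: instead of stamping the TU's denormal mode over whatever the callee already carries, it combines the two with DenormalMode::mergeCalleeMode() and removes the attribute entirely when the merged result is the default. A hedged example of the merge rule, assuming LLVM as a library (the function name is illustrative):

    #include "llvm/ADT/FloatingPointMode.h"
    #include <cassert>

    // Illustrative only: a builtin-bitcode callee compiled with the "dynamic"
    // denormal mode picks up this TU's concrete mode when it is linked in.
    static void demoDenormalMerge() {
      llvm::DenormalMode TUMode = llvm::DenormalMode::getPreserveSign();
      llvm::DenormalMode CalleeMode = llvm::DenormalMode::getDynamic();

      llvm::DenormalMode Merged = TUMode.mergeCalleeMode(CalleeMode);
      assert(Merged == llvm::DenormalMode::getPreserveSign());
      (void)Merged;
      // Had the merge produced DenormalMode::getDefault(), the code above
      // would strip "denormal-fp-math" from F instead of rewriting it.
    }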
@@ -2107,6 +2214,39 @@ static bool IsArgumentMaybeUndef(const Decl *TargetDecl,
return false;
}
+/// Test if it's legal to apply nofpclass for the given parameter type and its
+/// lowered IR type.
+static bool canApplyNoFPClass(const ABIArgInfo &AI, QualType ParamType,
+ bool IsReturn) {
+ // Should only apply to FP types in the source, not ABI promoted.
+ if (!ParamType->hasFloatingRepresentation())
+ return false;
+
+ // The promoted-to IR type also needs to support nofpclass.
+ llvm::Type *IRTy = AI.getCoerceToType();
+ if (llvm::AttributeFuncs::isNoFPClassCompatibleType(IRTy))
+ return true;
+
+ if (llvm::StructType *ST = dyn_cast<llvm::StructType>(IRTy)) {
+ return !IsReturn && AI.getCanBeFlattened() &&
+ llvm::all_of(ST->elements(), [](llvm::Type *Ty) {
+ return llvm::AttributeFuncs::isNoFPClassCompatibleType(Ty);
+ });
+ }
+
+ return false;
+}
+
+/// Return the nofpclass mask that can be applied to floating-point parameters.
+static llvm::FPClassTest getNoFPClassTestMask(const LangOptions &LangOpts) {
+ llvm::FPClassTest Mask = llvm::fcNone;
+ if (LangOpts.NoHonorInfs)
+ Mask |= llvm::fcInf;
+ if (LangOpts.NoHonorNaNs)
+ Mask |= llvm::fcNan;
+ return Mask;
+}
+
/// Construct the IR attribute list of a function or call.
///
/// When adding an attribute, please consider where it should be handled:
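canApplyNoFPClass() and getNoFPClassTestMask() feed the new nofpclass parameter/return attribute: when the frontend may assume no infinities and/or no NaNs, floating-point arguments and returns are tagged so the optimizer can rely on it. A sketch of the mask computation, assuming LLVM as a library (the wrapper name and its bool parameters are illustrative; in the patch the bits come from LangOptions):

    #include "llvm/ADT/FloatingPointMode.h"

    // Illustrative wrapper: the two language options map onto FPClassTest
    // bits, so -fno-honor-infinities plus -fno-honor-nans together yield
    // nofpclass(inf nan) on FP arguments and returns.
    static llvm::FPClassTest noFPClassMask(bool NoHonorInfs, bool NoHonorNaNs) {
      llvm::FPClassTest Mask = llvm::fcNone;
      if (NoHonorInfs)
        Mask |= llvm::fcInf; // both +inf and -inf
      if (NoHonorNaNs)
        Mask |= llvm::fcNan; // both quiet and signaling NaNs
      return Mask;
    }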
@@ -2202,6 +2342,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
NBA = Fn->getAttr<NoBuiltinAttr>();
}
+ }
+
+ if (isa<FunctionDecl>(TargetDecl) || isa<VarDecl>(TargetDecl)) {
// Only place nomerge attribute on call sites, never functions. This
// allows it to work on indirect virtual function calls.
if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>())
@@ -2374,6 +2517,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
case ABIArgInfo::Direct:
if (RetAI.getInReg())
RetAttrs.addAttribute(llvm::Attribute::InReg);
+
+ if (canApplyNoFPClass(RetAI, RetTy, true))
+ RetAttrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts()));
+
break;
case ABIArgInfo::Ignore:
break;
@@ -2512,8 +2659,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
else if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign()));
- break;
+ if (canApplyNoFPClass(AI, ParamType, false))
+ Attrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts()));
+ break;
case ABIArgInfo::Indirect: {
if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
@@ -2745,13 +2894,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// If we're using inalloca, all the memory arguments are GEPs off of the last
// parameter, which is a pointer to the complete memory area.
Address ArgStruct = Address::invalid();
- if (IRFunctionArgs.hasInallocaArg()) {
+ if (IRFunctionArgs.hasInallocaArg())
ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()),
FI.getArgStruct(), FI.getArgStructAlignment());
- assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo());
- }
-
// Name the struct return parameter.
if (IRFunctionArgs.hasSRetArg()) {
auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo());
@@ -2807,7 +2953,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
case ABIArgInfo::IndirectAliased: {
assert(NumIRArgs == 1);
Address ParamAddr = Address(Fn->getArg(FirstIRArg), ConvertTypeForMem(Ty),
- ArgI.getIndirectAlign());
+ ArgI.getIndirectAlign(), KnownNonNull);
if (!hasScalarEvaluationKind(Ty)) {
// Aggregates and complex variables are accessed by reference. All we
@@ -3000,7 +3146,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(NumIRArgs == 1);
Coerced->setName(Arg->getName() + ".coerce");
ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
- VecTyTo, Coerced, Zero, "castFixedSve")));
+ VecTyTo, Coerced, Zero, "cast.fixed")));
break;
}
}
@@ -3017,30 +3163,51 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
STy->getNumElements() > 1) {
- uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
- llvm::Type *DstTy = Ptr.getElementType();
- uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+ llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
+ llvm::TypeSize PtrElementSize =
+ CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
+ if (StructSize.isScalable()) {
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ "ABI only supports structure with homogeneous scalable vector "
+ "type");
+ assert(StructSize == PtrElementSize &&
+             "Only allow non-fractional movement of structure with "
+ "homogeneous scalable vector type");
+ assert(STy->getNumElements() == NumIRArgs);
+
+ llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ auto *AI = Fn->getArg(FirstIRArg + i);
+ AI->setName(Arg->getName() + ".coerce" + Twine(i));
+ LoadedStructValue =
+ Builder.CreateInsertValue(LoadedStructValue, AI, i);
+ }
- Address AddrToStoreInto = Address::invalid();
- if (SrcSize <= DstSize) {
- AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+ Builder.CreateStore(LoadedStructValue, Ptr);
} else {
- AddrToStoreInto =
- CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
- }
+ uint64_t SrcSize = StructSize.getFixedValue();
+ uint64_t DstSize = PtrElementSize.getFixedValue();
+
+ Address AddrToStoreInto = Address::invalid();
+ if (SrcSize <= DstSize) {
+ AddrToStoreInto = Ptr.withElementType(STy);
+ } else {
+ AddrToStoreInto =
+ CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
+ }
- assert(STy->getNumElements() == NumIRArgs);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- auto AI = Fn->getArg(FirstIRArg + i);
- AI->setName(Arg->getName() + ".coerce" + Twine(i));
- Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
- Builder.CreateStore(AI, EltPtr);
- }
+ assert(STy->getNumElements() == NumIRArgs);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ auto AI = Fn->getArg(FirstIRArg + i);
+ AI->setName(Arg->getName() + ".coerce" + Twine(i));
+ Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
+ Builder.CreateStore(AI, EltPtr);
+ }
- if (SrcSize > DstSize) {
- Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+ if (SrcSize > DstSize) {
+ Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+ }
}
-
} else {
// Simple case, just do a coerced store of the argument into the alloca.
assert(NumIRArgs == 1);
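The scalable-vector branch added above cannot reuse the per-element GEP loop, because fields of a struct of scalable vectors sit at vscale-dependent offsets; the split arguments are therefore reassembled with insertvalue and stored as one aggregate. A hedged sketch of that path, assuming LLVM as a library (the helper name is made up):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"

    // Hypothetical helper mirroring the scalable branch above: rebuild the
    // struct of homogeneous scalable vectors from its already-split IR
    // arguments and store it as a single aggregate.
    static void storeScalableStructArg(llvm::IRBuilder<> &B, llvm::Function *Fn,
                                       llvm::StructType *STy, llvm::Value *Ptr,
                                       unsigned FirstIRArg) {
      llvm::Value *Agg = llvm::PoisonValue::get(STy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Agg = B.CreateInsertValue(Agg, Fn->getArg(FirstIRArg + i), i);
      B.CreateStore(Agg, Ptr);
    }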
@@ -3068,7 +3235,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
ArgVals.push_back(ParamValue::forIndirect(alloca));
auto coercionType = ArgI.getCoerceAndExpandType();
- alloca = Builder.CreateElementBitCast(alloca, coercionType);
+ alloca = alloca.withElementType(coercionType);
unsigned argIndex = FirstIRArg;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
@@ -3325,8 +3492,9 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
// single-predecessors chain from the current insertion point.
llvm::BasicBlock *StoreBB = store->getParent();
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
+ llvm::SmallPtrSet<llvm::BasicBlock *, 4> SeenBBs;
while (IP != StoreBB) {
- if (!(IP = IP->getSinglePredecessor()))
+ if (!SeenBBs.insert(IP).second || !(IP = IP->getSinglePredecessor()))
return nullptr;
}
@@ -3669,7 +3837,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
// Load all of the coerced elements out into results.
llvm::SmallVector<llvm::Value*, 4> results;
- Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType);
+ Address addr = ReturnValue.withElementType(coercionType);
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
auto coercedEltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType))
@@ -3795,8 +3963,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF,
// FIXME: Generate IR in one pass, rather than going back and fixing up these
// placeholders.
llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
- llvm::Type *IRPtrTy = IRTy->getPointerTo();
- llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy->getPointerTo());
+ llvm::Type *IRPtrTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+ llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy);
// FIXME: When we generate this IR in one pass, we shouldn't need
// this win32-specific alignment hack.
@@ -4764,7 +4932,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// the proper cpu features (and it won't cause code generation issues due to
// function based code generation).
if (TargetDecl->hasAttr<AlwaysInlineAttr>() &&
- TargetDecl->hasAttr<TargetAttr>())
+ (TargetDecl->hasAttr<TargetAttr>() ||
+ (CurFuncDecl && CurFuncDecl->hasAttr<TargetAttr>())))
checkTargetFeatures(Loc, FD);
// Some architectures (such as x86-64) have the ABI changed based on
@@ -4773,25 +4942,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), FD, CallArgs);
}
-#ifndef NDEBUG
- if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) {
- // For an inalloca varargs function, we don't expect CallInfo to match the
- // function pointer's type, because the inalloca struct a will have extra
- // fields in it for the varargs parameters. Code later in this function
- // bitcasts the function pointer to the type derived from CallInfo.
- //
- // In other cases, we assert that the types match up (until pointers stop
- // having pointee types).
- if (Callee.isVirtual())
- assert(IRFuncTy == Callee.getVirtualFunctionType());
- else {
- llvm::PointerType *PtrTy =
- llvm::cast<llvm::PointerType>(Callee.getFunctionPointer()->getType());
- assert(PtrTy->isOpaqueOrPointeeTypeMatches(IRFuncTy));
- }
- }
-#endif
-
// 1. Set up the arguments.
// If we're using inalloca, insert the allocation after the stack save.
@@ -4913,10 +5063,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Store the RValue into the argument struct.
Address Addr =
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
- // There are some cases where a trivial bitcast is not avoidable. The
- // definition of a type later in a translation unit may change it's type
- // from {}* to (%struct.foo*)*.
- Addr = Builder.CreateElementBitCast(Addr, ConvertTypeForMem(I->Ty));
+ Addr = Addr.withElementType(ConvertTypeForMem(I->Ty));
I->copyInto(*this, Addr);
}
break;
@@ -5010,9 +5157,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
I->copyInto(*this, AI);
} else {
// Skip the extra memcpy call.
- auto *T = llvm::PointerType::getWithSamePointeeType(
- cast<llvm::PointerType>(V->getType()),
- CGM.getDataLayout().getAllocaAddrSpace());
+ auto *T = llvm::PointerType::get(
+ CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace());
llvm::Value *Val = getTargetHooks().performAddrSpaceCast(
*this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
@@ -5112,7 +5258,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
Src = TempAlloca;
} else {
- Src = Builder.CreateElementBitCast(Src, STy);
+ Src = Src.withElementType(STy);
}
assert(NumIRArgs == STy->getNumElements());
@@ -5176,7 +5322,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Builder.CreateStore(RV.getScalarVal(), addr);
}
- addr = Builder.CreateElementBitCast(addr, coercionType);
+ addr = addr.withElementType(coercionType);
unsigned IRArgPos = FirstIRArg;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
@@ -5212,35 +5358,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If we're using inalloca, set up that argument.
if (ArgMemory.isValid()) {
llvm::Value *Arg = ArgMemory.getPointer();
- if (CallInfo.isVariadic()) {
- // When passing non-POD arguments by value to variadic functions, we will
- // end up with a variadic prototype and an inalloca call site. In such
- // cases, we can't do any parameter mismatch checks. Give up and bitcast
- // the callee.
- unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace();
- CalleePtr =
- Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS));
- } else {
- llvm::Type *LastParamTy =
- IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1);
- if (Arg->getType() != LastParamTy) {
-#ifndef NDEBUG
- // Assert that these structs have equivalent element types.
- llvm::StructType *FullTy = CallInfo.getArgStruct();
- if (!LastParamTy->isOpaquePointerTy()) {
- llvm::StructType *DeclaredTy = cast<llvm::StructType>(
- LastParamTy->getNonOpaquePointerElementType());
- assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
- for (auto DI = DeclaredTy->element_begin(),
- DE = DeclaredTy->element_end(),
- FI = FullTy->element_begin();
- DI != DE; ++DI, ++FI)
- assert(*DI == *FI);
- }
-#endif
- Arg = Builder.CreateBitCast(Arg, LastParamTy);
- }
- }
assert(IRFunctionArgs.hasInallocaArg());
IRCallArgs[IRFunctionArgs.getInallocaArgNo()] = Arg;
}
@@ -5560,8 +5677,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::CoerceAndExpand: {
auto coercionType = RetAI.getCoerceAndExpandType();
- Address addr = SRetPtr;
- addr = Builder.CreateElementBitCast(addr, coercionType);
+ Address addr = SRetPtr.withElementType(coercionType);
assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType());
bool requiresExtract = isa<llvm::StructType>(CI->getType());
@@ -5578,7 +5694,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(unpaddedIndex == 0);
Builder.CreateStore(elt, eltAddr);
}
- // FALLTHROUGH
[[fallthrough]];
}
@@ -5628,6 +5743,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm_unreachable("bad evaluation kind");
}
+ // If coercing a fixed vector from a scalable vector for ABI
+ // compatibility, and the types match, use the llvm.vector.extract
+ // intrinsic to perform the conversion.
+ if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
+ llvm::Value *V = CI;
+ if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) {
+ if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+ llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+ V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed");
+ return RValue::get(V);
+ }
+ }
+ }
+
Address DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
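The block added above handles calls whose source-level return type is a fixed-length vector while the ABI returns a scalable one (for example vectors declared with arm_sve_vector_bits): instead of bouncing through memory, the fixed part is peeled off with llvm.vector.extract. A standalone sketch of the same coercion, assuming LLVM as a library (module and function names are illustrative):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("coerce-demo", Ctx);
      IRBuilder<> B(Ctx);

      auto *ScalableTy = ScalableVectorType::get(B.getFloatTy(), 4); // <vscale x 4 x float>
      auto *FixedTy = FixedVectorType::get(B.getFloatTy(), 8);       // <8 x float>

      auto *FnTy = FunctionType::get(FixedTy, {ScalableTy}, false);
      auto *Fn = Function::Create(FnTy, Function::ExternalLinkage, "coerce", M);
      B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", Fn));

      // Mirror of the epilogue above: take the fixed-width prefix of the
      // scalable value instead of round-tripping it through memory.
      Value *Zero = B.getInt64(0);
      Value *Fixed = B.CreateExtractVector(FixedTy, Fn->getArg(0), Zero, "cast.fixed");
      B.CreateRet(Fixed);

      M.print(outs(), nullptr);
      return 0;
    }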