| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-09-02 21:17:18 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-12-08 17:34:50 +0000 |
| commit | 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch) | |
| tree | 62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp | |
| parent | cf037972ea8863e2bab7461d77345367d2c1e054 (diff) | |
| parent | 7fa27ce4a07f19b07799a767fc29416f3b625afb (diff) | |
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp | 401 |
1 file changed, 265 insertions, 136 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
index ee5b76ab2120..bd272e016e92 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
@@ -25,13 +25,13 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/Basic/CodeGenOptions.h"
-#include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/CodeGen/SwiftCallingConv.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Assumptions.h"
+#include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
@@ -1286,7 +1286,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
   //
   // FIXME: Assert that we aren't truncating non-padding bits when have access
   // to that information.
-  Src = CGF.Builder.CreateElementBitCast(Src, Ty);
+  Src = Src.withElementType(Ty);
   return CGF.Builder.CreateLoad(Src);
 }
@@ -1311,7 +1311,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
     auto *UndefVec = llvm::UndefValue::get(ScalableDst);
     auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
     llvm::Value *Result = CGF.Builder.CreateInsertVector(
-        ScalableDst, UndefVec, Load, Zero, "castScalableSve");
+        ScalableDst, UndefVec, Load, Zero, "cast.scalable");
     if (NeedsBitcast)
       Result = CGF.Builder.CreateBitCast(Result, OrigType);
     return Result;
   }
@@ -1396,7 +1396,7 @@ static void CreateCoercedStore(llvm::Value *Src,
   if (isa<llvm::ScalableVectorType>(SrcTy) ||
       isa<llvm::ScalableVectorType>(DstTy) ||
       SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
-    Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
+    Dst = Dst.withElementType(SrcTy);
     CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
@@ -1420,10 +1420,10 @@ static void CreateCoercedStore(llvm::Value *Src,
 static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr,
                                    const ABIArgInfo &info) {
   if (unsigned offset = info.getDirectOffset()) {
-    addr = CGF.Builder.CreateElementBitCast(addr, CGF.Int8Ty);
+    addr = addr.withElementType(CGF.Int8Ty);
     addr = CGF.Builder.CreateConstInBoundsByteGEP(addr,
                                              CharUnits::fromQuantity(offset));
-    addr = CGF.Builder.CreateElementBitCast(addr, info.getCoerceToType());
+    addr = addr.withElementType(info.getCoerceToType());
   }
   return addr;
 }
@@ -1638,9 +1638,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
     if (retAI.getInAllocaSRet()) {
       // sret things on win32 aren't void, they return the sret pointer.
       QualType ret = FI.getReturnType();
-      llvm::Type *ty = ConvertType(ret);
       unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret);
-      resultType = llvm::PointerType::get(ty, addressSpace);
+      resultType = llvm::PointerType::get(getLLVMContext(), addressSpace);
     } else {
       resultType = llvm::Type::getVoidTy(getLLVMContext());
     }
@@ -1662,18 +1661,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
   // Add type for sret argument.
   if (IRFunctionArgs.hasSRetArg()) {
     QualType Ret = FI.getReturnType();
-    llvm::Type *Ty = ConvertType(Ret);
     unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret);
     ArgTypes[IRFunctionArgs.getSRetArgNo()] =
-        llvm::PointerType::get(Ty, AddressSpace);
+        llvm::PointerType::get(getLLVMContext(), AddressSpace);
   }
 
   // Add type for inalloca argument.
-  if (IRFunctionArgs.hasInallocaArg()) {
-    auto ArgStruct = FI.getArgStruct();
-    assert(ArgStruct);
-    ArgTypes[IRFunctionArgs.getInallocaArgNo()] = ArgStruct->getPointerTo();
-  }
+  if (IRFunctionArgs.hasInallocaArg())
+    ArgTypes[IRFunctionArgs.getInallocaArgNo()] =
+        llvm::PointerType::getUnqual(getLLVMContext());
 
   // Add in all of the required arguments.
   unsigned ArgNo = 0;
@@ -1696,20 +1692,17 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
       assert(NumIRArgs == 0);
       break;
 
-    case ABIArgInfo::Indirect: {
+    case ABIArgInfo::Indirect:
       assert(NumIRArgs == 1);
       // indirect arguments are always on the stack, which is alloca addr space.
-      llvm::Type *LTy = ConvertTypeForMem(it->type);
-      ArgTypes[FirstIRArg] = LTy->getPointerTo(
-          CGM.getDataLayout().getAllocaAddrSpace());
+      ArgTypes[FirstIRArg] = llvm::PointerType::get(
+          getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace());
       break;
-    }
-    case ABIArgInfo::IndirectAliased: {
+    case ABIArgInfo::IndirectAliased:
       assert(NumIRArgs == 1);
-      llvm::Type *LTy = ConvertTypeForMem(it->type);
-      ArgTypes[FirstIRArg] = LTy->getPointerTo(ArgInfo.getIndirectAddrSpace());
+      ArgTypes[FirstIRArg] = llvm::PointerType::get(
+          getLLVMContext(), ArgInfo.getIndirectAddrSpace());
       break;
-    }
     case ABIArgInfo::Extend:
     case ABIArgInfo::Direct: {
       // Fast-isel and the optimizer generally like scalar values better than
@@ -1752,7 +1745,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
 
 llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) {
   const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
-  const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
+  const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
 
   if (!isFuncTypeConvertible(FPT))
     return llvm::StructType::get(getLLVMContext());
@@ -1830,10 +1823,33 @@ static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy,
          Module.getLangOpts().Sanitize.has(SanitizerKind::Return);
 }
 
-void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
-                                                 bool HasOptnone,
-                                                 bool AttrOnCallSite,
-                                                 llvm::AttrBuilder &FuncAttrs) {
+/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
+/// requested denormal behavior, accounting for the overriding behavior of the
+/// -f32 case.
+static void addDenormalModeAttrs(llvm::DenormalMode FPDenormalMode,
+                                 llvm::DenormalMode FP32DenormalMode,
+                                 llvm::AttrBuilder &FuncAttrs) {
+  if (FPDenormalMode != llvm::DenormalMode::getDefault())
+    FuncAttrs.addAttribute("denormal-fp-math", FPDenormalMode.str());
+
+  if (FP32DenormalMode != FPDenormalMode && FP32DenormalMode.isValid())
+    FuncAttrs.addAttribute("denormal-fp-math-f32", FP32DenormalMode.str());
+}
+
+/// Add default attributes to a function, which have merge semantics under
+/// -mlink-builtin-bitcode and should not simply overwrite any existing
+/// attributes in the linked library.
+static void
+addMergableDefaultFunctionAttributes(const CodeGenOptions &CodeGenOpts,
+                                     llvm::AttrBuilder &FuncAttrs) {
+  addDenormalModeAttrs(CodeGenOpts.FPDenormalMode, CodeGenOpts.FP32DenormalMode,
+                       FuncAttrs);
+}
+
+static void getTrivialDefaultFunctionAttributes(
+    StringRef Name, bool HasOptnone, const CodeGenOptions &CodeGenOpts,
+    const LangOptions &LangOpts, bool AttrOnCallSite,
+    llvm::AttrBuilder &FuncAttrs) {
   // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
   if (!HasOptnone) {
     if (CodeGenOpts.OptimizeSize)
@@ -1875,15 +1891,6 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
   if (CodeGenOpts.NullPointerIsValid)
     FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid);
 
-  if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE())
-    FuncAttrs.addAttribute("denormal-fp-math",
-                           CodeGenOpts.FPDenormalMode.str());
-  if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) {
-    FuncAttrs.addAttribute(
-        "denormal-fp-math-f32",
-        CodeGenOpts.FP32DenormalMode.str());
-  }
-
   if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore)
     FuncAttrs.addAttribute("no-trapping-math", "true");
@@ -1962,7 +1969,7 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
     }
   }
 
-  if (getLangOpts().assumeFunctionsAreConvergent()) {
+  if (LangOpts.assumeFunctionsAreConvergent()) {
     // Conservatively, mark all functions and calls in CUDA and OpenCL as
     // convergent (meaning, they may call an intrinsically convergent op, such
     // as __syncthreads() / barrier(), and so can't have certain optimizations
@@ -1972,10 +1979,9 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
   }
 
   // TODO: NoUnwind attribute should be added for other GPU modes HIP,
-  // SYCL, OpenMP offload. AFAIK, none of them support exceptions in device
-  // code.
-  if ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice) ||
-      getLangOpts().OpenCL) {
+  // OpenMP offload. AFAIK, neither of them support exceptions in device code.
+  if ((LangOpts.CUDA && LangOpts.CUDAIsDevice) || LangOpts.OpenCL ||
+      LangOpts.SYCLIsDevice) {
     FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
   }
 
@@ -1986,6 +1992,98 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
   }
 }
 
+/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
+/// though we had emitted it ourselves. We remove any attributes on F that
+/// conflict with the attributes we add here.
+static void mergeDefaultFunctionDefinitionAttributes(
+    llvm::Function &F, const CodeGenOptions CodeGenOpts,
+    const LangOptions &LangOpts, const TargetOptions &TargetOpts,
+    bool WillInternalize) {
+
+  llvm::AttrBuilder FuncAttrs(F.getContext());
+  // Here we only extract the options that are relevant compared to the version
+  // from GetCPUAndFeaturesAttributes.
+  if (!TargetOpts.CPU.empty())
+    FuncAttrs.addAttribute("target-cpu", TargetOpts.CPU);
+  if (!TargetOpts.TuneCPU.empty())
+    FuncAttrs.addAttribute("tune-cpu", TargetOpts.TuneCPU);
+
+  ::getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
+                                        CodeGenOpts, LangOpts,
+                                        /*AttrOnCallSite=*/false, FuncAttrs);
+
+  if (!WillInternalize && F.isInterposable()) {
+    // Do not promote "dynamic" denormal-fp-math to this translation unit's
+    // setting for weak functions that won't be internalized. The user has no
+    // real control for how builtin bitcode is linked, so we shouldn't assume
+    // later copies will use a consistent mode.
+    F.addFnAttrs(FuncAttrs);
+    return;
+  }
+
+  llvm::AttributeMask AttrsToRemove;
+
+  llvm::DenormalMode DenormModeToMerge = F.getDenormalModeRaw();
+  llvm::DenormalMode DenormModeToMergeF32 = F.getDenormalModeF32Raw();
+  llvm::DenormalMode Merged =
+      CodeGenOpts.FPDenormalMode.mergeCalleeMode(DenormModeToMerge);
+  llvm::DenormalMode MergedF32 = CodeGenOpts.FP32DenormalMode;
+
+  if (DenormModeToMergeF32.isValid()) {
+    MergedF32 =
+        CodeGenOpts.FP32DenormalMode.mergeCalleeMode(DenormModeToMergeF32);
+  }
+
+  if (Merged == llvm::DenormalMode::getDefault()) {
+    AttrsToRemove.addAttribute("denormal-fp-math");
+  } else if (Merged != DenormModeToMerge) {
+    // Overwrite existing attribute
+    FuncAttrs.addAttribute("denormal-fp-math",
+                           CodeGenOpts.FPDenormalMode.str());
+  }
+
+  if (MergedF32 == llvm::DenormalMode::getDefault()) {
+    AttrsToRemove.addAttribute("denormal-fp-math-f32");
+  } else if (MergedF32 != DenormModeToMergeF32) {
+    // Overwrite existing attribute
+    FuncAttrs.addAttribute("denormal-fp-math-f32",
+                           CodeGenOpts.FP32DenormalMode.str());
+  }
+
+  F.removeFnAttrs(AttrsToRemove);
+  addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
+  F.addFnAttrs(FuncAttrs);
+}
+
+void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes(
+    llvm::Function &F, const CodeGenOptions CodeGenOpts,
+    const LangOptions &LangOpts, const TargetOptions &TargetOpts,
+    bool WillInternalize) {
+
+  ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts,
+                                             TargetOpts, WillInternalize);
+}
+
+void CodeGenModule::getTrivialDefaultFunctionAttributes(
+    StringRef Name, bool HasOptnone, bool AttrOnCallSite,
+    llvm::AttrBuilder &FuncAttrs) {
+  ::getTrivialDefaultFunctionAttributes(Name, HasOptnone, getCodeGenOpts(),
+                                        getLangOpts(), AttrOnCallSite,
+                                        FuncAttrs);
+}
+
+void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
+                                                 bool HasOptnone,
+                                                 bool AttrOnCallSite,
+                                                 llvm::AttrBuilder &FuncAttrs) {
+  getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite,
+                                      FuncAttrs);
+  // If we're just getting the default, get the default values for mergeable
+  // attributes.
+  if (!AttrOnCallSite)
+    addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs);
+}
+
 void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
   llvm::AttrBuilder FuncAttrs(F.getContext());
   getDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
@@ -1994,8 +2092,17 @@ void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
   F.addFnAttrs(FuncAttrs);
 }
 
+/// Apply default attributes to \p F, accounting for merge semantics of
+/// attributes that should not overwrite existing attributes.
+void CodeGenModule::mergeDefaultFunctionDefinitionAttributes(
+    llvm::Function &F, bool WillInternalize) {
+  ::mergeDefaultFunctionDefinitionAttributes(F, getCodeGenOpts(), getLangOpts(),
+                                             getTarget().getTargetOpts(),
+                                             WillInternalize);
+}
+
 void CodeGenModule::addDefaultFunctionDefinitionAttributes(
-                                                           llvm::AttrBuilder &attrs) {
+    llvm::AttrBuilder &attrs) {
   getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false,
                                /*for call*/ false, attrs);
   GetCPUAndFeaturesAttributes(GlobalDecl(), attrs);
@@ -2107,6 +2214,39 @@ static bool IsArgumentMaybeUndef(const Decl *TargetDecl,
   return false;
 }
 
+/// Test if it's legal to apply nofpclass for the given parameter type and it's
+/// lowered IR type.
+static bool canApplyNoFPClass(const ABIArgInfo &AI, QualType ParamType,
+                              bool IsReturn) {
+  // Should only apply to FP types in the source, not ABI promoted.
+  if (!ParamType->hasFloatingRepresentation())
+    return false;
+
+  // The promoted-to IR type also needs to support nofpclass.
+  llvm::Type *IRTy = AI.getCoerceToType();
+  if (llvm::AttributeFuncs::isNoFPClassCompatibleType(IRTy))
+    return true;
+
+  if (llvm::StructType *ST = dyn_cast<llvm::StructType>(IRTy)) {
+    return !IsReturn && AI.getCanBeFlattened() &&
+           llvm::all_of(ST->elements(), [](llvm::Type *Ty) {
+             return llvm::AttributeFuncs::isNoFPClassCompatibleType(Ty);
+           });
+  }
+
+  return false;
+}
+
+/// Return the nofpclass mask that can be applied to floating-point parameters.
+static llvm::FPClassTest getNoFPClassTestMask(const LangOptions &LangOpts) {
+  llvm::FPClassTest Mask = llvm::fcNone;
+  if (LangOpts.NoHonorInfs)
+    Mask |= llvm::fcInf;
+  if (LangOpts.NoHonorNaNs)
+    Mask |= llvm::fcNan;
+  return Mask;
+}
+
 /// Construct the IR attribute list of a function or call.
 ///
 /// When adding an attribute, please consider where it should be handled:
@@ -2202,6 +2342,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
         FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
       NBA = Fn->getAttr<NoBuiltinAttr>();
     }
+  }
+
+  if (isa<FunctionDecl>(TargetDecl) || isa<VarDecl>(TargetDecl)) {
     // Only place nomerge attribute on call sites, never functions. This
     // allows it to work on indirect virtual function calls.
     if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>())
@@ -2374,6 +2517,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
   case ABIArgInfo::Direct:
     if (RetAI.getInReg())
       RetAttrs.addAttribute(llvm::Attribute::InReg);
+
+    if (canApplyNoFPClass(RetAI, RetTy, true))
+      RetAttrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts()));
+
     break;
   case ABIArgInfo::Ignore:
     break;
@@ -2512,8 +2659,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
       else if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
       Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign()));
-      break;
 
+      if (canApplyNoFPClass(AI, ParamType, false))
+        Attrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts()));
+      break;
     case ABIArgInfo::Indirect: {
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
@@ -2745,13 +2894,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
   // If we're using inalloca, all the memory arguments are GEPs off of the last
   // parameter, which is a pointer to the complete memory area.
   Address ArgStruct = Address::invalid();
-  if (IRFunctionArgs.hasInallocaArg()) {
+  if (IRFunctionArgs.hasInallocaArg())
     ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()),
                         FI.getArgStruct(), FI.getArgStructAlignment());
-    assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo());
-  }
-
   // Name the struct return parameter.
   if (IRFunctionArgs.hasSRetArg()) {
     auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo());
@@ -2807,7 +2953,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
     case ABIArgInfo::IndirectAliased: {
       assert(NumIRArgs == 1);
       Address ParamAddr = Address(Fn->getArg(FirstIRArg), ConvertTypeForMem(Ty),
-                                  ArgI.getIndirectAlign());
+                                  ArgI.getIndirectAlign(), KnownNonNull);
 
       if (!hasScalarEvaluationKind(Ty)) {
         // Aggregates and complex variables are accessed by reference.  All we
@@ -3000,7 +3146,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
           assert(NumIRArgs == 1);
           Coerced->setName(Arg->getName() + ".coerce");
           ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
-              VecTyTo, Coerced, Zero, "castFixedSve")));
+              VecTyTo, Coerced, Zero, "cast.fixed")));
           break;
         }
       }
@@ -3017,30 +3163,51 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
       if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
           STy->getNumElements() > 1) {
-        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
-        llvm::Type *DstTy = Ptr.getElementType();
-        uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+        llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
+        llvm::TypeSize PtrElementSize =
+            CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
+        if (StructSize.isScalable()) {
+          assert(STy->containsHomogeneousScalableVectorTypes() &&
+                 "ABI only supports structure with homogeneous scalable vector "
+                 "type");
+          assert(StructSize == PtrElementSize &&
+                 "Only allow non-fractional movement of structure with"
+                 "homogeneous scalable vector type");
+          assert(STy->getNumElements() == NumIRArgs);
+
+          llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto *AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            LoadedStructValue =
+                Builder.CreateInsertValue(LoadedStructValue, AI, i);
+          }
 
-        Address AddrToStoreInto = Address::invalid();
-        if (SrcSize <= DstSize) {
-          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          Builder.CreateStore(LoadedStructValue, Ptr);
         } else {
-          AddrToStoreInto =
-            CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
-        }
+          uint64_t SrcSize = StructSize.getFixedValue();
+          uint64_t DstSize = PtrElementSize.getFixedValue();
+
+          Address AddrToStoreInto = Address::invalid();
+          if (SrcSize <= DstSize) {
+            AddrToStoreInto = Ptr.withElementType(STy);
+          } else {
+            AddrToStoreInto =
+                CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
+          }
 
-        assert(STy->getNumElements() == NumIRArgs);
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          auto AI = Fn->getArg(FirstIRArg + i);
-          AI->setName(Arg->getName() + ".coerce" + Twine(i));
-          Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
-          Builder.CreateStore(AI, EltPtr);
-        }
+          assert(STy->getNumElements() == NumIRArgs);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
+            Builder.CreateStore(AI, EltPtr);
+          }
 
-        if (SrcSize > DstSize) {
-          Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          if (SrcSize > DstSize) {
+            Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          }
         }
-
       } else {
         // Simple case, just do a coerced store of the argument into the alloca.
         assert(NumIRArgs == 1);
@@ -3068,7 +3235,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       ArgVals.push_back(ParamValue::forIndirect(alloca));
 
       auto coercionType = ArgI.getCoerceAndExpandType();
-      alloca = Builder.CreateElementBitCast(alloca, coercionType);
+      alloca = alloca.withElementType(coercionType);
 
       unsigned argIndex = FirstIRArg;
       for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
@@ -3325,8 +3492,9 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
   // single-predecessors chain from the current insertion point.
   llvm::BasicBlock *StoreBB = store->getParent();
   llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
+  llvm::SmallPtrSet<llvm::BasicBlock *, 4> SeenBBs;
   while (IP != StoreBB) {
-    if (!(IP = IP->getSinglePredecessor()))
+    if (!SeenBBs.insert(IP).second || !(IP = IP->getSinglePredecessor()))
       return nullptr;
   }
@@ -3669,7 +3837,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
     // Load all of the coerced elements out into results.
     llvm::SmallVector<llvm::Value*, 4> results;
-    Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType);
+    Address addr = ReturnValue.withElementType(coercionType);
     for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
       auto coercedEltType = coercionType->getElementType(i);
       if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType))
@@ -3795,8 +3963,8 @@ static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF,
   // FIXME: Generate IR in one pass, rather than going back and fixing up these
   // placeholders.
   llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
-  llvm::Type *IRPtrTy = IRTy->getPointerTo();
-  llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy->getPointerTo());
+  llvm::Type *IRPtrTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+  llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy);
 
   // FIXME: When we generate this IR in one pass, we shouldn't need
   // this win32-specific alignment hack.
@@ -4764,7 +4932,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     // the proper cpu features (and it won't cause code generation issues due to
     // function based code generation).
     if (TargetDecl->hasAttr<AlwaysInlineAttr>() &&
-        TargetDecl->hasAttr<TargetAttr>())
+        (TargetDecl->hasAttr<TargetAttr>() ||
+         (CurFuncDecl && CurFuncDecl->hasAttr<TargetAttr>())))
       checkTargetFeatures(Loc, FD);
 
     // Some architectures (such as x86-64) have the ABI changed based on
@@ -4773,25 +4942,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), FD, CallArgs);
   }
 
-#ifndef NDEBUG
-  if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) {
-    // For an inalloca varargs function, we don't expect CallInfo to match the
-    // function pointer's type, because the inalloca struct a will have extra
-    // fields in it for the varargs parameters. Code later in this function
-    // bitcasts the function pointer to the type derived from CallInfo.
-    //
-    // In other cases, we assert that the types match up (until pointers stop
-    // having pointee types).
-    if (Callee.isVirtual())
-      assert(IRFuncTy == Callee.getVirtualFunctionType());
-    else {
-      llvm::PointerType *PtrTy =
-          llvm::cast<llvm::PointerType>(Callee.getFunctionPointer()->getType());
-      assert(PtrTy->isOpaqueOrPointeeTypeMatches(IRFuncTy));
-    }
-  }
-#endif
-
   // 1. Set up the arguments.
 
   // If we're using inalloca, insert the allocation after the stack save.
@@ -4913,10 +5063,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         // Store the RValue into the argument struct.
         Address Addr =
             Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
-        // There are some cases where a trivial bitcast is not avoidable. The
-        // definition of a type later in a translation unit may change it's type
-        // from {}* to (%struct.foo*)*.
-        Addr = Builder.CreateElementBitCast(Addr, ConvertTypeForMem(I->Ty));
+        Addr = Addr.withElementType(ConvertTypeForMem(I->Ty));
         I->copyInto(*this, Addr);
       }
       break;
@@ -5010,9 +5157,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
           I->copyInto(*this, AI);
         } else {
           // Skip the extra memcpy call.
-          auto *T = llvm::PointerType::getWithSamePointeeType(
-              cast<llvm::PointerType>(V->getType()),
-              CGM.getDataLayout().getAllocaAddrSpace());
+          auto *T = llvm::PointerType::get(
+              CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace());
 
           llvm::Value *Val = getTargetHooks().performAddrSpaceCast(
               *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
@@ -5112,7 +5258,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
           Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
           Src = TempAlloca;
         } else {
-          Src = Builder.CreateElementBitCast(Src, STy);
+          Src = Src.withElementType(STy);
         }
 
         assert(NumIRArgs == STy->getNumElements());
@@ -5176,7 +5322,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         Builder.CreateStore(RV.getScalarVal(), addr);
       }
 
-      addr = Builder.CreateElementBitCast(addr, coercionType);
+      addr = addr.withElementType(coercionType);
 
       unsigned IRArgPos = FirstIRArg;
       for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
@@ -5212,35 +5358,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // If we're using inalloca, set up that argument.
   if (ArgMemory.isValid()) {
     llvm::Value *Arg = ArgMemory.getPointer();
-    if (CallInfo.isVariadic()) {
-      // When passing non-POD arguments by value to variadic functions, we will
-      // end up with a variadic prototype and an inalloca call site. In such
-      // cases, we can't do any parameter mismatch checks. Give up and bitcast
-      // the callee.
-      unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace();
-      CalleePtr =
-          Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS));
-    } else {
-      llvm::Type *LastParamTy =
-          IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1);
-      if (Arg->getType() != LastParamTy) {
-#ifndef NDEBUG
-        // Assert that these structs have equivalent element types.
-        llvm::StructType *FullTy = CallInfo.getArgStruct();
-        if (!LastParamTy->isOpaquePointerTy()) {
-          llvm::StructType *DeclaredTy = cast<llvm::StructType>(
-              LastParamTy->getNonOpaquePointerElementType());
-          assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
-          for (auto DI = DeclaredTy->element_begin(),
-                    DE = DeclaredTy->element_end(),
-                    FI = FullTy->element_begin();
-               DI != DE; ++DI, ++FI)
-            assert(*DI == *FI);
-        }
-#endif
-        Arg = Builder.CreateBitCast(Arg, LastParamTy);
-      }
-    }
     assert(IRFunctionArgs.hasInallocaArg());
     IRCallArgs[IRFunctionArgs.getInallocaArgNo()] = Arg;
   }
@@ -5560,8 +5677,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     case ABIArgInfo::CoerceAndExpand: {
       auto coercionType = RetAI.getCoerceAndExpandType();
 
-      Address addr = SRetPtr;
-      addr = Builder.CreateElementBitCast(addr, coercionType);
+      Address addr = SRetPtr.withElementType(coercionType);
 
       assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType());
       bool requiresExtract = isa<llvm::StructType>(CI->getType());
@@ -5578,7 +5694,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
           assert(unpaddedIndex == 0);
         Builder.CreateStore(elt, eltAddr);
       }
-      // FALLTHROUGH
       [[fallthrough]];
     }
@@ -5628,6 +5743,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         llvm_unreachable("bad evaluation kind");
       }
 
+      // If coercing a fixed vector from a scalable vector for ABI
+      // compatibility, and the types match, use the llvm.vector.extract
+      // intrinsic to perform the conversion.
+      if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
+        llvm::Value *V = CI;
+        if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) {
+          if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+            llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+            V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed");
+            return RValue::get(V);
+          }
+        }
+      }
+
       Address DestPtr = ReturnValue.getValue();
       bool DestIsVolatile = ReturnValue.isVolatile();
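Much of the mechanical churn in this diff comes from LLVM's opaque-pointer migration: typed-pointer constructions such as `Ty->getPointerTo(AS)` and `CGBuilderTy::CreateElementBitCast` are replaced by context-based `llvm::PointerType` factories and `Address::withElementType`. The snippet below is a minimal standalone sketch, not code from this commit; the address-space value is an illustrative assumption, and it only demonstrates how the two `PointerType` factory calls used throughout the new code behave.

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

// Minimal sketch, assuming an LLVM release with opaque pointers (>= 16):
// a pointer type is identified only by its address space, so it is created
// from the LLVMContext rather than from a pointee type.
int main() {
  llvm::LLVMContext Ctx;
  unsigned AllocaAS = 5; // illustrative; real code reads this from the DataLayout
  llvm::PointerType *Default = llvm::PointerType::getUnqual(Ctx);    // prints as 'ptr'
  llvm::PointerType *Alloca = llvm::PointerType::get(Ctx, AllocaAS); // 'ptr addrspace(5)'
  return Default == Alloca ? 1 : 0; // distinct types because the address spaces differ
}
```

This is also why the patch can drop calls like `ConvertTypeForMem(it->type)` where only a pointer type is needed: the pointee type no longer participates in the pointer type's identity.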
