Diffstat (limited to 'lib/CodeGen')
 lib/CodeGen/CGBuiltin.cpp        | 183
 lib/CodeGen/CGCall.cpp           |  50
 lib/CodeGen/CGClass.cpp          |  13
 lib/CodeGen/CGDecl.cpp           |  24
 lib/CodeGen/CGExpr.cpp           |  44
 lib/CodeGen/CGObjCMac.cpp        |   2
 lib/CodeGen/CGStmt.cpp           |  12
 lib/CodeGen/CodeGenFunction.cpp  |  13
 lib/CodeGen/CodeGenFunction.h    |  59
 lib/CodeGen/CodeGenModule.cpp    |  35
 lib/CodeGen/CodeGenModule.h      |   3
 lib/CodeGen/CodeGenTypeCache.h   |   2
 lib/CodeGen/SwiftCallingConv.cpp |   8
 lib/CodeGen/TargetInfo.cpp       | 169
 14 files changed, 425 insertions(+), 192 deletions(-)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 8f0c22d1f7ef..a6451b7fc3c1 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2956,8 +2956,9 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
case NeonTypeFlags::Int16:
case NeonTypeFlags::Poly16:
- case NeonTypeFlags::Float16:
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
+ case NeonTypeFlags::Float16:
+ return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Int32:
return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Int64:
@@ -2980,6 +2981,8 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
NeonTypeFlags IntTypeFlags) {
int IsQuad = IntTypeFlags.isQuad();
switch (IntTypeFlags.getEltType()) {
+ case NeonTypeFlags::Int16:
+ return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
case NeonTypeFlags::Int32:
return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
case NeonTypeFlags::Int64:
@@ -3127,55 +3130,80 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvt_s16_v),
NEONMAP0(vcvt_s32_v),
NEONMAP0(vcvt_s64_v),
+ NEONMAP0(vcvt_u16_v),
NEONMAP0(vcvt_u32_v),
NEONMAP0(vcvt_u64_v),
+ NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_s16_v),
NEONMAP0(vcvtq_s32_v),
NEONMAP0(vcvtq_s64_v),
+ NEONMAP0(vcvtq_u16_v),
NEONMAP0(vcvtq_u32_v),
NEONMAP0(vcvtq_u64_v),
NEONMAP0(vext_v),
@@ -3338,19 +3366,27 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_v),
NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_f16_v),
NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
@@ -3819,9 +3855,20 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcageq_v:
case NEON::BI__builtin_neon_vcagt_v:
case NEON::BI__builtin_neon_vcagtq_v: {
- llvm::Type *VecFlt = llvm::VectorType::get(
- VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
- VTy->getNumElements());
+ llvm::Type *Ty;
+ switch (VTy->getScalarSizeInBits()) {
+ default: llvm_unreachable("unexpected type");
+ case 32:
+ Ty = FloatTy;
+ break;
+ case 64:
+ Ty = DoubleTy;
+ break;
+ case 16:
+ Ty = HalfTy;
+ break;
+ }
+ llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
llvm::Type *Tys[] = { VTy, VecFlt };
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
return EmitNeonCall(F, Ops, NameHint);
@@ -3838,8 +3885,16 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+ case NEON::BI__builtin_neon_vcvt_f16_v:
+ case NEON::BI__builtin_neon_vcvtq_f16_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad));
+ return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
+ : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+ case NEON::BI__builtin_neon_vcvt_n_f16_v:
case NEON::BI__builtin_neon_vcvt_n_f32_v:
case NEON::BI__builtin_neon_vcvt_n_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_v:
case NEON::BI__builtin_neon_vcvtq_n_f32_v:
case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
@@ -3847,11 +3902,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
+ case NEON::BI__builtin_neon_vcvt_n_s16_v:
case NEON::BI__builtin_neon_vcvt_n_s32_v:
+ case NEON::BI__builtin_neon_vcvt_n_u16_v:
case NEON::BI__builtin_neon_vcvt_n_u32_v:
case NEON::BI__builtin_neon_vcvt_n_s64_v:
case NEON::BI__builtin_neon_vcvt_n_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s16_v:
case NEON::BI__builtin_neon_vcvtq_n_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u16_v:
case NEON::BI__builtin_neon_vcvtq_n_u32_v:
case NEON::BI__builtin_neon_vcvtq_n_s64_v:
case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
@@ -3863,44 +3922,63 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s16_v:
+ case NEON::BI__builtin_neon_vcvt_u16_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
- case NEON::BI__builtin_neon_vcvtq_u64_v: {
+ case NEON::BI__builtin_neon_vcvtq_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_u16_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
}
+ case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
case NEON::BI__builtin_neon_vcvta_u64_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_v:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_v:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
case NEON::BI__builtin_neon_vcvtaq_u64_v:
+ case NEON::BI__builtin_neon_vcvtn_s16_v:
case NEON::BI__builtin_neon_vcvtn_s32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_v:
case NEON::BI__builtin_neon_vcvtn_u32_v:
case NEON::BI__builtin_neon_vcvtn_u64_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_v:
case NEON::BI__builtin_neon_vcvtnq_s32_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_v:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
case NEON::BI__builtin_neon_vcvtnq_u64_v:
+ case NEON::BI__builtin_neon_vcvtp_s16_v:
case NEON::BI__builtin_neon_vcvtp_s32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_v:
case NEON::BI__builtin_neon_vcvtp_u32_v:
case NEON::BI__builtin_neon_vcvtp_u64_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_v:
case NEON::BI__builtin_neon_vcvtpq_s32_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_v:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
case NEON::BI__builtin_neon_vcvtpq_u64_v:
+ case NEON::BI__builtin_neon_vcvtm_s16_v:
case NEON::BI__builtin_neon_vcvtm_s32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_v:
case NEON::BI__builtin_neon_vcvtm_u32_v:
case NEON::BI__builtin_neon_vcvtm_u64_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_v:
case NEON::BI__builtin_neon_vcvtmq_s32_v:
case NEON::BI__builtin_neon_vcvtmq_s64_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_v:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtmq_u64_v: {
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
@@ -6110,7 +6188,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
}
+ case NEON::BI__builtin_neon_vfmah_lane_f16:
case NEON::BI__builtin_neon_vfmas_lane_f32:
+ case NEON::BI__builtin_neon_vfmah_laneq_f16:
case NEON::BI__builtin_neon_vfmas_laneq_f32:
case NEON::BI__builtin_neon_vfmad_lane_f64:
case NEON::BI__builtin_neon_vfmad_laneq_f64: {
@@ -6285,18 +6365,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s16_v:
+ case NEON::BI__builtin_neon_vcvt_u16_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
- case NEON::BI__builtin_neon_vcvtq_u64_v: {
+ case NEON::BI__builtin_neon_vcvtq_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_u16_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
if (usgn)
return Builder.CreateFPToUI(Ops[0], Ty);
return Builder.CreateFPToSI(Ops[0], Ty);
}
+ case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_v:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_v:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
@@ -6306,9 +6393,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
}
+ case NEON::BI__builtin_neon_vcvtm_s16_v:
case NEON::BI__builtin_neon_vcvtm_s32_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_v:
case NEON::BI__builtin_neon_vcvtmq_s32_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_v:
case NEON::BI__builtin_neon_vcvtm_u32_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_v:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
case NEON::BI__builtin_neon_vcvtmq_s64_v:
@@ -6318,9 +6409,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
}
+ case NEON::BI__builtin_neon_vcvtn_s16_v:
case NEON::BI__builtin_neon_vcvtn_s32_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_v:
case NEON::BI__builtin_neon_vcvtnq_s32_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_v:
case NEON::BI__builtin_neon_vcvtn_u32_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_v:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
@@ -6330,9 +6425,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
}
+ case NEON::BI__builtin_neon_vcvtp_s16_v:
case NEON::BI__builtin_neon_vcvtp_s32_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_v:
case NEON::BI__builtin_neon_vcvtpq_s32_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_v:
case NEON::BI__builtin_neon_vcvtp_u32_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_v:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
@@ -6505,6 +6604,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
+ case NEON::BI__builtin_neon_vmaxv_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vmaxvq_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
case NEON::BI__builtin_neon_vminv_u8: {
Int = Intrinsic::aarch64_neon_uminv;
Ty = Int32Ty;
@@ -6577,6 +6694,60 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
+ case NEON::BI__builtin_neon_vminv_f16: {
+ Int = Intrinsic::aarch64_neon_fminv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vminvq_f16: {
+ Int = Intrinsic::aarch64_neon_fminv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vmaxnmv_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vmaxnmvq_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vminnmv_f16: {
+ Int = Intrinsic::aarch64_neon_fminnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vminnmvq_f16: {
+ Int = Intrinsic::aarch64_neon_fminnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
case NEON::BI__builtin_neon_vmul_n_f64: {
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
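
The CGBuiltin.cpp changes above make the shared NEON path emit half-precision
vectors (HalfTy, i.e. <4 x half> / <8 x half>) for the v(q)cvt*_f16/s16/u16
builtins and add the fp16 across-vector reductions. A minimal source-level
sketch of intrinsics this lowers, assuming an AArch64 target with the
ARMv8.2-A fp16 extension (e.g. -march=armv8.2-a+fp16); the intrinsic names
come from arm_neon.h, not from this patch:

  #include <arm_neon.h>

  float16_t demo(int16x4_t iv, float16x8_t fv) {
    float16x4_t hv = vcvt_f16_s16(iv);      // int -> half vector, SIToFP on <4 x half>
    int16x4_t   sv = vcvt_n_s16_f16(hv, 3); // fixed-point convert, via the vcvtfp2fxs entries above
    (void)sv;
    return vmaxvq_f16(fv);                  // across-vector max, aarch64_neon_fmaxv on <8 x half>
  }
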
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index c65dc18be306..38d520a2beb0 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -2906,7 +2906,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
llvm::Instruction *Ret;
if (RV) {
- EmitReturnValueCheck(RV, EndLoc);
+ EmitReturnValueCheck(RV);
Ret = Builder.CreateRet(RV);
} else {
Ret = Builder.CreateRetVoid();
@@ -2916,8 +2916,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
Ret->setDebugLoc(std::move(RetDbgLoc));
}
-void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV,
- SourceLocation EndLoc) {
+void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) {
// A current decl may not be available when emitting vtable thunks.
if (!CurCodeDecl)
return;
@@ -2950,27 +2949,30 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV,
SanitizerScope SanScope(this);
- llvm::BasicBlock *Check = nullptr;
- llvm::BasicBlock *NoCheck = nullptr;
- if (requiresReturnValueNullabilityCheck()) {
- // Before doing the nullability check, make sure that the preconditions for
- // the check are met.
- Check = createBasicBlock("nullcheck");
- NoCheck = createBasicBlock("no.nullcheck");
- Builder.CreateCondBr(RetValNullabilityPrecondition, Check, NoCheck);
- EmitBlock(Check);
- }
+ // Make sure the "return" source location is valid. If we're checking a
+ // nullability annotation, make sure the preconditions for the check are met.
+ llvm::BasicBlock *Check = createBasicBlock("nullcheck");
+ llvm::BasicBlock *NoCheck = createBasicBlock("no.nullcheck");
+ llvm::Value *SLocPtr = Builder.CreateLoad(ReturnLocation, "return.sloc.load");
+ llvm::Value *CanNullCheck = Builder.CreateIsNotNull(SLocPtr);
+ if (requiresReturnValueNullabilityCheck())
+ CanNullCheck =
+ Builder.CreateAnd(CanNullCheck, RetValNullabilityPrecondition);
+ Builder.CreateCondBr(CanNullCheck, Check, NoCheck);
+ EmitBlock(Check);
- // Now do the null check. If the returns_nonnull attribute is present, this
- // is done unconditionally.
+ // Now do the null check.
llvm::Value *Cond = Builder.CreateIsNotNull(RV);
- llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(EndLoc), EmitCheckSourceLocation(AttrLoc),
- };
- EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None);
+ llvm::Constant *StaticData[] = {EmitCheckSourceLocation(AttrLoc)};
+ llvm::Value *DynamicData[] = {SLocPtr};
+ EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, DynamicData);
- if (requiresReturnValueNullabilityCheck())
- EmitBlock(NoCheck);
+ EmitBlock(NoCheck);
+
+#ifndef NDEBUG
+ // The return location should not be used after the check has been emitted.
+ ReturnLocation = Address::invalid();
+#endif
}
static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
@@ -3813,7 +3815,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(NumIRArgs == 1);
if (RV.isScalar() || RV.isComplex()) {
// Make a temporary alloca to pass the argument.
- Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+ Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "indirect-arg-temp", false);
IRCallArgs[FirstIRArg] = Addr.getPointer();
LValue argLV = MakeAddrLValue(Addr, I->Ty);
@@ -3842,7 +3845,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
< Align.getQuantity()) ||
(ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
// Create an aligned temporary, and copy to it.
- Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+ Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "byval-temp", false);
IRCallArgs[FirstIRArg] = AI.getPointer();
EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
} else {
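
Together with the CGStmt.cpp and CodeGenFunction.* changes below, the
nonnull/nullability return check now receives the location of the offending
return statement as dynamic data (loaded from ReturnLocation) instead of a
static end-of-function location. A small sketch of code that would exercise
the check, assuming it is compiled with -fsanitize=returns-nonnull-attribute
(the attribute and the sanitizer flag are pre-existing Clang features, not
part of this patch):

  __attribute__((returns_nonnull)) const char *name_of(int id) {
    if (id < 0)
      return nullptr;   // the runtime report should now point at this return
    return "ok";
  }
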
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 5b4dc5ff0ab3..127d7df348ee 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -2770,10 +2770,19 @@ CodeGenFunction::CanDevirtualizeMemberFunctionCall(const Expr *Base,
// We can devirtualize calls on an object accessed by a class member access
// expression, since by C++11 [basic.life]p6 we know that it can't refer to
- // a derived class object constructed in the same location.
+ // a derived class object constructed in the same location. However, we avoid
+ // devirtualizing a call to a template function that we could instantiate
+ // implicitly, but have not decided to do so. This is needed because if this
+ // function does not get instantiated, the devirtualization will create a
+ // direct call to a function whose body may not exist. In contrast, calls to
+ // template functions that are not defined in this TU are allowed to be
+ // devirtualized under the assumption that it is the user's responsibility
+ // to instantiate them in some other TU.
if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
- return VD->getType()->isRecordType();
+ return VD->getType()->isRecordType() &&
+ (MD->instantiationIsPending() || MD->isDefined() ||
+ !MD->isImplicitlyInstantiable());
// Likewise for calls on an object accessed by a (non-reference) pointer to
// member access.
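
A hedged sketch of the kind of case the new guard is aimed at (the class names
are illustrative only): Derived<int>::f is implicitly instantiable, but a
virtual call does not necessarily odr-use it, so it may never be instantiated
in this TU; devirtualizing h.d.f() into a direct call could then reference a
function with no emitted body.

  struct Base { virtual void f(); };
  template <typename T>
  struct Derived : Base { void f() override {} };

  struct Holder { Derived<int> d; };

  void call(Holder &h) {
    h.d.f();  // member-access call: a devirtualization candidate in the code above
  }
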
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 87bfa507a8c0..ccd3b8d513b1 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -954,6 +954,7 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
CodeGenFunction::AutoVarEmission
CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
QualType Ty = D.getType();
+ assert(Ty.getAddressSpace() == LangAS::Default);
AutoVarEmission emission(D);
@@ -1046,8 +1047,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Create the alloca. Note that we set the name separately from
// building the instruction so that it's there even in no-asserts
// builds.
- address = CreateTempAlloca(allocaTy, allocaAlignment);
- address.getPointer()->setName(D.getName());
+ address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
// Don't emit lifetime markers for MSVC catch parameters. The lifetime of
// the catch parameter starts in the catchpad instruction, and we can't
@@ -1107,27 +1107,9 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
llvm::Type *llvmTy = ConvertTypeForMem(elementType);
// Allocate memory for the array.
- llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
- vla->setAlignment(alignment.getQuantity());
-
- address = Address(vla, alignment);
+ address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
}
- // Alloca always returns a pointer in alloca address space, which may
- // be different from the type defined by the language. For example,
- // in C++ the auto variables are in the default address space. Therefore
- // cast alloca to the expected address space when necessary.
- auto T = D.getType();
- assert(T.getAddressSpace() == LangAS::Default);
- if (getASTAllocaAddressSpace() != LangAS::Default) {
- auto *Addr = getTargetHooks().performAddrSpaceCast(
- *this, address.getPointer(), getASTAllocaAddressSpace(),
- T.getAddressSpace(),
- address.getElementType()->getPointerTo(
- getContext().getTargetAddressSpace(T.getAddressSpace())),
- /*non-null*/ true);
- address = Address(Addr, address.getAlignment());
- }
setAddrOfLocalVar(&D, address);
emission.Addr = address;
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 7359006677f4..2ee1c96a6619 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -61,18 +61,36 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
/// block.
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
- const Twine &Name) {
- auto Alloca = CreateTempAlloca(Ty, Name);
+ const Twine &Name,
+ llvm::Value *ArraySize,
+ bool CastToDefaultAddrSpace) {
+ auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
Alloca->setAlignment(Align.getQuantity());
- return Address(Alloca, Align);
+ llvm::Value *V = Alloca;
+ // Alloca always returns a pointer in alloca address space, which may
+ // be different from the type defined by the language. For example,
+ // in C++ the auto variables are in the default address space. Therefore
+ // cast alloca to the default address space when necessary.
+ if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
+ auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
+ V = getTargetHooks().performAddrSpaceCast(
+ *this, V, getASTAllocaAddressSpace(), LangAS::Default,
+ Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
+ }
+
+ return Address(V, Align);
}
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block.
+/// CreateTempAlloca - This creates an alloca and inserts it into the entry
+/// block if \p ArraySize is nullptr, otherwise inserts it at the current
+/// insertion point of the builder.
llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
- const Twine &Name) {
+ const Twine &Name,
+ llvm::Value *ArraySize) {
+ if (ArraySize)
+ return Builder.CreateAlloca(Ty, ArraySize, Name);
return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(),
- nullptr, Name, AllocaInsertPt);
+ ArraySize, Name, AllocaInsertPt);
}
/// CreateDefaultAlignTempAlloca - This creates an alloca with the
@@ -99,14 +117,18 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
return CreateTempAlloca(ConvertType(Ty), Align, Name);
}
-Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) {
+Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
+ bool CastToDefaultAddrSpace) {
// FIXME: Should we prefer the preferred type alignment here?
- return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name);
+ return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name,
+ CastToDefaultAddrSpace);
}
Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
- const Twine &Name) {
- return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name);
+ const Twine &Name,
+ bool CastToDefaultAddrSpace) {
+ return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr,
+ CastToDefaultAddrSpace);
}
/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
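
Net effect of the CGDecl.cpp and CGExpr.cpp hunks: the cast from the alloca
address space to the language's default address space, formerly done only in
EmitAutoVarAlloca, now lives in CreateTempAlloca itself, so every temporary
(including VLAs) gets it. For example, on a target such as amdgcn compiling a
language whose default address space is generic, a local temporary becomes an
alloca in the private/alloca address space followed by an address-space cast,
and it is the cast pointer that the rest of IRGen sees; callers that know the
address never escapes to the program (such as the indirect-argument paths in
CGCall.cpp above) pass CastToDefaultAddrSpace = false to skip the cast.
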
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index c78d3febd4cd..7976c53d9e98 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -308,7 +308,7 @@ public:
SmallVector<CanQualType,5> Params;
Params.push_back(Ctx.VoidPtrTy);
Params.push_back(Ctx.VoidPtrTy);
- Params.push_back(Ctx.LongTy);
+ Params.push_back(Ctx.getSizeType());
Params.push_back(Ctx.BoolTy);
Params.push_back(Ctx.BoolTy);
llvm::FunctionType *FTy =
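
The one-line CGObjCMac.cpp change matters on LLP64 targets such as x86_64
Windows, where long is 32 bits but size_t is 64 bits: the runtime helper whose
prototype is built here takes its size argument as a size_t, so using
Ctx.LongTy would give that parameter the wrong width there.
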
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 683f366ebe45..a13c38646164 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -1024,6 +1024,18 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
/// if the function returns void, or may be missing one if the function returns
/// non-void. Fun stuff :).
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
+ if (requiresReturnValueCheck()) {
+ llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart());
+ auto *SLocPtr =
+ new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false,
+ llvm::GlobalVariable::PrivateLinkage, SLoc);
+ SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr);
+ assert(ReturnLocation.isValid() && "No valid return location");
+ Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy),
+ ReturnLocation);
+ }
+
// Returning from an outlined SEH helper is UB, and we already warn on it.
if (IsOutlinedSEHHelper) {
Builder.CreateUnreachable();
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index ac1a1334f103..93a4a3866193 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -860,6 +860,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Builder.SetInsertPoint(EntryBB);
+ // If we're checking the return value, allocate space for a pointer to a
+ // precise source location of the checked return statement.
+ if (requiresReturnValueCheck()) {
+ ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr");
+ InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy));
+ }
+
// Emit subprogram debug descriptor.
if (CGDebugInfo *DI = getDebugInfo()) {
// Reconstruct the type from the argument list so that implicit parameters,
@@ -887,8 +894,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (CGM.getCodeGenOpts().InstrumentForProfiling) {
if (CGM.getCodeGenOpts().CallFEntry)
Fn->addFnAttr("fentry-call", "true");
- else
- Fn->addFnAttr("counting-function", getTarget().getMCountName());
+ else {
+ if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
+ Fn->addFnAttr("counting-function", getTarget().getMCountName());
+ }
}
if (RetTy->isVoidType()) {
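
A small sketch of the behavior guarded by the new CurFuncDecl check, using the
pre-existing no_instrument_function attribute (the example and flag usage are
illustrative, not part of this patch):

  void traced(void) {}
  __attribute__((no_instrument_function)) void untraced(void) {}
  // Built with -pg (InstrumentForProfiling) and without -mfentry, only traced()
  // should receive the "counting-function" = <target mcount name> attribute.
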
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 831eedf9e478..6785111bd052 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -116,9 +116,9 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \
SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \
SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \
- SANITIZER_CHECK(NullabilityReturn, nullability_return, 0) \
+ SANITIZER_CHECK(NullabilityReturn, nullability_return, 1) \
SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \
- SANITIZER_CHECK(NonnullReturn, nonnull_return, 0) \
+ SANITIZER_CHECK(NonnullReturn, nonnull_return, 1) \
SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \
SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0) \
SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \
@@ -1407,6 +1407,17 @@ private:
return RetValNullabilityPrecondition;
}
+ /// Used to store precise source locations for return statements by the
+ /// runtime return value checks.
+ Address ReturnLocation = Address::invalid();
+
+ /// Check if the return value of this function requires sanitization.
+ bool requiresReturnValueCheck() const {
+ return requiresReturnValueNullabilityCheck() ||
+ (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) &&
+ CurCodeDecl && CurCodeDecl->getAttr<ReturnsNonNullAttr>());
+ }
+
llvm::BasicBlock *TerminateLandingPad;
llvm::BasicBlock *TerminateHandler;
llvm::BasicBlock *TrapBB;
@@ -1778,7 +1789,7 @@ public:
SourceLocation EndLoc);
/// Emit a test that checks if the return value \p RV is nonnull.
- void EmitReturnValueCheck(llvm::Value *RV, SourceLocation EndLoc);
+ void EmitReturnValueCheck(llvm::Value *RV);
/// EmitStartEHSpec - Emit the start of the exception spec.
void EmitStartEHSpec(const Decl *D);
@@ -1916,13 +1927,36 @@ public:
LValueBaseInfo *BaseInfo = nullptr);
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
- /// CreateTempAlloca - This creates a alloca and inserts it into the entry
- /// block. The caller is responsible for setting an appropriate alignment on
+ /// CreateTempAlloca - This creates an alloca and inserts it into the entry
+ /// block if \p ArraySize is nullptr, otherwise inserts it at the current
+ /// insertion point of the builder. The caller is responsible for setting an
+ /// appropriate alignment on
/// the alloca.
- llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
- const Twine &Name = "tmp");
+ ///
+ /// \p ArraySize is the number of array elements to be allocated if it
+ /// is not nullptr.
+ ///
+ /// LangAS::Default is the address space of pointers to local variables and
+ /// temporaries, as exposed in the source language. In certain
+ /// configurations, this is not the same as the alloca address space, and a
+ /// cast is needed to lift the pointer from the alloca AS into
+ /// LangAS::Default. This can happen when the target uses a restricted
+ /// address space for the stack but the source language requires
+ /// LangAS::Default to be a generic address space. The latter condition is
+ /// common for most programming languages; OpenCL is an exception in that
+ /// LangAS::Default is the private address space, which naturally maps
+ /// to the stack.
+ ///
+ /// Because the address of a temporary is often exposed to the program in
+ /// various ways, this function will perform the cast by default. The cast
+ /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is
+ /// more efficient if the caller knows that the address will not be exposed.
+ llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
+ llvm::Value *ArraySize = nullptr);
Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
- const Twine &Name = "tmp");
+ const Twine &Name = "tmp",
+ llvm::Value *ArraySize = nullptr,
+ bool CastToDefaultAddrSpace = true);
/// CreateDefaultAlignedTempAlloca - This creates an alloca with the
/// default ABI alignment of the given LLVM type.
@@ -1957,9 +1991,12 @@ public:
Address CreateIRTemp(QualType T, const Twine &Name = "tmp");
/// CreateMemTemp - Create a temporary memory object of the given type, with
- /// appropriate alignment.
- Address CreateMemTemp(QualType T, const Twine &Name = "tmp");
- Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp");
+ /// appropriate alignment. Cast it to the default address space if
+ /// \p CastToDefaultAddrSpace is true.
+ Address CreateMemTemp(QualType T, const Twine &Name = "tmp",
+ bool CastToDefaultAddrSpace = true);
+ Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp",
+ bool CastToDefaultAddrSpace = true);
/// CreateAggTemp - Create a temporary memory object for the given
/// aggregate type.
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 19a055075604..5319ccec163f 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -98,6 +98,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
Int16Ty = llvm::Type::getInt16Ty(LLVMContext);
Int32Ty = llvm::Type::getInt32Ty(LLVMContext);
Int64Ty = llvm::Type::getInt64Ty(LLVMContext);
+ HalfTy = llvm::Type::getHalfTy(LLVMContext);
FloatTy = llvm::Type::getFloatTy(LLVMContext);
DoubleTy = llvm::Type::getDoubleTy(LLVMContext);
PointerWidthInBits = C.getTargetInfo().getPointerWidth(0);
@@ -506,6 +507,26 @@ void CodeGenModule::Release() {
LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
}
+ // Emit OpenCL specific module metadata: OpenCL/SPIR version.
+ if (LangOpts.OpenCL) {
+ EmitOpenCLMetadata();
+ // Emit SPIR version.
+ if (getTriple().getArch() == llvm::Triple::spir ||
+ getTriple().getArch() == llvm::Triple::spir64) {
+ // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
+ // opencl.spir.version named metadata.
+ llvm::Metadata *SPIRVerElts[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, LangOpts.OpenCLVersion / 100)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 0 : 2))};
+ llvm::NamedMDNode *SPIRVerMD =
+ TheModule.getOrInsertNamedMetadata("opencl.spir.version");
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
+ }
+ }
+
if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
assert(PLevel < 3 && "Invalid PIC Level");
getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel));
@@ -529,6 +550,20 @@ void CodeGenModule::Release() {
EmitTargetMetadata();
}
+void CodeGenModule::EmitOpenCLMetadata() {
+ // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
+ // opencl.ocl.version named metadata node.
+ llvm::Metadata *OCLVerElts[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, LangOpts.OpenCLVersion / 100)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))};
+ llvm::NamedMDNode *OCLVerMD =
+ TheModule.getOrInsertNamedMetadata("opencl.ocl.version");
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+ OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+}
+
void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
// Make sure that this type is translated.
Types.UpdateCompletedType(TD);
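
A concrete reading of the version arithmetic above: for -cl-std=CL1.2,
LangOpts.OpenCLVersion is 120, so opencl.ocl.version records {1, 2} and, on
spir/spir64 triples, opencl.spir.version also records {1, 2}; for CL2.0 (200)
both become {2, 0}. Moving the emission into CodeGenModule::Release means the
metadata is produced once per module for every OpenCL compilation, replacing
the per-target AMDGPU/SPIR hooks that the TargetInfo.cpp hunks below delete.
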
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 59e56a6ba194..c5f1a2b409ee 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -1321,6 +1321,9 @@ private:
/// Emits target specific Metadata for global declarations.
void EmitTargetMetadata();
+ /// Emits OpenCL specific Metadata e.g. OpenCL version.
+ void EmitOpenCLMetadata();
+
/// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and
/// .gcda files in a way that persists in .bc files.
void EmitCoverageFile();
diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h
index 450eab48a3b4..6910d36733dc 100644
--- a/lib/CodeGen/CodeGenTypeCache.h
+++ b/lib/CodeGen/CodeGenTypeCache.h
@@ -36,7 +36,7 @@ struct CodeGenTypeCache {
/// i8, i16, i32, and i64
llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty;
/// float, double
- llvm::Type *FloatTy, *DoubleTy;
+ llvm::Type *HalfTy, *FloatTy, *DoubleTy;
/// int
llvm::IntegerType *IntTy;
diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp
index 0bfe30a32c80..fc8e36d2c599 100644
--- a/lib/CodeGen/SwiftCallingConv.cpp
+++ b/lib/CodeGen/SwiftCallingConv.cpp
@@ -57,6 +57,10 @@ static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) {
return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type));
}
+static CharUnits getTypeAllocSize(CodeGenModule &CGM, llvm::Type *type) {
+ return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(type));
+}
+
void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) {
// Deal with various aggregate types as special cases:
@@ -542,7 +546,9 @@ SwiftAggLowering::getCoerceAndExpandTypes() const {
packed = true;
elts.push_back(entry.Type);
- lastEnd = entry.End;
+
+ lastEnd = entry.Begin + getTypeAllocSize(CGM, entry.Type);
+ assert(entry.End <= lastEnd);
}
// We don't need to adjust 'packed' to deal with possible tail padding
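
The SwiftCallingConv.cpp fix advances lastEnd by the alloc size of the coerced
entry's LLVM type rather than by the entry's data end, so any tail padding the
type requires is accounted for before placing the next entry. For instance, if
an entry's data occupies bytes [0, 12) but its chosen type has a 16-byte alloc
size (say, a 16-byte-aligned vector), the next field of the coerce-and-expand
struct cannot start before offset 16, and padding must be measured from 16,
not 12; the new assert(entry.End <= lastEnd) records that the alloc size can
only grow that bound.
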
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 427ec06a2fff..8d00e055306d 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -951,8 +951,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
- ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
- const ABIArgInfo& current) const;
+
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -1536,27 +1535,6 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
return true;
}
-ABIArgInfo
-X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
- const ABIArgInfo &current) const {
- // Assumes vectorCall calling convention.
- const Type *Base = nullptr;
- uint64_t NumElts = 0;
-
- if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
- isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- // HVA types get passed directly in registers if there is room.
- State.FreeSSERegs -= NumElts;
- return getDirectX86Hva();
- }
- // If there's no room, the HVA gets passed as normal indirect
- // structure.
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- }
- return current;
-}
-
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
@@ -1575,35 +1553,20 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar
- // to other targets, regcall uses some of the HVA rules.
+ // Regcall uses the concept of a homogeneous vector aggregate, similar
+ // to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if ((State.CC == llvm::CallingConv::X86_VectorCall ||
- State.CC == llvm::CallingConv::X86_RegCall) &&
+ if (State.CC == llvm::CallingConv::X86_RegCall &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.CC == llvm::CallingConv::X86_RegCall) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
- return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
-
- }
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
- if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
- // Actual floating-point types get registers first time through if
- // there is registers available
- State.FreeSSERegs -= NumElts;
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
- } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
- // HVA Types only get registers after everything else has been
- // set, so it gets set as indirect for now.
- return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
- }
+ return ABIArgInfo::getExpand();
}
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
@@ -1684,31 +1647,53 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
bool &UsedInAlloca) const {
- // Vectorcall only allows the first 6 parameters to be passed in registers,
- // and homogeneous vector aggregates are only put into registers as a second
- // priority.
- unsigned Count = 0;
- CCState ZeroState = State;
- ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
- // HVAs must be done as a second priority for registers, so the deferred
- // items are dealt with by going through the pattern a second time.
+ // Vectorcall on x86 works subtly differently than on x64, so the handling
+ // here is a bit different from the x64 version. First, all vector types (not
+ // HVAs) are assigned, with the first 6 ending up in the YMM0-5 or XMM0-5
+ // registers. This differs from the x64 implementation, where the first 6
+ // arguments by position get registers.
+ // After that, integers AND HVAs are assigned left to right in the same pass.
+ // Integers are passed in ECX/EDX if one is available (in order). HVAs will
+ // first take up the remaining YMM/XMM registers. If insufficient registers
+ // remain but an integer register (ECX/EDX) is available, the argument is
+ // passed in that; otherwise, it goes on the stack.
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = classifyArgumentType(I.type, State);
- else
- // Parameters after the 6th cannot be passed in registers,
- // so pretend there are no registers left for them.
- I.info = classifyArgumentType(I.type, ZeroState);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
- ++Count;
+ // First pass do all the vector types.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = ABIArgInfo::getDirect();
+ } else {
+ I.info = classifyArgumentType(Ty, State);
+ }
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
- Count = 0;
- // Go through the arguments a second time to get HVAs registers if there
- // are still some available.
+
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, State, I.info);
- ++Count;
+ // Second pass, do the rest!
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ const QualType& Ty = I.type;
+ bool IsHva = isHomogeneousAggregate(Ty, Base, NumElts);
+
+ if (IsHva && !Ty->isVectorType() && !Ty->isBuiltinType()) {
+ // Assign true HVAs (non vector/native FP types).
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ I.info = getDirectX86Hva();
+ } else {
+ I.info = getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ } else if (!IsHva) {
+ // Assign all Non-HVAs, so this will exclude Vector/FP args.
+ I.info = classifyArgumentType(Ty, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
}
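
A hedged illustration of the two-pass i386 vectorcall handling described in
the comment above; the declaration is illustrative, and the register
assignments are what the new code appears to compute rather than a restatement
of the ABI document:

  #include <immintrin.h>

  struct HVA2 { __m128 a, b; };        // homogeneous vector aggregate

  void __vectorcall sink(__m128 v, HVA2 h, int n);
  // Pass 1: 'v' is a plain vector type, so it is assigned first and takes XMM0.
  // Pass 2 (left to right): 'h' is a true HVA and takes XMM1-XMM2 if both are
  // still free, otherwise it is passed indirectly; 'n' is a non-HVA and goes
  // through classifyArgumentType (ECX if an integer register is free).
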
@@ -3901,6 +3886,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
bool IsRegCall) const {
unsigned Count = 0;
for (auto &I : FI.arguments()) {
+ // Vectorcall in x64 only permits the first 6 arguments to be passed in
+ // XMM/YMM registers.
if (Count < VectorcallMaxParamNumAsReg)
I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
else {
@@ -3913,11 +3900,8 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
++Count;
}
- Count = 0;
for (auto &I : FI.arguments()) {
- if (Count < VectorcallMaxParamNumAsReg)
- I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
- ++Count;
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
}
}
@@ -7344,8 +7328,6 @@ public:
};
}
-static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM);
-
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D,
llvm::GlobalValue *GV,
@@ -7402,8 +7384,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
-
- appendOpenCLVersionMD(M);
}
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
@@ -8074,8 +8054,6 @@ class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
- void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
};
@@ -8090,41 +8068,6 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
}
}
-/// Emit SPIR specific metadata: OpenCL and SPIR version.
-void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
- llvm::Module &M = CGM.getModule();
- // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
- // opencl.spir.version named metadata.
- llvm::Metadata *SPIRVerElts[] = {
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))};
- llvm::NamedMDNode *SPIRVerMD =
- M.getOrInsertNamedMetadata("opencl.spir.version");
- SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
- appendOpenCLVersionMD(CGM);
-}
-
-static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) {
- llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
- llvm::Module &M = CGM.getModule();
- // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
- // opencl.ocl.version named metadata node.
- llvm::Metadata *OCLVerElts[] = {
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))};
- llvm::NamedMDNode *OCLVerMD =
- M.getOrInsertNamedMetadata("opencl.ocl.version");
- OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
-}
-
unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::SPIR_KERNEL;
}