summaryrefslogtreecommitdiff
path: root/lib/CodeGen/CGBuiltin.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--lib/CodeGen/CGBuiltin.cpp261
1 files changed, 218 insertions, 43 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 3ecd1c6697d7..609987c4fa4c 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1432,14 +1432,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__debugbreak:
return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
case Builtin::BI__builtin_unreachable: {
- if (SanOpts.has(SanitizerKind::Unreachable)) {
- SanitizerScope SanScope(this);
- EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
- SanitizerKind::Unreachable),
- SanitizerHandler::BuiltinUnreachable,
- EmitCheckSourceLocation(E->getExprLoc()), None);
- } else
- Builder.CreateUnreachable();
+ EmitUnreachable(E->getExprLoc());
// We do need to preserve an insertion point.
EmitBlock(createBasicBlock("unreachable.cont"));
@@ -3341,10 +3334,10 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
- return CGF->EmitARMBuiltinExpr(BuiltinID, E);
+ return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
- return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
+ return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return CGF->EmitX86BuiltinExpr(BuiltinID, E);
@@ -3385,6 +3378,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
NeonTypeFlags TypeFlags,
+ llvm::Triple::ArchType Arch,
bool V1Ty=false) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
@@ -3393,8 +3387,14 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
case NeonTypeFlags::Int16:
case NeonTypeFlags::Poly16:
- case NeonTypeFlags::Float16:
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
+ case NeonTypeFlags::Float16:
+ // FIXME: Only AArch64 backend can so far properly handle half types.
+ // Remove else part once ARM backend support for half is complete.
+ if (Arch == llvm::Triple::aarch64)
+ return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
+ else
+ return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Int32:
return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Int64:
@@ -3417,6 +3417,8 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
NeonTypeFlags IntTypeFlags) {
int IsQuad = IntTypeFlags.isQuad();
switch (IntTypeFlags.getEltType()) {
+ case NeonTypeFlags::Int16:
+ return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
case NeonTypeFlags::Int32:
return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
case NeonTypeFlags::Int64:
@@ -3564,55 +3566,80 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvt_s16_v),
NEONMAP0(vcvt_s32_v),
NEONMAP0(vcvt_s64_v),
+ NEONMAP0(vcvt_u16_v),
NEONMAP0(vcvt_u32_v),
NEONMAP0(vcvt_u64_v),
+ NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_s16_v),
NEONMAP0(vcvtq_s32_v),
NEONMAP0(vcvtq_s64_v),
+ NEONMAP0(vcvtq_u16_v),
NEONMAP0(vcvtq_u32_v),
NEONMAP0(vcvtq_u64_v),
NEONMAP0(vext_v),
@@ -3775,19 +3802,27 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_v),
NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_f16_v),
NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
@@ -4197,7 +4232,8 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
const char *NameHint, unsigned Modifier, const CallExpr *E,
- SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
+ SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
+ llvm::Triple::ArchType Arch) {
// Get the last argument, which specifies the vector type.
llvm::APSInt NeonTypeConst;
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
@@ -4209,7 +4245,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
- llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -4256,9 +4292,20 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcageq_v:
case NEON::BI__builtin_neon_vcagt_v:
case NEON::BI__builtin_neon_vcagtq_v: {
- llvm::Type *VecFlt = llvm::VectorType::get(
- VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
- VTy->getNumElements());
+ llvm::Type *Ty;
+ switch (VTy->getScalarSizeInBits()) {
+ default: llvm_unreachable("unexpected type");
+ case 32:
+ Ty = FloatTy;
+ break;
+ case 64:
+ Ty = DoubleTy;
+ break;
+ case 16:
+ Ty = HalfTy;
+ break;
+ }
+ llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
llvm::Type *Tys[] = { VTy, VecFlt };
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
return EmitNeonCall(F, Ops, NameHint);
@@ -4272,11 +4319,19 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_f32_v:
case NEON::BI__builtin_neon_vcvtq_f32_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
+ return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
+ : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+ case NEON::BI__builtin_neon_vcvt_f16_v:
+ case NEON::BI__builtin_neon_vcvtq_f16_v:
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
+ case NEON::BI__builtin_neon_vcvt_n_f16_v:
case NEON::BI__builtin_neon_vcvt_n_f32_v:
case NEON::BI__builtin_neon_vcvt_n_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_v:
case NEON::BI__builtin_neon_vcvtq_n_f32_v:
case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
@@ -4284,11 +4339,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
+ case NEON::BI__builtin_neon_vcvt_n_s16_v:
case NEON::BI__builtin_neon_vcvt_n_s32_v:
+ case NEON::BI__builtin_neon_vcvt_n_u16_v:
case NEON::BI__builtin_neon_vcvt_n_u32_v:
case NEON::BI__builtin_neon_vcvt_n_s64_v:
case NEON::BI__builtin_neon_vcvt_n_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s16_v:
case NEON::BI__builtin_neon_vcvtq_n_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u16_v:
case NEON::BI__builtin_neon_vcvtq_n_u32_v:
case NEON::BI__builtin_neon_vcvtq_n_s64_v:
case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
@@ -4300,44 +4359,63 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s16_v:
+ case NEON::BI__builtin_neon_vcvt_u16_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
- case NEON::BI__builtin_neon_vcvtq_u64_v: {
+ case NEON::BI__builtin_neon_vcvtq_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_u16_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
}
+ case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
case NEON::BI__builtin_neon_vcvta_u64_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_v:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_v:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
case NEON::BI__builtin_neon_vcvtaq_u64_v:
+ case NEON::BI__builtin_neon_vcvtn_s16_v:
case NEON::BI__builtin_neon_vcvtn_s32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_v:
case NEON::BI__builtin_neon_vcvtn_u32_v:
case NEON::BI__builtin_neon_vcvtn_u64_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_v:
case NEON::BI__builtin_neon_vcvtnq_s32_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_v:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
case NEON::BI__builtin_neon_vcvtnq_u64_v:
+ case NEON::BI__builtin_neon_vcvtp_s16_v:
case NEON::BI__builtin_neon_vcvtp_s32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_v:
case NEON::BI__builtin_neon_vcvtp_u32_v:
case NEON::BI__builtin_neon_vcvtp_u64_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_v:
case NEON::BI__builtin_neon_vcvtpq_s32_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_v:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
case NEON::BI__builtin_neon_vcvtpq_u64_v:
+ case NEON::BI__builtin_neon_vcvtm_s16_v:
case NEON::BI__builtin_neon_vcvtm_s32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_v:
case NEON::BI__builtin_neon_vcvtm_u32_v:
case NEON::BI__builtin_neon_vcvtm_u64_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_v:
case NEON::BI__builtin_neon_vcvtmq_s32_v:
case NEON::BI__builtin_neon_vcvtmq_s64_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_v:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtmq_u64_v: {
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
@@ -4816,7 +4894,8 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
}
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E,
+ llvm::Triple::ArchType Arch) {
if (auto Hint = GetValueForARMHint(BuiltinID))
return Hint;
@@ -5355,7 +5434,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
bool usgn = Type.isUnsigned();
bool rightShift = false;
- llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -5368,7 +5447,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
if (Builtin)
return EmitCommonNeonBuiltinExpr(
Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
- Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
+ Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
unsigned Int;
switch (BuiltinID) {
@@ -5393,7 +5472,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
}
- // fall through
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vld1_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
@@ -5518,7 +5597,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
Tys), Ops);
}
- // fall through
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vst1_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
@@ -5555,7 +5634,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
const CallExpr *E,
- SmallVectorImpl<Value *> &Ops) {
+ SmallVectorImpl<Value *> &Ops,
+ llvm::Triple::ArchType Arch) {
unsigned int Int = 0;
const char *s = nullptr;
@@ -5600,7 +5680,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(Result.getZExtValue());
- llvm::VectorType *Ty = GetNeonType(&CGF, Type);
+ llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch);
if (!Ty)
return nullptr;
@@ -5710,7 +5790,8 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
}
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E,
+ llvm::Triple::ArchType Arch) {
unsigned HintID = static_cast<unsigned>(-1);
switch (BuiltinID) {
default: break;
@@ -6011,7 +6092,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvts_u32_f32:
case NEON::BI__builtin_neon_vcvtd_u64_f64:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvts_s32_f32:
case NEON::BI__builtin_neon_vcvtd_s64_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -6026,7 +6107,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvts_f32_u32:
case NEON::BI__builtin_neon_vcvtd_f64_u64:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvts_f32_s32:
case NEON::BI__builtin_neon_vcvtd_f64_s64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -6453,7 +6534,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
}
- llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -6467,9 +6548,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitCommonNeonBuiltinExpr(
Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
Builtin->NameHint, Builtin->TypeModifier, E, Ops,
- /*never use addresses*/ Address::invalid(), Address::invalid());
+ /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
- if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
+ if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
return V;
unsigned Int;
@@ -6518,7 +6599,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch);
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
@@ -6547,7 +6628,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
}
+ case NEON::BI__builtin_neon_vfmah_lane_f16:
case NEON::BI__builtin_neon_vfmas_lane_f32:
+ case NEON::BI__builtin_neon_vfmah_laneq_f16:
case NEON::BI__builtin_neon_vfmas_laneq_f32:
case NEON::BI__builtin_neon_vfmad_lane_f64:
case NEON::BI__builtin_neon_vfmad_laneq_f64: {
@@ -6699,14 +6782,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_f64_v:
case NEON::BI__builtin_neon_vcvtq_f64_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
+ Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch);
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f64_f32: {
assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
"unexpected vcvt_f64_f32 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
}
@@ -6714,7 +6797,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
assert(Type.getEltType() == NeonTypeFlags::Float32 &&
"unexpected vcvt_f32_f64 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
+ Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
}
@@ -6722,18 +6805,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_u32_v:
case NEON::BI__builtin_neon_vcvt_s64_v:
case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s16_v:
+ case NEON::BI__builtin_neon_vcvt_u16_v:
case NEON::BI__builtin_neon_vcvtq_s32_v:
case NEON::BI__builtin_neon_vcvtq_u32_v:
case NEON::BI__builtin_neon_vcvtq_s64_v:
- case NEON::BI__builtin_neon_vcvtq_u64_v: {
+ case NEON::BI__builtin_neon_vcvtq_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s16_v:
+ case NEON::BI__builtin_neon_vcvtq_u16_v: {
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
if (usgn)
return Builder.CreateFPToUI(Ops[0], Ty);
return Builder.CreateFPToSI(Ops[0], Ty);
}
+ case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_s32_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_v:
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvta_u32_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_v:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
@@ -6743,9 +6833,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
}
+ case NEON::BI__builtin_neon_vcvtm_s16_v:
case NEON::BI__builtin_neon_vcvtm_s32_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_v:
case NEON::BI__builtin_neon_vcvtmq_s32_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_v:
case NEON::BI__builtin_neon_vcvtm_u32_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_v:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
case NEON::BI__builtin_neon_vcvtmq_s64_v:
@@ -6755,9 +6849,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
}
+ case NEON::BI__builtin_neon_vcvtn_s16_v:
case NEON::BI__builtin_neon_vcvtn_s32_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_v:
case NEON::BI__builtin_neon_vcvtnq_s32_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_v:
case NEON::BI__builtin_neon_vcvtn_u32_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_v:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
@@ -6767,9 +6865,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
}
+ case NEON::BI__builtin_neon_vcvtp_s16_v:
case NEON::BI__builtin_neon_vcvtp_s32_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_v:
case NEON::BI__builtin_neon_vcvtpq_s32_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_v:
case NEON::BI__builtin_neon_vcvtp_u32_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_v:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
@@ -6792,7 +6894,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
+ NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch);
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -6824,7 +6926,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vaddv_u8:
// FIXME: These are handled by the AArch64 scalar code.
usgn = true;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vaddv_s8: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -6836,7 +6938,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vaddv_u16:
usgn = true;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vaddv_s16: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -6848,7 +6950,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vaddvq_u8:
usgn = true;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vaddvq_s8: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -6860,7 +6962,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vaddvq_u16:
usgn = true;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vaddvq_s16: {
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
Ty = Int32Ty;
@@ -6942,6 +7044,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
+ case NEON::BI__builtin_neon_vmaxv_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vmaxvq_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
case NEON::BI__builtin_neon_vminv_u8: {
Int = Intrinsic::aarch64_neon_uminv;
Ty = Int32Ty;
@@ -7014,6 +7134,60 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
+ case NEON::BI__builtin_neon_vminv_f16: {
+ Int = Intrinsic::aarch64_neon_fminv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vminvq_f16: {
+ Int = Intrinsic::aarch64_neon_fminv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vmaxnmv_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vmaxnmvq_f16: {
+ Int = Intrinsic::aarch64_neon_fmaxnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vminnmv_f16: {
+ Int = Intrinsic::aarch64_neon_fminnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 4);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
+ case NEON::BI__builtin_neon_vminnmvq_f16: {
+ Int = Intrinsic::aarch64_neon_fminnmv;
+ Ty = HalfTy;
+ VTy = llvm::VectorType::get(HalfTy, 8);
+ llvm::Type *Tys[2] = { Ty, VTy };
+ Ops.push_back(EmitScalarExpr(E->getArg(0)));
+ Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
+ return Builder.CreateTrunc(Ops[0], HalfTy);
+ }
case NEON::BI__builtin_neon_vmul_n_f64: {
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
@@ -7848,8 +8022,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
default: return nullptr;
case X86::BI_mm_prefetch: {
Value *Address = Ops[0];
- Value *RW = ConstantInt::get(Int32Ty, 0);
- Value *Locality = Ops[1];
+ ConstantInt *C = cast<ConstantInt>(Ops[1]);
+ Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
+ Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
Value *Data = ConstantInt::get(Int32Ty, 1);
Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
return Builder.CreateCall(F, {Address, RW, Locality, Data});