Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp  2363
1 file changed, 1558 insertions(+), 805 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8994b939093e..113541bd5024 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -78,7 +78,8 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
}
if (CGF.CGM.stopAutoInit())
return;
- CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
+ auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
+ I->addAnnotationMetadata("auto-init");
}
/// getBuiltinLibFunction - Given a builtin id for a function like
@@ -303,6 +304,10 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
AtomicOrdering::Monotonic :
SuccessOrdering;
+ // The atomic instruction is marked volatile for consistency with MSVC. This
+ // blocks the few atomics optimizations that LLVM has. If we want to optimize
+ // _Interlocked* operations in the future, we will have to remove the volatile
+ // marker.
auto *Result = CGF.Builder.CreateAtomicCmpXchg(
Destination, Comparand, Exchange,
SuccessOrdering, FailureOrdering);
@@ -310,6 +315,68 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
return CGF.Builder.CreateExtractValue(Result, 0);
}
+// 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
+// prototyped like this:
+//
+// unsigned char _InterlockedCompareExchange128...(
+// __int64 volatile * _Destination,
+// __int64 _ExchangeHigh,
+// __int64 _ExchangeLow,
+// __int64 * _ComparandResult);
+static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ AtomicOrdering SuccessOrdering) {
+ assert(E->getNumArgs() == 4);
+ llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
+ llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
+
+ assert(Destination->getType()->isPointerTy());
+ assert(!ExchangeHigh->getType()->isPointerTy());
+ assert(!ExchangeLow->getType()->isPointerTy());
+ assert(ComparandPtr->getType()->isPointerTy());
+
+ // For Release ordering, the failure ordering should be Monotonic.
+ auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
+ ? AtomicOrdering::Monotonic
+ : SuccessOrdering;
+
+ // Convert to i128 pointers and values.
+ llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
+ llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
+ Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
+ Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
+ CGF.getContext().toCharUnitsFromBits(128));
+
+ // (((i128)hi) << 64) | ((i128)lo)
+ ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
+ ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
+ ExchangeHigh =
+ CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
+ llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
+
+ // Load the comparand for the instruction.
+ llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
+
+ auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ SuccessOrdering, FailureOrdering);
+
+ // The atomic instruction is marked volatile for consistency with MSVC. This
+ // blocks the few atomics optimizations that LLVM has. If we want to optimize
+ // _Interlocked* operations in the future, we will have to remove the volatile
+ // marker.
+ CXI->setVolatile(true);
+
+ // Store the result as an outparameter.
+ CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
+ ComparandResult);
+
+ // Get the success boolean and zero extend it to i8.
+ Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
+ return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
+}
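For reference, a minimal sketch of a caller, following the prototype quoted above and the semantics implemented here (the comparand buffer holds the expected value on entry and receives the previous value of *_Destination on return; the return value is 1 on success; names and values are illustrative only):

    __declspec(align(16)) volatile __int64 Dest[2] = {0, 0};  // destination must be 16-byte aligned
    __int64 CmpRes[2] = {0, 0};   // expected value in, previous value out
    unsigned char Ok = _InterlockedCompareExchange128(
        Dest, /*ExchangeHigh=*/1, /*ExchangeLow=*/2, CmpRes);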
+
static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
assert(E->getArg(0)->getType()->isPointerType());
@@ -373,6 +440,7 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
if (CGF.Builder.getIsFPConstrained()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
} else {
@@ -390,6 +458,7 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
if (CGF.Builder.getIsFPConstrained()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
} else {
@@ -408,6 +477,7 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
if (CGF.Builder.getIsFPConstrained()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
} else {
@@ -489,6 +559,7 @@ emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
if (CGF.Builder.getIsFPConstrained()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
{ResultType, Src0->getType()});
return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
@@ -805,10 +876,15 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
AsmOS << "bt";
if (Action)
AsmOS << Action;
- AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
+ AsmOS << SizeSuffix << " $2, ($1)";
// Build the constraints. FIXME: We should support immediates when possible.
- std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
+ std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
+ std::string MachineClobbers = CGF.getTarget().getClobbers();
+ if (!MachineClobbers.empty()) {
+ Constraints += ',';
+ Constraints += MachineClobbers;
+ }
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(),
CGF.getContext().getTypeSize(E->getArg(1)->getType()));
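The rewritten template returns the carry flag through the ={@ccc} asm flag-output constraint instead of materializing it with setc. As a hypothetical example of the MSVC bit-test builtins this helper lowers:

    long Bits = 0x5;                                  // 0b0101
    unsigned char WasSet = _bittest(&Bits, 2);        // reads bit 2, yields 1
    unsigned char OldBit = _bittestandset(&Bits, 1);  // sets bit 1, returns its old value (0)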
@@ -987,6 +1063,10 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedCompareExchange_acq,
_InterlockedCompareExchange_rel,
_InterlockedCompareExchange_nf,
+ _InterlockedCompareExchange128,
+ _InterlockedCompareExchange128_acq,
+ _InterlockedCompareExchange128_rel,
+ _InterlockedCompareExchange128_nf,
_InterlockedOr_acq,
_InterlockedOr_rel,
_InterlockedOr_nf,
@@ -1005,16 +1085,352 @@ enum class CodeGenFunction::MSVCIntrin {
__fastfail,
};
+static Optional<CodeGenFunction::MSVCIntrin>
+translateArmToMsvcIntrin(unsigned BuiltinID) {
+ using MSVCIntrin = CodeGenFunction::MSVCIntrin;
+ switch (BuiltinID) {
+ default:
+ return None;
+ case ARM::BI_BitScanForward:
+ case ARM::BI_BitScanForward64:
+ return MSVCIntrin::_BitScanForward;
+ case ARM::BI_BitScanReverse:
+ case ARM::BI_BitScanReverse64:
+ return MSVCIntrin::_BitScanReverse;
+ case ARM::BI_InterlockedAnd64:
+ return MSVCIntrin::_InterlockedAnd;
+ case ARM::BI_InterlockedExchange64:
+ return MSVCIntrin::_InterlockedExchange;
+ case ARM::BI_InterlockedExchangeAdd64:
+ return MSVCIntrin::_InterlockedExchangeAdd;
+ case ARM::BI_InterlockedExchangeSub64:
+ return MSVCIntrin::_InterlockedExchangeSub;
+ case ARM::BI_InterlockedOr64:
+ return MSVCIntrin::_InterlockedOr;
+ case ARM::BI_InterlockedXor64:
+ return MSVCIntrin::_InterlockedXor;
+ case ARM::BI_InterlockedDecrement64:
+ return MSVCIntrin::_InterlockedDecrement;
+ case ARM::BI_InterlockedIncrement64:
+ return MSVCIntrin::_InterlockedIncrement;
+ case ARM::BI_InterlockedExchangeAdd8_acq:
+ case ARM::BI_InterlockedExchangeAdd16_acq:
+ case ARM::BI_InterlockedExchangeAdd_acq:
+ case ARM::BI_InterlockedExchangeAdd64_acq:
+ return MSVCIntrin::_InterlockedExchangeAdd_acq;
+ case ARM::BI_InterlockedExchangeAdd8_rel:
+ case ARM::BI_InterlockedExchangeAdd16_rel:
+ case ARM::BI_InterlockedExchangeAdd_rel:
+ case ARM::BI_InterlockedExchangeAdd64_rel:
+ return MSVCIntrin::_InterlockedExchangeAdd_rel;
+ case ARM::BI_InterlockedExchangeAdd8_nf:
+ case ARM::BI_InterlockedExchangeAdd16_nf:
+ case ARM::BI_InterlockedExchangeAdd_nf:
+ case ARM::BI_InterlockedExchangeAdd64_nf:
+ return MSVCIntrin::_InterlockedExchangeAdd_nf;
+ case ARM::BI_InterlockedExchange8_acq:
+ case ARM::BI_InterlockedExchange16_acq:
+ case ARM::BI_InterlockedExchange_acq:
+ case ARM::BI_InterlockedExchange64_acq:
+ return MSVCIntrin::_InterlockedExchange_acq;
+ case ARM::BI_InterlockedExchange8_rel:
+ case ARM::BI_InterlockedExchange16_rel:
+ case ARM::BI_InterlockedExchange_rel:
+ case ARM::BI_InterlockedExchange64_rel:
+ return MSVCIntrin::_InterlockedExchange_rel;
+ case ARM::BI_InterlockedExchange8_nf:
+ case ARM::BI_InterlockedExchange16_nf:
+ case ARM::BI_InterlockedExchange_nf:
+ case ARM::BI_InterlockedExchange64_nf:
+ return MSVCIntrin::_InterlockedExchange_nf;
+ case ARM::BI_InterlockedCompareExchange8_acq:
+ case ARM::BI_InterlockedCompareExchange16_acq:
+ case ARM::BI_InterlockedCompareExchange_acq:
+ case ARM::BI_InterlockedCompareExchange64_acq:
+ return MSVCIntrin::_InterlockedCompareExchange_acq;
+ case ARM::BI_InterlockedCompareExchange8_rel:
+ case ARM::BI_InterlockedCompareExchange16_rel:
+ case ARM::BI_InterlockedCompareExchange_rel:
+ case ARM::BI_InterlockedCompareExchange64_rel:
+ return MSVCIntrin::_InterlockedCompareExchange_rel;
+ case ARM::BI_InterlockedCompareExchange8_nf:
+ case ARM::BI_InterlockedCompareExchange16_nf:
+ case ARM::BI_InterlockedCompareExchange_nf:
+ case ARM::BI_InterlockedCompareExchange64_nf:
+ return MSVCIntrin::_InterlockedCompareExchange_nf;
+ case ARM::BI_InterlockedOr8_acq:
+ case ARM::BI_InterlockedOr16_acq:
+ case ARM::BI_InterlockedOr_acq:
+ case ARM::BI_InterlockedOr64_acq:
+ return MSVCIntrin::_InterlockedOr_acq;
+ case ARM::BI_InterlockedOr8_rel:
+ case ARM::BI_InterlockedOr16_rel:
+ case ARM::BI_InterlockedOr_rel:
+ case ARM::BI_InterlockedOr64_rel:
+ return MSVCIntrin::_InterlockedOr_rel;
+ case ARM::BI_InterlockedOr8_nf:
+ case ARM::BI_InterlockedOr16_nf:
+ case ARM::BI_InterlockedOr_nf:
+ case ARM::BI_InterlockedOr64_nf:
+ return MSVCIntrin::_InterlockedOr_nf;
+ case ARM::BI_InterlockedXor8_acq:
+ case ARM::BI_InterlockedXor16_acq:
+ case ARM::BI_InterlockedXor_acq:
+ case ARM::BI_InterlockedXor64_acq:
+ return MSVCIntrin::_InterlockedXor_acq;
+ case ARM::BI_InterlockedXor8_rel:
+ case ARM::BI_InterlockedXor16_rel:
+ case ARM::BI_InterlockedXor_rel:
+ case ARM::BI_InterlockedXor64_rel:
+ return MSVCIntrin::_InterlockedXor_rel;
+ case ARM::BI_InterlockedXor8_nf:
+ case ARM::BI_InterlockedXor16_nf:
+ case ARM::BI_InterlockedXor_nf:
+ case ARM::BI_InterlockedXor64_nf:
+ return MSVCIntrin::_InterlockedXor_nf;
+ case ARM::BI_InterlockedAnd8_acq:
+ case ARM::BI_InterlockedAnd16_acq:
+ case ARM::BI_InterlockedAnd_acq:
+ case ARM::BI_InterlockedAnd64_acq:
+ return MSVCIntrin::_InterlockedAnd_acq;
+ case ARM::BI_InterlockedAnd8_rel:
+ case ARM::BI_InterlockedAnd16_rel:
+ case ARM::BI_InterlockedAnd_rel:
+ case ARM::BI_InterlockedAnd64_rel:
+ return MSVCIntrin::_InterlockedAnd_rel;
+ case ARM::BI_InterlockedAnd8_nf:
+ case ARM::BI_InterlockedAnd16_nf:
+ case ARM::BI_InterlockedAnd_nf:
+ case ARM::BI_InterlockedAnd64_nf:
+ return MSVCIntrin::_InterlockedAnd_nf;
+ case ARM::BI_InterlockedIncrement16_acq:
+ case ARM::BI_InterlockedIncrement_acq:
+ case ARM::BI_InterlockedIncrement64_acq:
+ return MSVCIntrin::_InterlockedIncrement_acq;
+ case ARM::BI_InterlockedIncrement16_rel:
+ case ARM::BI_InterlockedIncrement_rel:
+ case ARM::BI_InterlockedIncrement64_rel:
+ return MSVCIntrin::_InterlockedIncrement_rel;
+ case ARM::BI_InterlockedIncrement16_nf:
+ case ARM::BI_InterlockedIncrement_nf:
+ case ARM::BI_InterlockedIncrement64_nf:
+ return MSVCIntrin::_InterlockedIncrement_nf;
+ case ARM::BI_InterlockedDecrement16_acq:
+ case ARM::BI_InterlockedDecrement_acq:
+ case ARM::BI_InterlockedDecrement64_acq:
+ return MSVCIntrin::_InterlockedDecrement_acq;
+ case ARM::BI_InterlockedDecrement16_rel:
+ case ARM::BI_InterlockedDecrement_rel:
+ case ARM::BI_InterlockedDecrement64_rel:
+ return MSVCIntrin::_InterlockedDecrement_rel;
+ case ARM::BI_InterlockedDecrement16_nf:
+ case ARM::BI_InterlockedDecrement_nf:
+ case ARM::BI_InterlockedDecrement64_nf:
+ return MSVCIntrin::_InterlockedDecrement_nf;
+ }
+ llvm_unreachable("must return from switch");
+}
+
+static Optional<CodeGenFunction::MSVCIntrin>
+translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
+ using MSVCIntrin = CodeGenFunction::MSVCIntrin;
+ switch (BuiltinID) {
+ default:
+ return None;
+ case AArch64::BI_BitScanForward:
+ case AArch64::BI_BitScanForward64:
+ return MSVCIntrin::_BitScanForward;
+ case AArch64::BI_BitScanReverse:
+ case AArch64::BI_BitScanReverse64:
+ return MSVCIntrin::_BitScanReverse;
+ case AArch64::BI_InterlockedAnd64:
+ return MSVCIntrin::_InterlockedAnd;
+ case AArch64::BI_InterlockedExchange64:
+ return MSVCIntrin::_InterlockedExchange;
+ case AArch64::BI_InterlockedExchangeAdd64:
+ return MSVCIntrin::_InterlockedExchangeAdd;
+ case AArch64::BI_InterlockedExchangeSub64:
+ return MSVCIntrin::_InterlockedExchangeSub;
+ case AArch64::BI_InterlockedOr64:
+ return MSVCIntrin::_InterlockedOr;
+ case AArch64::BI_InterlockedXor64:
+ return MSVCIntrin::_InterlockedXor;
+ case AArch64::BI_InterlockedDecrement64:
+ return MSVCIntrin::_InterlockedDecrement;
+ case AArch64::BI_InterlockedIncrement64:
+ return MSVCIntrin::_InterlockedIncrement;
+ case AArch64::BI_InterlockedExchangeAdd8_acq:
+ case AArch64::BI_InterlockedExchangeAdd16_acq:
+ case AArch64::BI_InterlockedExchangeAdd_acq:
+ case AArch64::BI_InterlockedExchangeAdd64_acq:
+ return MSVCIntrin::_InterlockedExchangeAdd_acq;
+ case AArch64::BI_InterlockedExchangeAdd8_rel:
+ case AArch64::BI_InterlockedExchangeAdd16_rel:
+ case AArch64::BI_InterlockedExchangeAdd_rel:
+ case AArch64::BI_InterlockedExchangeAdd64_rel:
+ return MSVCIntrin::_InterlockedExchangeAdd_rel;
+ case AArch64::BI_InterlockedExchangeAdd8_nf:
+ case AArch64::BI_InterlockedExchangeAdd16_nf:
+ case AArch64::BI_InterlockedExchangeAdd_nf:
+ case AArch64::BI_InterlockedExchangeAdd64_nf:
+ return MSVCIntrin::_InterlockedExchangeAdd_nf;
+ case AArch64::BI_InterlockedExchange8_acq:
+ case AArch64::BI_InterlockedExchange16_acq:
+ case AArch64::BI_InterlockedExchange_acq:
+ case AArch64::BI_InterlockedExchange64_acq:
+ return MSVCIntrin::_InterlockedExchange_acq;
+ case AArch64::BI_InterlockedExchange8_rel:
+ case AArch64::BI_InterlockedExchange16_rel:
+ case AArch64::BI_InterlockedExchange_rel:
+ case AArch64::BI_InterlockedExchange64_rel:
+ return MSVCIntrin::_InterlockedExchange_rel;
+ case AArch64::BI_InterlockedExchange8_nf:
+ case AArch64::BI_InterlockedExchange16_nf:
+ case AArch64::BI_InterlockedExchange_nf:
+ case AArch64::BI_InterlockedExchange64_nf:
+ return MSVCIntrin::_InterlockedExchange_nf;
+ case AArch64::BI_InterlockedCompareExchange8_acq:
+ case AArch64::BI_InterlockedCompareExchange16_acq:
+ case AArch64::BI_InterlockedCompareExchange_acq:
+ case AArch64::BI_InterlockedCompareExchange64_acq:
+ return MSVCIntrin::_InterlockedCompareExchange_acq;
+ case AArch64::BI_InterlockedCompareExchange8_rel:
+ case AArch64::BI_InterlockedCompareExchange16_rel:
+ case AArch64::BI_InterlockedCompareExchange_rel:
+ case AArch64::BI_InterlockedCompareExchange64_rel:
+ return MSVCIntrin::_InterlockedCompareExchange_rel;
+ case AArch64::BI_InterlockedCompareExchange8_nf:
+ case AArch64::BI_InterlockedCompareExchange16_nf:
+ case AArch64::BI_InterlockedCompareExchange_nf:
+ case AArch64::BI_InterlockedCompareExchange64_nf:
+ return MSVCIntrin::_InterlockedCompareExchange_nf;
+ case AArch64::BI_InterlockedCompareExchange128:
+ return MSVCIntrin::_InterlockedCompareExchange128;
+ case AArch64::BI_InterlockedCompareExchange128_acq:
+ return MSVCIntrin::_InterlockedCompareExchange128_acq;
+ case AArch64::BI_InterlockedCompareExchange128_nf:
+ return MSVCIntrin::_InterlockedCompareExchange128_nf;
+ case AArch64::BI_InterlockedCompareExchange128_rel:
+ return MSVCIntrin::_InterlockedCompareExchange128_rel;
+ case AArch64::BI_InterlockedOr8_acq:
+ case AArch64::BI_InterlockedOr16_acq:
+ case AArch64::BI_InterlockedOr_acq:
+ case AArch64::BI_InterlockedOr64_acq:
+ return MSVCIntrin::_InterlockedOr_acq;
+ case AArch64::BI_InterlockedOr8_rel:
+ case AArch64::BI_InterlockedOr16_rel:
+ case AArch64::BI_InterlockedOr_rel:
+ case AArch64::BI_InterlockedOr64_rel:
+ return MSVCIntrin::_InterlockedOr_rel;
+ case AArch64::BI_InterlockedOr8_nf:
+ case AArch64::BI_InterlockedOr16_nf:
+ case AArch64::BI_InterlockedOr_nf:
+ case AArch64::BI_InterlockedOr64_nf:
+ return MSVCIntrin::_InterlockedOr_nf;
+ case AArch64::BI_InterlockedXor8_acq:
+ case AArch64::BI_InterlockedXor16_acq:
+ case AArch64::BI_InterlockedXor_acq:
+ case AArch64::BI_InterlockedXor64_acq:
+ return MSVCIntrin::_InterlockedXor_acq;
+ case AArch64::BI_InterlockedXor8_rel:
+ case AArch64::BI_InterlockedXor16_rel:
+ case AArch64::BI_InterlockedXor_rel:
+ case AArch64::BI_InterlockedXor64_rel:
+ return MSVCIntrin::_InterlockedXor_rel;
+ case AArch64::BI_InterlockedXor8_nf:
+ case AArch64::BI_InterlockedXor16_nf:
+ case AArch64::BI_InterlockedXor_nf:
+ case AArch64::BI_InterlockedXor64_nf:
+ return MSVCIntrin::_InterlockedXor_nf;
+ case AArch64::BI_InterlockedAnd8_acq:
+ case AArch64::BI_InterlockedAnd16_acq:
+ case AArch64::BI_InterlockedAnd_acq:
+ case AArch64::BI_InterlockedAnd64_acq:
+ return MSVCIntrin::_InterlockedAnd_acq;
+ case AArch64::BI_InterlockedAnd8_rel:
+ case AArch64::BI_InterlockedAnd16_rel:
+ case AArch64::BI_InterlockedAnd_rel:
+ case AArch64::BI_InterlockedAnd64_rel:
+ return MSVCIntrin::_InterlockedAnd_rel;
+ case AArch64::BI_InterlockedAnd8_nf:
+ case AArch64::BI_InterlockedAnd16_nf:
+ case AArch64::BI_InterlockedAnd_nf:
+ case AArch64::BI_InterlockedAnd64_nf:
+ return MSVCIntrin::_InterlockedAnd_nf;
+ case AArch64::BI_InterlockedIncrement16_acq:
+ case AArch64::BI_InterlockedIncrement_acq:
+ case AArch64::BI_InterlockedIncrement64_acq:
+ return MSVCIntrin::_InterlockedIncrement_acq;
+ case AArch64::BI_InterlockedIncrement16_rel:
+ case AArch64::BI_InterlockedIncrement_rel:
+ case AArch64::BI_InterlockedIncrement64_rel:
+ return MSVCIntrin::_InterlockedIncrement_rel;
+ case AArch64::BI_InterlockedIncrement16_nf:
+ case AArch64::BI_InterlockedIncrement_nf:
+ case AArch64::BI_InterlockedIncrement64_nf:
+ return MSVCIntrin::_InterlockedIncrement_nf;
+ case AArch64::BI_InterlockedDecrement16_acq:
+ case AArch64::BI_InterlockedDecrement_acq:
+ case AArch64::BI_InterlockedDecrement64_acq:
+ return MSVCIntrin::_InterlockedDecrement_acq;
+ case AArch64::BI_InterlockedDecrement16_rel:
+ case AArch64::BI_InterlockedDecrement_rel:
+ case AArch64::BI_InterlockedDecrement64_rel:
+ return MSVCIntrin::_InterlockedDecrement_rel;
+ case AArch64::BI_InterlockedDecrement16_nf:
+ case AArch64::BI_InterlockedDecrement_nf:
+ case AArch64::BI_InterlockedDecrement64_nf:
+ return MSVCIntrin::_InterlockedDecrement_nf;
+ }
+ llvm_unreachable("must return from switch");
+}
+
+static Optional<CodeGenFunction::MSVCIntrin>
+translateX86ToMsvcIntrin(unsigned BuiltinID) {
+ using MSVCIntrin = CodeGenFunction::MSVCIntrin;
+ switch (BuiltinID) {
+ default:
+ return None;
+ case clang::X86::BI_BitScanForward:
+ case clang::X86::BI_BitScanForward64:
+ return MSVCIntrin::_BitScanForward;
+ case clang::X86::BI_BitScanReverse:
+ case clang::X86::BI_BitScanReverse64:
+ return MSVCIntrin::_BitScanReverse;
+ case clang::X86::BI_InterlockedAnd64:
+ return MSVCIntrin::_InterlockedAnd;
+ case clang::X86::BI_InterlockedCompareExchange128:
+ return MSVCIntrin::_InterlockedCompareExchange128;
+ case clang::X86::BI_InterlockedExchange64:
+ return MSVCIntrin::_InterlockedExchange;
+ case clang::X86::BI_InterlockedExchangeAdd64:
+ return MSVCIntrin::_InterlockedExchangeAdd;
+ case clang::X86::BI_InterlockedExchangeSub64:
+ return MSVCIntrin::_InterlockedExchangeSub;
+ case clang::X86::BI_InterlockedOr64:
+ return MSVCIntrin::_InterlockedOr;
+ case clang::X86::BI_InterlockedXor64:
+ return MSVCIntrin::_InterlockedXor;
+ case clang::X86::BI_InterlockedDecrement64:
+ return MSVCIntrin::_InterlockedDecrement;
+ case clang::X86::BI_InterlockedIncrement64:
+ return MSVCIntrin::_InterlockedIncrement;
+ }
+ llvm_unreachable("must return from switch");
+}
+
+// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
case MSVCIntrin::_BitScanForward:
case MSVCIntrin::_BitScanReverse: {
+ Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
Value *ArgValue = EmitScalarExpr(E->getArg(1));
llvm::Type *ArgType = ArgValue->getType();
llvm::Type *IndexType =
- EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
+ IndexAddress.getPointer()->getType()->getPointerElementType();
llvm::Type *ResultType = ConvertType(E->getType());
Value *ArgZero = llvm::Constant::getNullValue(ArgType);
@@ -1033,7 +1449,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
Result->addIncoming(ResZero, Begin);
Builder.SetInsertPoint(NotZero);
- Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
if (BuiltinID == MSVCIntrin::_BitScanForward) {
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
@@ -1092,6 +1507,15 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
case MSVCIntrin::_InterlockedCompareExchange_nf:
return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedCompareExchange128:
+ return EmitAtomicCmpXchg128ForMSIntrin(
+ *this, E, AtomicOrdering::SequentiallyConsistent);
+ case MSVCIntrin::_InterlockedCompareExchange128_acq:
+ return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedCompareExchange128_rel:
+ return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedCompareExchange128_nf:
+ return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
case MSVCIntrin::_InterlockedOr_acq:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
AtomicOrdering::Acquire);
@@ -1408,6 +1832,47 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
return RValue::get(BufAddr.getPointer());
}
+static bool isSpecialUnsignedMultiplySignedResult(
+ unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
+ WidthAndSignedness ResultInfo) {
+ return BuiltinID == Builtin::BI__builtin_mul_overflow &&
+ Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
+ !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
+}
+
+static RValue EmitCheckedUnsignedMultiplySignedResult(
+ CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
+ const clang::Expr *Op2, WidthAndSignedness Op2Info,
+ const clang::Expr *ResultArg, QualType ResultQTy,
+ WidthAndSignedness ResultInfo) {
+ assert(isSpecialUnsignedMultiplySignedResult(
+ Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
+ "Cannot specialize this multiply");
+
+ llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
+ llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
+
+ llvm::Value *HasOverflow;
+ llvm::Value *Result = EmitOverflowIntrinsic(
+ CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
+
+ // The intrinsic call will detect overflow when the value is > UINT_MAX;
+ // however, since the original builtin has a signed result, we need to report
+ // an overflow when the result is greater than INT_MAX.
+ auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
+ llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
+
+ llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
+ HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
+
+ bool isVolatile =
+ ResultArg->getType()->getPointeeType().isVolatileQualified();
+ Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
+ CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
+ isVolatile);
+ return RValue::get(HasOverflow);
+}
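A sketch of the case this new specialization covers, namely unsigned operands with a signed result of the same width (values illustrative):

    unsigned A = 50000, B = 50000;  // product 2,500,000,000 fits in unsigned...
    int R;
    bool Overflowed = __builtin_mul_overflow(A, B, &R);
    // ...but exceeds INT_MAX, so the extra ICmpUGT(Result, IntMax) check above
    // reports overflow even though umul_with_overflow alone would not.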
+
/// Determine if a binop is a checked mixed-sign multiply we can specialize.
static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
WidthAndSignedness Op1Info,
@@ -1642,6 +2107,78 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
}
+// Map math builtins for long-double to f128 version.
+static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
+ switch (BuiltinID) {
+#define MUTATE_LDBL(func) \
+ case Builtin::BI__builtin_##func##l: \
+ return Builtin::BI__builtin_##func##f128;
+ MUTATE_LDBL(sqrt)
+ MUTATE_LDBL(cbrt)
+ MUTATE_LDBL(fabs)
+ MUTATE_LDBL(log)
+ MUTATE_LDBL(log2)
+ MUTATE_LDBL(log10)
+ MUTATE_LDBL(log1p)
+ MUTATE_LDBL(logb)
+ MUTATE_LDBL(exp)
+ MUTATE_LDBL(exp2)
+ MUTATE_LDBL(expm1)
+ MUTATE_LDBL(fdim)
+ MUTATE_LDBL(hypot)
+ MUTATE_LDBL(ilogb)
+ MUTATE_LDBL(pow)
+ MUTATE_LDBL(fmin)
+ MUTATE_LDBL(fmax)
+ MUTATE_LDBL(ceil)
+ MUTATE_LDBL(trunc)
+ MUTATE_LDBL(rint)
+ MUTATE_LDBL(nearbyint)
+ MUTATE_LDBL(round)
+ MUTATE_LDBL(floor)
+ MUTATE_LDBL(lround)
+ MUTATE_LDBL(llround)
+ MUTATE_LDBL(lrint)
+ MUTATE_LDBL(llrint)
+ MUTATE_LDBL(fmod)
+ MUTATE_LDBL(modf)
+ MUTATE_LDBL(nan)
+ MUTATE_LDBL(nans)
+ MUTATE_LDBL(inf)
+ MUTATE_LDBL(fma)
+ MUTATE_LDBL(sin)
+ MUTATE_LDBL(cos)
+ MUTATE_LDBL(tan)
+ MUTATE_LDBL(sinh)
+ MUTATE_LDBL(cosh)
+ MUTATE_LDBL(tanh)
+ MUTATE_LDBL(asin)
+ MUTATE_LDBL(acos)
+ MUTATE_LDBL(atan)
+ MUTATE_LDBL(asinh)
+ MUTATE_LDBL(acosh)
+ MUTATE_LDBL(atanh)
+ MUTATE_LDBL(atan2)
+ MUTATE_LDBL(erf)
+ MUTATE_LDBL(erfc)
+ MUTATE_LDBL(ldexp)
+ MUTATE_LDBL(frexp)
+ MUTATE_LDBL(huge_val)
+ MUTATE_LDBL(copysign)
+ MUTATE_LDBL(nextafter)
+ MUTATE_LDBL(nexttoward)
+ MUTATE_LDBL(remainder)
+ MUTATE_LDBL(remquo)
+ MUTATE_LDBL(scalbln)
+ MUTATE_LDBL(scalbn)
+ MUTATE_LDBL(tgamma)
+ MUTATE_LDBL(lgamma)
+#undef MUTATE_LDBL
+ default:
+ return BuiltinID;
+ }
+}
+
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -1658,13 +2195,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Result.Val.getFloat()));
}
+ // If the current long-double semantics are IEEE 128-bit, replace long-double
+ // math builtins with their f128 equivalents.
+ // TODO: This mutation should also be applied to targets other than PPC, once
+ // the backend supports IEEE 128-bit style libcalls.
+ if (getTarget().getTriple().isPPC64() &&
+ &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
+ BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
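A sketch of what this remapping means for user code, assuming a PPC64 target whose long double format is IEEE quad:

    // Source as written (X is some long double):
    long double Y = __builtin_sqrtl(X);
    // BuiltinID is rewritten before the switches below run, so this call is
    // handled exactly as if the user had written __builtin_sqrtf128(X).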
+
+ // If the builtin has been declared explicitly with an assembler label,
+ // disable the specialized emitting below. Ideally we should communicate the
+ // rename in IR, or at least avoid generating the intrinsic calls that are
+ // likely to get lowered to the renamed library functions.
+ const unsigned BuiltinIDIfNoAsmLabel =
+ FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
+
// There are LLVM math intrinsics/instructions corresponding to math library
// functions except the LLVM op will never set errno while the math library
// might. Also, math builtins have the same semantics as their math library
// twins. Thus, we can transform math library and builtin calls to their
// LLVM counterparts if the call is marked 'const' (known to never set errno).
if (FD->hasAttr<ConstAttr>()) {
- switch (BuiltinID) {
+ switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIceil:
case Builtin::BIceilf:
case Builtin::BIceill:
@@ -1672,6 +2224,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_ceilf:
case Builtin::BI__builtin_ceilf16:
case Builtin::BI__builtin_ceill:
+ case Builtin::BI__builtin_ceilf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::ceil,
Intrinsic::experimental_constrained_ceil));
@@ -1693,6 +2246,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_cosf:
case Builtin::BI__builtin_cosf16:
case Builtin::BI__builtin_cosl:
+ case Builtin::BI__builtin_cosf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::cos,
Intrinsic::experimental_constrained_cos));
@@ -1704,6 +2258,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_expf:
case Builtin::BI__builtin_expf16:
case Builtin::BI__builtin_expl:
+ case Builtin::BI__builtin_expf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::exp,
Intrinsic::experimental_constrained_exp));
@@ -1715,6 +2270,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_exp2f:
case Builtin::BI__builtin_exp2f16:
case Builtin::BI__builtin_exp2l:
+ case Builtin::BI__builtin_exp2f128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::exp2,
Intrinsic::experimental_constrained_exp2));
@@ -1736,6 +2292,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_floorf:
case Builtin::BI__builtin_floorf16:
case Builtin::BI__builtin_floorl:
+ case Builtin::BI__builtin_floorf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::floor,
Intrinsic::experimental_constrained_floor));
@@ -1747,6 +2304,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmaf:
case Builtin::BI__builtin_fmaf16:
case Builtin::BI__builtin_fmal:
+ case Builtin::BI__builtin_fmaf128:
return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::fma,
Intrinsic::experimental_constrained_fma));
@@ -1758,6 +2316,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmaxf:
case Builtin::BI__builtin_fmaxf16:
case Builtin::BI__builtin_fmaxl:
+ case Builtin::BI__builtin_fmaxf128:
return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::maxnum,
Intrinsic::experimental_constrained_maxnum));
@@ -1769,6 +2328,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fminf:
case Builtin::BI__builtin_fminf16:
case Builtin::BI__builtin_fminl:
+ case Builtin::BI__builtin_fminf128:
return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::minnum,
Intrinsic::experimental_constrained_minnum));
@@ -1781,7 +2341,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_fmod:
case Builtin::BI__builtin_fmodf:
case Builtin::BI__builtin_fmodf16:
- case Builtin::BI__builtin_fmodl: {
+ case Builtin::BI__builtin_fmodl:
+ case Builtin::BI__builtin_fmodf128: {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *Arg1 = EmitScalarExpr(E->getArg(0));
Value *Arg2 = EmitScalarExpr(E->getArg(1));
return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
@@ -1794,6 +2356,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_logf:
case Builtin::BI__builtin_logf16:
case Builtin::BI__builtin_logl:
+ case Builtin::BI__builtin_logf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::log,
Intrinsic::experimental_constrained_log));
@@ -1805,6 +2368,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_log10f:
case Builtin::BI__builtin_log10f16:
case Builtin::BI__builtin_log10l:
+ case Builtin::BI__builtin_log10f128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::log10,
Intrinsic::experimental_constrained_log10));
@@ -1816,6 +2380,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_log2f:
case Builtin::BI__builtin_log2f16:
case Builtin::BI__builtin_log2l:
+ case Builtin::BI__builtin_log2f128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::log2,
Intrinsic::experimental_constrained_log2));
@@ -1826,6 +2391,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_nearbyint:
case Builtin::BI__builtin_nearbyintf:
case Builtin::BI__builtin_nearbyintl:
+ case Builtin::BI__builtin_nearbyintf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::nearbyint,
Intrinsic::experimental_constrained_nearbyint));
@@ -1837,6 +2403,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_powf:
case Builtin::BI__builtin_powf16:
case Builtin::BI__builtin_powl:
+ case Builtin::BI__builtin_powf128:
return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::pow,
Intrinsic::experimental_constrained_pow));
@@ -1848,6 +2415,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_rintf:
case Builtin::BI__builtin_rintf16:
case Builtin::BI__builtin_rintl:
+ case Builtin::BI__builtin_rintf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::rint,
Intrinsic::experimental_constrained_rint));
@@ -1859,6 +2427,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_roundf:
case Builtin::BI__builtin_roundf16:
case Builtin::BI__builtin_roundl:
+ case Builtin::BI__builtin_roundf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::round,
Intrinsic::experimental_constrained_round));
@@ -1870,6 +2439,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sinf:
case Builtin::BI__builtin_sinf16:
case Builtin::BI__builtin_sinl:
+ case Builtin::BI__builtin_sinf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::sin,
Intrinsic::experimental_constrained_sin));
@@ -1881,6 +2451,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sqrtf:
case Builtin::BI__builtin_sqrtf16:
case Builtin::BI__builtin_sqrtl:
+ case Builtin::BI__builtin_sqrtf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::sqrt,
Intrinsic::experimental_constrained_sqrt));
@@ -1892,6 +2463,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_truncf:
case Builtin::BI__builtin_truncf16:
case Builtin::BI__builtin_truncl:
+ case Builtin::BI__builtin_truncf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::trunc,
Intrinsic::experimental_constrained_trunc));
@@ -1902,6 +2474,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_lround:
case Builtin::BI__builtin_lroundf:
case Builtin::BI__builtin_lroundl:
+ case Builtin::BI__builtin_lroundf128:
return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
*this, E, Intrinsic::lround,
Intrinsic::experimental_constrained_lround));
@@ -1912,6 +2485,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_llround:
case Builtin::BI__builtin_llroundf:
case Builtin::BI__builtin_llroundl:
+ case Builtin::BI__builtin_llroundf128:
return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
*this, E, Intrinsic::llround,
Intrinsic::experimental_constrained_llround));
@@ -1922,6 +2496,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_lrint:
case Builtin::BI__builtin_lrintf:
case Builtin::BI__builtin_lrintl:
+ case Builtin::BI__builtin_lrintf128:
return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
*this, E, Intrinsic::lrint,
Intrinsic::experimental_constrained_lrint));
@@ -1932,6 +2507,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_llrint:
case Builtin::BI__builtin_llrintf:
case Builtin::BI__builtin_llrintl:
+ case Builtin::BI__builtin_llrintf128:
return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
*this, E, Intrinsic::llrint,
Intrinsic::experimental_constrained_llrint));
@@ -1941,7 +2517,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
}
- switch (BuiltinID) {
+ switch (BuiltinIDIfNoAsmLabel) {
default: break;
case Builtin::BI__builtin___CFStringMakeConstantString:
case Builtin::BI__builtin___NSStringMakeConstantString:
@@ -1978,6 +2554,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
return RValue::get(Result);
}
+ case Builtin::BI__builtin_complex: {
+ Value *Real = EmitScalarExpr(E->getArg(0));
+ Value *Imag = EmitScalarExpr(E->getArg(1));
+ return RValue::getComplex({Real, Imag});
+ }
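A hypothetical use of the builtin handled by this new case; __builtin_complex builds the complex value directly from its real and imaginary parts, which is what CMPLX-style construction needs in order to preserve infinities and signed zeros:

    double _Complex Z = __builtin_complex(3.0, 4.0);  // creal(Z) == 3.0, cimag(Z) == 4.0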
case Builtin::BI__builtin_conj:
case Builtin::BI__builtin_conjf:
case Builtin::BI__builtin_conjl:
@@ -2373,6 +2954,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_isunordered: {
// Ordered comparisons: we know the arguments to these are matching scalar
// floating point values.
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *LHS = EmitScalarExpr(E->getArg(0));
Value *RHS = EmitScalarExpr(E->getArg(1));
@@ -2401,6 +2984,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
}
case Builtin::BI__builtin_isnan: {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *V = EmitScalarExpr(E->getArg(0));
V = Builder.CreateFCmpUNO(V, V, "cmp");
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
@@ -2464,6 +3049,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// isinf(x) --> fabs(x) == infinity
// isfinite(x) --> fabs(x) != infinity
// x != NaN via the ordered compare in either case.
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *V = EmitScalarExpr(E->getArg(0));
Value *Fabs = EmitFAbs(*this, V);
Constant *Infinity = ConstantFP::getInfinity(V->getType());
@@ -2476,6 +3063,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_isinf_sign: {
// isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *Arg = EmitScalarExpr(E->getArg(0));
Value *AbsArg = EmitFAbs(*this, Arg);
Value *IsInf = Builder.CreateFCmpOEQ(
@@ -2493,6 +3082,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_isnormal: {
// isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *V = EmitScalarExpr(E->getArg(0));
Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
@@ -2521,6 +3112,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_fpclassify: {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
Value *V = EmitScalarExpr(E->getArg(5));
llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
@@ -3386,7 +3979,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// non-wide string literal, potentially casted, so the cast<> is safe.
const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
- return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
+ return RValue::get(
+ EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
}
case Builtin::BI__builtin_addcb:
case Builtin::BI__builtin_addcs:
@@ -3479,6 +4073,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
RightInfo, ResultArg, ResultQTy,
ResultInfo);
+ if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
+ ResultInfo))
+ return EmitCheckedUnsignedMultiplySignedResult(
+ *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
+ ResultInfo);
+
WidthAndSignedness EncompassingInfo =
EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
@@ -3754,11 +4354,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI_abnormal_termination:
return RValue::get(EmitSEHAbnormalTermination());
case Builtin::BI_setjmpex:
- if (getTarget().getTriple().isOSMSVCRT())
+ if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
+ E->getArg(0)->getType()->isPointerType())
return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
break;
case Builtin::BI_setjmp:
- if (getTarget().getTriple().isOSMSVCRT()) {
+ if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
+ E->getArg(0)->getType()->isPointerType()) {
if (getTarget().getTriple().getArch() == llvm::Triple::x86)
return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
@@ -3838,8 +4440,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, BCast, PacketSize, PacketAlign}));
+ EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ {Arg0, BCast, PacketSize, PacketAlign}));
} else {
assert(4 == E->getNumArgs() &&
"Illegal number of parameters to pipe function");
@@ -3857,9 +4459,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// it to i32.
if (Arg2->getType() != Int32Ty)
Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
- return RValue::get(Builder.CreateCall(
- CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
+ return RValue::get(
+ EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
}
}
// OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
@@ -3900,9 +4502,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// it to i32.
if (Arg1->getType() != Int32Ty)
Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, PacketSize, PacketAlign}));
+ return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ {Arg0, Arg1, PacketSize, PacketAlign}));
}
// OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
// functions
@@ -3938,9 +4539,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, PacketSize, PacketAlign}));
+ return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ {Arg0, Arg1, PacketSize, PacketAlign}));
}
// OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
case Builtin::BIget_pipe_num_packets:
@@ -3963,8 +4563,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, PacketSize, PacketAlign}));
+ return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ {Arg0, PacketSize, PacketAlign}));
}
// OpenCL v2.0 s6.13.9 - Address space qualifier functions.
@@ -3986,7 +4586,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
auto NewCall =
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
+ EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
ConvertType(E->getType())));
}
@@ -4029,8 +4629,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
auto RTCall =
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
- {Queue, Flags, Range, Kernel, Block});
+ EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
+ {Queue, Flags, Range, Kernel, Block});
RTCall->setAttributes(ByValAttrSet);
return RValue::get(RTCall);
}
@@ -4089,7 +4689,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
auto Call = RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
+ EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
if (TmpSize)
EmitLifetimeEnd(TmpSize, TmpPtr);
return Call;
@@ -4147,8 +4747,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
}
// Has event info and variadics
// Pass the number of variadics to the runtime function too.
@@ -4164,8 +4764,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
auto Call =
- RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
if (TmpSize)
EmitLifetimeEnd(TmpSize, TmpPtr);
return Call;
@@ -4181,7 +4781,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- return RValue::get(Builder.CreateCall(
+ return RValue::get(EmitRuntimeCall(
CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
false),
@@ -4195,7 +4795,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- return RValue::get(Builder.CreateCall(
+ return RValue::get(EmitRuntimeCall(
CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
false),
@@ -4216,7 +4816,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
? "__get_kernel_max_sub_group_size_for_ndrange_impl"
: "__get_kernel_sub_group_count_for_ndrange_impl";
- return RValue::get(Builder.CreateCall(
+ return RValue::get(EmitRuntimeCall(
CGM.CreateRuntimeFunction(
llvm::FunctionType::get(
IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
@@ -4419,11 +5019,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
} else {
// If this is required to be a constant, constant fold it so that we
// know that the generated intrinsic gets a ConstantInt.
- llvm::APSInt Result;
- bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
- assert(IsConst && "Constant arg isn't actually constant?");
- (void)IsConst;
- ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
+ ArgValue = llvm::ConstantInt::get(
+ getLLVMContext(),
+ *E->getArg(i)->getIntegerConstantExpr(getContext()));
}
// If the intrinsic arg type is different from the builtin arg type
@@ -4523,6 +5121,7 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::x86_64:
return CGF->EmitX86BuiltinExpr(BuiltinID, E);
case llvm::Triple::ppc:
+ case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
@@ -4558,11 +5157,11 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
getTarget().getTriple().getArch());
}
-static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
- NeonTypeFlags TypeFlags,
- bool HasLegalHalfType = true,
- bool V1Ty = false,
- bool AllowBFloatArgsAndRet = true) {
+static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
+ NeonTypeFlags TypeFlags,
+ bool HasLegalHalfType = true,
+ bool V1Ty = false,
+ bool AllowBFloatArgsAndRet = true) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
case NeonTypeFlags::Int8:
@@ -5075,6 +5674,14 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vcltzq_v),
NEONMAP1(vclz_v, ctlz, Add1ArgType),
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
@@ -5248,6 +5855,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
@@ -5265,6 +5874,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
@@ -5423,6 +6034,10 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
@@ -5583,8 +6198,8 @@ static Value *EmitCommonNeonSISDBuiltinExpr(
Value *Result = CGF.EmitNeonCall(F, Ops, s);
llvm::Type *ResultType = CGF.ConvertType(E->getType());
- if (ResultType->getPrimitiveSizeInBits() <
- Result->getType()->getPrimitiveSizeInBits())
+ if (ResultType->getPrimitiveSizeInBits().getFixedSize() <
+ Result->getType()->getPrimitiveSizeInBits().getFixedSize())
return CGF.Builder.CreateExtractElement(Result, C0);
return CGF.Builder.CreateBitCast(Result, ResultType, s);
@@ -5596,21 +6211,22 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
llvm::Triple::ArchType Arch) {
// Get the last argument, which specifies the vector type.
- llvm::APSInt NeonTypeConst;
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
+ Optional<llvm::APSInt> NeonTypeConst =
+ Arg->getIntegerConstantExpr(getContext());
+ if (!NeonTypeConst)
return nullptr;
// Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(NeonTypeConst.getZExtValue());
+ NeonTypeFlags Type(NeonTypeConst->getZExtValue());
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
const bool HasLegalHalfType = getTarget().hasLegalHalfType();
const bool AllowBFloatArgsAndRet =
getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
- llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType, false,
- AllowBFloatArgsAndRet);
+ llvm::FixedVectorType *VTy =
+ GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -5633,7 +6249,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
NumElements = NumElements * 2;
if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
- NumElements = NumElements / 2;
+ NumElements = NumElements.divideCoefficientBy(2);
Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
@@ -5651,8 +6267,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
case NEON::BI__builtin_neon_vaddhn_v: {
- llvm::VectorType *SrcTy =
- llvm::VectorType::getExtendedElementVectorType(VTy);
+ llvm::FixedVectorType *SrcTy =
+ llvm::FixedVectorType::getExtendedElementVectorType(VTy);
// %sum = add <4 x i32> %lhs, %rhs
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
@@ -5924,14 +6540,16 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vmovl_v: {
- llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
+ llvm::FixedVectorType *DTy =
+ llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
if (Usgn)
return Builder.CreateZExt(Ops[0], Ty, "vmovl");
return Builder.CreateSExt(Ops[0], Ty, "vmovl");
}
case NEON::BI__builtin_neon_vmovn_v: {
- llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
+ llvm::FixedVectorType *QTy =
+ llvm::FixedVectorType::getExtendedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
}
@@ -5977,7 +6595,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vqdmulh_lane_v:
case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
- auto *RTy = cast<llvm::VectorType>(Ty);
+ auto *RTy = cast<llvm::FixedVectorType>(Ty);
if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
RTy = llvm::FixedVectorType::get(RTy->getElementType(),
@@ -6026,7 +6644,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
"vshl_n");
case NEON::BI__builtin_neon_vshll_n_v: {
- llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
+ llvm::FixedVectorType *SrcTy =
+ llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
if (Usgn)
Ops[0] = Builder.CreateZExt(Ops[0], VTy);
@@ -6036,7 +6655,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
}
case NEON::BI__builtin_neon_vshrn_n_v: {
- llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
+ llvm::FixedVectorType *SrcTy =
+ llvm::FixedVectorType::getExtendedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
if (Usgn)
@@ -6085,8 +6705,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
}
case NEON::BI__builtin_neon_vsubhn_v: {
- llvm::VectorType *SrcTy =
- llvm::VectorType::getExtendedElementVectorType(VTy);
+ llvm::FixedVectorType *SrcTy =
+ llvm::FixedVectorType::getExtendedElementVectorType(VTy);
// %sum = add <4 x i32> %lhs, %rhs
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
@@ -6225,28 +6845,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vbfdot_v:
case NEON::BI__builtin_neon_vbfdotq_v: {
llvm::Type *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+ llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
llvm::Type *Tys[2] = { Ty, InputTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
}
- case NEON::BI__builtin_neon_vbfmmlaq_v: {
- llvm::Type *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmmla");
- }
- case NEON::BI__builtin_neon_vbfmlalbq_v: {
- llvm::Type *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalb");
- }
- case NEON::BI__builtin_neon_vbfmlaltq_v: {
- llvm::Type *InputTy =
- llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
- llvm::Type *Tys[2] = { Ty, InputTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalt");
- }
case NEON::BI__builtin_neon___a32_vcvt_bf16_v: {
llvm::Type *Tys[1] = { Ty };
Function *F = CGM.getIntrinsic(Int, Tys);
@@ -6298,7 +6900,7 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
// Build a vector containing sequential number like (0, 1, 2, ..., 15)
SmallVector<int, 16> Indices;
- llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
Indices.push_back(2*i);
Indices.push_back(2*i+1);
@@ -6810,6 +7412,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
AccessKind);
}
+ // Handle MSVC intrinsics before argument evaluation to prevent double
+ // evaluation.
+ if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
+ return EmitMSVCBuiltinExpr(*MsvcIntId, E);
+
// Deal with MVE builtins
if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
return Result;
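// Editorial sketch (assumption, not part of this diff): the
// translateArmToMsvcIntrin helper used above is expected to take over the
// long per-target case lists removed later in this patch, mapping target
// builtin IDs onto the shared MSVCIntrin enumeration roughly like:
//
//   static Optional<MSVCIntrin> translateArmToMsvcIntrin(unsigned BuiltinID) {
//     switch (BuiltinID) {
//     default:
//       return None;
//     case clang::ARM::BI_BitScanForward:
//     case clang::ARM::BI_BitScanForward64:
//       return MSVCIntrin::_BitScanForward;
//     case clang::ARM::BI_InterlockedExchange64:
//       return MSVCIntrin::_InterlockedExchange;
//     // ... one case group per _BitScan*/_Interlocked* builtin ...
//     }
//   }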
@@ -6898,10 +7505,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
} else {
// If this is required to be a constant, constant fold it so that we know
// that the generated intrinsic gets a ConstantInt.
- llvm::APSInt Result;
- bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
- assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
- Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(),
+ *E->getArg(i)->getIntegerConstantExpr(getContext())));
}
}
@@ -6971,150 +7577,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
Ops[3], Ops[4], Ops[5]});
}
- case ARM::BI_BitScanForward:
- case ARM::BI_BitScanForward64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
- case ARM::BI_BitScanReverse:
- case ARM::BI_BitScanReverse64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
-
- case ARM::BI_InterlockedAnd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
- case ARM::BI_InterlockedExchange64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
- case ARM::BI_InterlockedExchangeAdd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
- case ARM::BI_InterlockedExchangeSub64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
- case ARM::BI_InterlockedOr64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
- case ARM::BI_InterlockedXor64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
- case ARM::BI_InterlockedDecrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
- case ARM::BI_InterlockedIncrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
- case ARM::BI_InterlockedExchangeAdd8_acq:
- case ARM::BI_InterlockedExchangeAdd16_acq:
- case ARM::BI_InterlockedExchangeAdd_acq:
- case ARM::BI_InterlockedExchangeAdd64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
- case ARM::BI_InterlockedExchangeAdd8_rel:
- case ARM::BI_InterlockedExchangeAdd16_rel:
- case ARM::BI_InterlockedExchangeAdd_rel:
- case ARM::BI_InterlockedExchangeAdd64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
- case ARM::BI_InterlockedExchangeAdd8_nf:
- case ARM::BI_InterlockedExchangeAdd16_nf:
- case ARM::BI_InterlockedExchangeAdd_nf:
- case ARM::BI_InterlockedExchangeAdd64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
- case ARM::BI_InterlockedExchange8_acq:
- case ARM::BI_InterlockedExchange16_acq:
- case ARM::BI_InterlockedExchange_acq:
- case ARM::BI_InterlockedExchange64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
- case ARM::BI_InterlockedExchange8_rel:
- case ARM::BI_InterlockedExchange16_rel:
- case ARM::BI_InterlockedExchange_rel:
- case ARM::BI_InterlockedExchange64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
- case ARM::BI_InterlockedExchange8_nf:
- case ARM::BI_InterlockedExchange16_nf:
- case ARM::BI_InterlockedExchange_nf:
- case ARM::BI_InterlockedExchange64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
- case ARM::BI_InterlockedCompareExchange8_acq:
- case ARM::BI_InterlockedCompareExchange16_acq:
- case ARM::BI_InterlockedCompareExchange_acq:
- case ARM::BI_InterlockedCompareExchange64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
- case ARM::BI_InterlockedCompareExchange8_rel:
- case ARM::BI_InterlockedCompareExchange16_rel:
- case ARM::BI_InterlockedCompareExchange_rel:
- case ARM::BI_InterlockedCompareExchange64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
- case ARM::BI_InterlockedCompareExchange8_nf:
- case ARM::BI_InterlockedCompareExchange16_nf:
- case ARM::BI_InterlockedCompareExchange_nf:
- case ARM::BI_InterlockedCompareExchange64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
- case ARM::BI_InterlockedOr8_acq:
- case ARM::BI_InterlockedOr16_acq:
- case ARM::BI_InterlockedOr_acq:
- case ARM::BI_InterlockedOr64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
- case ARM::BI_InterlockedOr8_rel:
- case ARM::BI_InterlockedOr16_rel:
- case ARM::BI_InterlockedOr_rel:
- case ARM::BI_InterlockedOr64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
- case ARM::BI_InterlockedOr8_nf:
- case ARM::BI_InterlockedOr16_nf:
- case ARM::BI_InterlockedOr_nf:
- case ARM::BI_InterlockedOr64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
- case ARM::BI_InterlockedXor8_acq:
- case ARM::BI_InterlockedXor16_acq:
- case ARM::BI_InterlockedXor_acq:
- case ARM::BI_InterlockedXor64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
- case ARM::BI_InterlockedXor8_rel:
- case ARM::BI_InterlockedXor16_rel:
- case ARM::BI_InterlockedXor_rel:
- case ARM::BI_InterlockedXor64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
- case ARM::BI_InterlockedXor8_nf:
- case ARM::BI_InterlockedXor16_nf:
- case ARM::BI_InterlockedXor_nf:
- case ARM::BI_InterlockedXor64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
- case ARM::BI_InterlockedAnd8_acq:
- case ARM::BI_InterlockedAnd16_acq:
- case ARM::BI_InterlockedAnd_acq:
- case ARM::BI_InterlockedAnd64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
- case ARM::BI_InterlockedAnd8_rel:
- case ARM::BI_InterlockedAnd16_rel:
- case ARM::BI_InterlockedAnd_rel:
- case ARM::BI_InterlockedAnd64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
- case ARM::BI_InterlockedAnd8_nf:
- case ARM::BI_InterlockedAnd16_nf:
- case ARM::BI_InterlockedAnd_nf:
- case ARM::BI_InterlockedAnd64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
- case ARM::BI_InterlockedIncrement16_acq:
- case ARM::BI_InterlockedIncrement_acq:
- case ARM::BI_InterlockedIncrement64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
- case ARM::BI_InterlockedIncrement16_rel:
- case ARM::BI_InterlockedIncrement_rel:
- case ARM::BI_InterlockedIncrement64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
- case ARM::BI_InterlockedIncrement16_nf:
- case ARM::BI_InterlockedIncrement_nf:
- case ARM::BI_InterlockedIncrement64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
- case ARM::BI_InterlockedDecrement16_acq:
- case ARM::BI_InterlockedDecrement_acq:
- case ARM::BI_InterlockedDecrement64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
- case ARM::BI_InterlockedDecrement16_rel:
- case ARM::BI_InterlockedDecrement_rel:
- case ARM::BI_InterlockedDecrement64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
- case ARM::BI_InterlockedDecrement16_nf:
- case ARM::BI_InterlockedDecrement_nf:
- case ARM::BI_InterlockedDecrement64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
}
// Get the last argument, which specifies the vector type.
assert(HasExtraArg);
- llvm::APSInt Result;
const Expr *Arg = E->getArg(E->getNumArgs()-1);
- if (!Arg->isIntegerConstantExpr(Result, getContext()))
+ Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext());
+ if (!Result)
return nullptr;
if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
@@ -7127,7 +7596,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Ty = DoubleTy;
// Determine whether this is an unsigned conversion or not.
- bool usgn = Result.getZExtValue() == 1;
+ bool usgn = Result->getZExtValue() == 1;
unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
// Call the appropriate intrinsic.
@@ -7136,14 +7605,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
// Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(Result.getZExtValue());
+ NeonTypeFlags Type = Result->getZExtValue();
bool usgn = Type.isUnsigned();
bool rightShift = false;
- llvm::VectorType *VTy = GetNeonType(this, Type,
- getTarget().hasLegalHalfType(),
- false,
- getTarget().hasBFloat16Type());
+ llvm::FixedVectorType *VTy =
+ GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
+ getTarget().hasBFloat16Type());
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -7280,11 +7748,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
template<typename Integer>
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
- llvm::APSInt IntVal;
- bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
- assert(IsConst && "Sema should have checked this was a constant");
- (void)IsConst;
- return IntVal.getExtValue();
+ return E->getIntegerConstantExpr(Context)->getExtValue();
}
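// Editorial note (not part of this patch): the Optional returned by
// getIntegerConstantExpr is dereferenced without a check here; as the removed
// assert documented, Sema is relied upon to have already verified that the
// argument is an integer constant expression.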
static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
@@ -7355,11 +7819,10 @@ static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
// or odds, as desired).
SmallVector<int, 16> Indices;
unsigned InputElements =
- cast<llvm::VectorType>(V->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(V->getType())->getNumElements();
for (unsigned i = 0; i < InputElements; i += 2)
Indices.push_back(i + Odd);
- return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
- Indices);
+ return Builder.CreateShuffleVector(V, Indices);
}
static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
@@ -7368,7 +7831,7 @@ static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
assert(V0->getType() == V1->getType() && "Can't zip different vector types");
SmallVector<int, 16> Indices;
unsigned InputElements =
- cast<llvm::VectorType>(V0->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
for (unsigned i = 0; i < InputElements; i++) {
Indices.push_back(i);
Indices.push_back(i + InputElements);
@@ -7400,8 +7863,7 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
unsigned Mask = ReverseWidth / LaneSize - 1;
for (unsigned i = 0; i < Elements; i++)
Indices.push_back(i ^ Mask);
- return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
- Indices);
+ return Builder.CreateShuffleVector(V, Indices);
}
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
@@ -7557,14 +8019,14 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
assert(E->getNumArgs() >= 3);
// Get the last argument, which specifies the vector type.
- llvm::APSInt Result;
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
+ Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext());
+ if (!Result)
return nullptr;
// Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(Result.getZExtValue());
- llvm::VectorType *Ty = GetNeonType(&CGF, Type);
+ NeonTypeFlags Type = Result->getZExtValue();
+ llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
if (!Ty)
return nullptr;
@@ -8240,15 +8702,15 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
else {
// If this is required to be a constant, constant fold it so that we know
// that the generated intrinsic gets a ConstantInt.
- llvm::APSInt Result;
- if (!E->getArg(i)->isIntegerConstantExpr(Result, getContext()))
- llvm_unreachable("Expected argument to be a constant");
+ Optional<llvm::APSInt> Result =
+ E->getArg(i)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
// Immediates for SVE llvm intrinsics are always 32bit. We can safely
// truncate because the immediate has been range checked and no valid
// immediate requires more than a handful of bits.
- Result = Result.extOrTrunc(32);
- Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+ *Result = Result->extOrTrunc(32);
+ Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
}
}
@@ -8465,7 +8927,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svlen_u64: {
SVETypeFlags TF(Builtin->TypeModifier);
auto VTy = cast<llvm::VectorType>(getSVEType(TF));
- auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min);
+ auto *NumEls =
+ llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
return Builder.CreateMul(NumEls, Builder.CreateCall(F));
@@ -8485,8 +8948,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svtbl2_f64: {
SVETypeFlags TF(Builtin->TypeModifier);
auto VTy = cast<llvm::VectorType>(getSVEType(TF));
- auto TupleTy = llvm::VectorType::get(VTy->getElementType(),
- VTy->getElementCount() * 2);
+ auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy);
Function *FExtr =
CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
@@ -8597,6 +9059,46 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
}
+ if (BuiltinID == AArch64::BI__builtin_arm_ld64b ||
+ BuiltinID == AArch64::BI__builtin_arm_st64b ||
+ BuiltinID == AArch64::BI__builtin_arm_st64bv ||
+ BuiltinID == AArch64::BI__builtin_arm_st64bv0) {
+ llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
+ llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
+
+ if (BuiltinID == AArch64::BI__builtin_arm_ld64b) {
+ // Load from the address via an LLVM intrinsic, receiving a
+ // tuple of 8 i64 words, and store each one to ValPtr.
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
+ llvm::Value *Val = Builder.CreateCall(F, MemAddr);
+ llvm::Value *ToRet;
+ for (size_t i = 0; i < 8; i++) {
+ llvm::Value *ValOffsetPtr = Builder.CreateGEP(ValPtr, Builder.getInt32(i));
+ Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8));
+ ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
+ }
+ return ToRet;
+ } else {
+ // Load 8 i64 words from ValPtr, and store them to the address
+ // via an LLVM intrinsic.
+ SmallVector<llvm::Value *, 9> Args;
+ Args.push_back(MemAddr);
+ for (size_t i = 0; i < 8; i++) {
+ llvm::Value *ValOffsetPtr = Builder.CreateGEP(ValPtr, Builder.getInt32(i));
+ Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8));
+ Args.push_back(Builder.CreateLoad(Addr));
+ }
+
+ auto Intr = (BuiltinID == AArch64::BI__builtin_arm_st64b
+ ? Intrinsic::aarch64_st64b
+ : BuiltinID == AArch64::BI__builtin_arm_st64bv
+ ? Intrinsic::aarch64_st64bv
+ : Intrinsic::aarch64_st64bv0);
+ Function *F = CGM.getIntrinsic(Intr);
+ return Builder.CreateCall(F, Args);
+ }
+ }
+
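// Editorial usage sketch (assumption, not part of this diff): these builtins
// are assumed to follow the ACLE-style prototypes that take a memory address
// plus a pointer to eight 64-bit words, so source-level usage would look
// roughly like:
//
//   uint64_t data[8];
//   __builtin_arm_ld64b(src, data);  // load 64 bytes from *src into data
//   __builtin_arm_st64b(dst, data);  // store the 64 bytes in data to *dst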
if (BuiltinID == AArch64::BI__clear_cache) {
assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
const FunctionDecl *FD = E->getDirectCallee();
@@ -8918,6 +9420,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F);
}
+ // Handle MSVC intrinsics before argument evaluation to prevent double
+ // evaluation.
+ if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
+ return EmitMSVCBuiltinExpr(*MsvcIntId, E);
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -8952,11 +9459,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
} else {
// If this is required to be a constant, constant fold it so that we know
// that the generated intrinsic gets a ConstantInt.
- llvm::APSInt Result;
- bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
- assert(IsConst && "Constant arg isn't actually constant?");
- (void)IsConst;
- Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(),
+ *E->getArg(i)->getIntegerConstantExpr(getContext())));
}
}
@@ -8971,12 +9476,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Result;
}
- llvm::APSInt Result;
const Expr *Arg = E->getArg(E->getNumArgs()-1);
NeonTypeFlags Type(0);
- if (Arg->isIntegerConstantExpr(Result, getContext()))
+ if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()))
// Determine the type of this overloaded NEON intrinsic.
- Type = NeonTypeFlags(Result.getZExtValue());
+ Type = NeonTypeFlags(Result->getZExtValue());
bool usgn = Type.isUnsigned();
bool quad = Type.isQuad();
@@ -8999,21 +9503,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
}
- case NEON::BI__builtin_neon_vcvts_u32_f32:
- case NEON::BI__builtin_neon_vcvtd_u64_f64:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvts_s32_f32:
- case NEON::BI__builtin_neon_vcvtd_s64_f64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
- llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
- llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
- Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
- if (usgn)
- return Builder.CreateFPToUI(Ops[0], InTy);
- return Builder.CreateFPToSI(Ops[0], InTy);
- }
case NEON::BI__builtin_neon_vcvts_f32_u32:
case NEON::BI__builtin_neon_vcvtd_f64_u64:
usgn = true;
@@ -9051,44 +9540,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateUIToFP(Ops[0], FTy);
return Builder.CreateSIToFP(Ops[0], FTy);
}
- case NEON::BI__builtin_neon_vcvth_u16_f16:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s16_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
- if (usgn)
- return Builder.CreateFPToUI(Ops[0], Int16Ty);
- return Builder.CreateFPToSI(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vcvth_u32_f16:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s32_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
- if (usgn)
- return Builder.CreateFPToUI(Ops[0], Int32Ty);
- return Builder.CreateFPToSI(Ops[0], Int32Ty);
- }
- case NEON::BI__builtin_neon_vcvth_u64_f16:
- usgn = true;
- LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s64_f16: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
- if (usgn)
- return Builder.CreateFPToUI(Ops[0], Int64Ty);
- return Builder.CreateFPToSI(Ops[0], Int64Ty);
- }
case NEON::BI__builtin_neon_vcvtah_u16_f16:
case NEON::BI__builtin_neon_vcvtmh_u16_f16:
case NEON::BI__builtin_neon_vcvtnh_u16_f16:
case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
case NEON::BI__builtin_neon_vcvtah_s16_f16:
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
case NEON::BI__builtin_neon_vcvtnh_s16_f16:
- case NEON::BI__builtin_neon_vcvtph_s16_f16: {
+ case NEON::BI__builtin_neon_vcvtph_s16_f16:
+ case NEON::BI__builtin_neon_vcvth_s16_f16: {
unsigned Int;
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
@@ -9104,6 +9565,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fcvtnu; break;
case NEON::BI__builtin_neon_vcvtph_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtpu; break;
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtzu; break;
case NEON::BI__builtin_neon_vcvtah_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtas; break;
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
@@ -9112,6 +9575,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fcvtns; break;
case NEON::BI__builtin_neon_vcvtph_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtps; break;
+ case NEON::BI__builtin_neon_vcvth_s16_f16:
+ Int = Intrinsic::aarch64_neon_fcvtzs; break;
}
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
return Builder.CreateTrunc(Ops[0], Int16Ty);
@@ -9661,142 +10126,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
"vgetq_lane");
}
- case AArch64::BI_BitScanForward:
- case AArch64::BI_BitScanForward64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
- case AArch64::BI_BitScanReverse:
- case AArch64::BI_BitScanReverse64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
- case AArch64::BI_InterlockedAnd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
- case AArch64::BI_InterlockedExchange64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
- case AArch64::BI_InterlockedExchangeAdd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
- case AArch64::BI_InterlockedExchangeSub64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
- case AArch64::BI_InterlockedOr64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
- case AArch64::BI_InterlockedXor64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
- case AArch64::BI_InterlockedDecrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
- case AArch64::BI_InterlockedIncrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
- case AArch64::BI_InterlockedExchangeAdd8_acq:
- case AArch64::BI_InterlockedExchangeAdd16_acq:
- case AArch64::BI_InterlockedExchangeAdd_acq:
- case AArch64::BI_InterlockedExchangeAdd64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
- case AArch64::BI_InterlockedExchangeAdd8_rel:
- case AArch64::BI_InterlockedExchangeAdd16_rel:
- case AArch64::BI_InterlockedExchangeAdd_rel:
- case AArch64::BI_InterlockedExchangeAdd64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
- case AArch64::BI_InterlockedExchangeAdd8_nf:
- case AArch64::BI_InterlockedExchangeAdd16_nf:
- case AArch64::BI_InterlockedExchangeAdd_nf:
- case AArch64::BI_InterlockedExchangeAdd64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
- case AArch64::BI_InterlockedExchange8_acq:
- case AArch64::BI_InterlockedExchange16_acq:
- case AArch64::BI_InterlockedExchange_acq:
- case AArch64::BI_InterlockedExchange64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
- case AArch64::BI_InterlockedExchange8_rel:
- case AArch64::BI_InterlockedExchange16_rel:
- case AArch64::BI_InterlockedExchange_rel:
- case AArch64::BI_InterlockedExchange64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
- case AArch64::BI_InterlockedExchange8_nf:
- case AArch64::BI_InterlockedExchange16_nf:
- case AArch64::BI_InterlockedExchange_nf:
- case AArch64::BI_InterlockedExchange64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
- case AArch64::BI_InterlockedCompareExchange8_acq:
- case AArch64::BI_InterlockedCompareExchange16_acq:
- case AArch64::BI_InterlockedCompareExchange_acq:
- case AArch64::BI_InterlockedCompareExchange64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
- case AArch64::BI_InterlockedCompareExchange8_rel:
- case AArch64::BI_InterlockedCompareExchange16_rel:
- case AArch64::BI_InterlockedCompareExchange_rel:
- case AArch64::BI_InterlockedCompareExchange64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
- case AArch64::BI_InterlockedCompareExchange8_nf:
- case AArch64::BI_InterlockedCompareExchange16_nf:
- case AArch64::BI_InterlockedCompareExchange_nf:
- case AArch64::BI_InterlockedCompareExchange64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
- case AArch64::BI_InterlockedOr8_acq:
- case AArch64::BI_InterlockedOr16_acq:
- case AArch64::BI_InterlockedOr_acq:
- case AArch64::BI_InterlockedOr64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
- case AArch64::BI_InterlockedOr8_rel:
- case AArch64::BI_InterlockedOr16_rel:
- case AArch64::BI_InterlockedOr_rel:
- case AArch64::BI_InterlockedOr64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
- case AArch64::BI_InterlockedOr8_nf:
- case AArch64::BI_InterlockedOr16_nf:
- case AArch64::BI_InterlockedOr_nf:
- case AArch64::BI_InterlockedOr64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
- case AArch64::BI_InterlockedXor8_acq:
- case AArch64::BI_InterlockedXor16_acq:
- case AArch64::BI_InterlockedXor_acq:
- case AArch64::BI_InterlockedXor64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
- case AArch64::BI_InterlockedXor8_rel:
- case AArch64::BI_InterlockedXor16_rel:
- case AArch64::BI_InterlockedXor_rel:
- case AArch64::BI_InterlockedXor64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
- case AArch64::BI_InterlockedXor8_nf:
- case AArch64::BI_InterlockedXor16_nf:
- case AArch64::BI_InterlockedXor_nf:
- case AArch64::BI_InterlockedXor64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
- case AArch64::BI_InterlockedAnd8_acq:
- case AArch64::BI_InterlockedAnd16_acq:
- case AArch64::BI_InterlockedAnd_acq:
- case AArch64::BI_InterlockedAnd64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
- case AArch64::BI_InterlockedAnd8_rel:
- case AArch64::BI_InterlockedAnd16_rel:
- case AArch64::BI_InterlockedAnd_rel:
- case AArch64::BI_InterlockedAnd64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
- case AArch64::BI_InterlockedAnd8_nf:
- case AArch64::BI_InterlockedAnd16_nf:
- case AArch64::BI_InterlockedAnd_nf:
- case AArch64::BI_InterlockedAnd64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
- case AArch64::BI_InterlockedIncrement16_acq:
- case AArch64::BI_InterlockedIncrement_acq:
- case AArch64::BI_InterlockedIncrement64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
- case AArch64::BI_InterlockedIncrement16_rel:
- case AArch64::BI_InterlockedIncrement_rel:
- case AArch64::BI_InterlockedIncrement64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
- case AArch64::BI_InterlockedIncrement16_nf:
- case AArch64::BI_InterlockedIncrement_nf:
- case AArch64::BI_InterlockedIncrement64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
- case AArch64::BI_InterlockedDecrement16_acq:
- case AArch64::BI_InterlockedDecrement_acq:
- case AArch64::BI_InterlockedDecrement64_acq:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
- case AArch64::BI_InterlockedDecrement16_rel:
- case AArch64::BI_InterlockedDecrement_rel:
- case AArch64::BI_InterlockedDecrement64_rel:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
- case AArch64::BI_InterlockedDecrement16_nf:
- case AArch64::BI_InterlockedDecrement_nf:
- case AArch64::BI_InterlockedDecrement64_nf:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
case AArch64::BI_InterlockedAdd: {
Value *Arg0 = EmitScalarExpr(E->getArg(0));
@@ -9808,7 +10137,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
}
- llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::FixedVectorType *VTy = GetNeonType(this, Type);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
@@ -9869,13 +10198,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
}
case NEON::BI__builtin_neon_vfma_laneq_v: {
- llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+ auto *VTy = cast<llvm::FixedVectorType>(Ty);
// v1f64 fma should be mapped to Neon scalar f64 fma
if (VTy && VTy->getElementType() == DoubleTy) {
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
- llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ llvm::FixedVectorType *VTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
Value *Result;
@@ -10152,10 +10481,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvtq_u64_v:
case NEON::BI__builtin_neon_vcvtq_s16_v:
case NEON::BI__builtin_neon_vcvtq_u16_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
- if (usgn)
- return Builder.CreateFPToUI(Ops[0], Ty);
- return Builder.CreateFPToSI(Ops[0], Ty);
+ Int =
+ usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
+ llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
}
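// Editorial note on the vcvt*_v change above (not part of this patch): the
// aarch64.neon.fcvtzu/fcvtzs intrinsics keep the target's saturating
// behaviour for out-of-range inputs, whereas the fptoui/fptosi instructions
// they replace produce poison in that case.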
case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_u16_v:
@@ -10243,8 +10572,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
- llvm::Type *VTy = GetNeonType(this,
- NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
+ llvm::FixedVectorType *VTy =
+ GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -10760,8 +11089,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld2q_lane_v: {
llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
- Ops.push_back(Ops[1]);
- Ops.erase(Ops.begin()+1);
+ std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
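// Editorial note (not part of this patch): std::rotate(Ops.begin() + 1,
// Ops.begin() + 2, Ops.end()) moves the pointer operand Ops[1] to the back of
// the operand list in one step, exactly what the removed push_back/erase pair
// did; the same rewrite is applied to the vld3/vld4 and vst2-vst4 cases below.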
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
@@ -10774,8 +11102,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld3q_lane_v: {
llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
- Ops.push_back(Ops[1]);
- Ops.erase(Ops.begin()+1);
+ std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
@@ -10789,8 +11116,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld4q_lane_v: {
llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
- Ops.push_back(Ops[1]);
- Ops.erase(Ops.begin()+1);
+ std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
@@ -10803,16 +11129,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vst2_v:
case NEON::BI__builtin_neon_vst2q_v: {
- Ops.push_back(Ops[0]);
- Ops.erase(Ops.begin());
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
Ops, "");
}
case NEON::BI__builtin_neon_vst2_lane_v:
case NEON::BI__builtin_neon_vst2q_lane_v: {
- Ops.push_back(Ops[0]);
- Ops.erase(Ops.begin());
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
@@ -10820,16 +11144,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vst3_v:
case NEON::BI__builtin_neon_vst3q_v: {
- Ops.push_back(Ops[0]);
- Ops.erase(Ops.begin());
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
Ops, "");
}
case NEON::BI__builtin_neon_vst3_lane_v:
case NEON::BI__builtin_neon_vst3q_lane_v: {
- Ops.push_back(Ops[0]);
- Ops.erase(Ops.begin());
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
@@ -10837,16 +11159,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vst4_v:
case NEON::BI__builtin_neon_vst4q_v: {
- Ops.push_back(Ops[0]);
- Ops.erase(Ops.begin());
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
Ops, "");
}
case NEON::BI__builtin_neon_vst4_lane_v:
case NEON::BI__builtin_neon_vst4q_lane_v: {
- Ops.push_back(Ops[0]);
- Ops.erase(Ops.begin());
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
@@ -10956,9 +11276,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
- BuiltinID == BPF::BI__builtin_btf_type_id) &&
+ BuiltinID == BPF::BI__builtin_btf_type_id ||
+ BuiltinID == BPF::BI__builtin_preserve_type_info ||
+ BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
"unexpected BPF builtin");
+ // A sequence number, injected into IR builtin functions, to prevent CSE
+ // when the only difference between otherwise identical calls is their
+ // debuginfo metadata.
+ static uint32_t BuiltinSeqNum;
+
switch (BuiltinID) {
default:
llvm_unreachable("Unexpected BPF builtin");
@@ -10989,65 +11316,65 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
{FieldAddr->getType()});
return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
}
- case BPF::BI__builtin_btf_type_id: {
- Value *FieldVal = nullptr;
-
- // The LValue cannot be converted Value in order to be used as the function
- // parameter. If it is a structure, it is the "alloca" result of the LValue
- // (a pointer) is used in the parameter. If it is a simple type,
- // the value will be loaded from its corresponding "alloca" and used as
- // the parameter. In our case, let us just get a pointer of the LValue
- // since we do not really use the parameter. The purpose of parameter
- // is to prevent the generated IR llvm.bpf.btf.type.id intrinsic call,
- // which carries metadata, from being changed.
- bool IsLValue = E->getArg(0)->isLValue();
- if (IsLValue)
- FieldVal = EmitLValue(E->getArg(0)).getPointer(*this);
- else
- FieldVal = EmitScalarExpr(E->getArg(0));
+ case BPF::BI__builtin_btf_type_id:
+ case BPF::BI__builtin_preserve_type_info: {
+ if (!getDebugInfo()) {
+ CGM.Error(E->getExprLoc(), "using builtin function without -g");
+ return nullptr;
+ }
+
+ const Expr *Arg0 = E->getArg(0);
+ llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
+ Arg0->getType(), Arg0->getExprLoc());
+
+ ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+ Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
+ Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
+ llvm::Function *FnDecl;
+ if (BuiltinID == BPF::BI__builtin_btf_type_id)
+ FnDecl = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
+ else
+ FnDecl = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
+ CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+ return Fn;
+ }
+ case BPF::BI__builtin_preserve_enum_value: {
if (!getDebugInfo()) {
- CGM.Error(E->getExprLoc(), "using __builtin_btf_type_id() without -g");
+ CGM.Error(E->getExprLoc(), "using builtin function without -g");
return nullptr;
}
- // Generate debuginfo type for the first argument.
- llvm::DIType *DbgInfo =
- getDebugInfo()->getOrCreateStandaloneType(E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc());
+ const Expr *Arg0 = E->getArg(0);
+ llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
+ Arg0->getType(), Arg0->getExprLoc());
+
+ // Find enumerator
+ const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
+ const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
+ const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
+ const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
+
+ auto &InitVal = Enumerator->getInitVal();
+ std::string InitValStr;
+ if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
+ InitValStr = std::to_string(InitVal.getSExtValue());
+ else
+ InitValStr = std::to_string(InitVal.getZExtValue());
+ std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
+ Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
+ Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
- // Built the IR for the btf_type_id intrinsic.
- //
- // In the above, we converted LValue argument to a pointer to LValue.
- // For example, the following
- // int v;
- // C1: __builtin_btf_type_id(v, flag);
- // will be converted to
- // L1: llvm.bpf.btf.type.id(&v, flag)
- // This makes it hard to differentiate from
- // C2: __builtin_btf_type_id(&v, flag);
- // to
- // L2: llvm.bpf.btf.type.id(&v, flag)
- //
- // If both C1 and C2 are present in the code, the llvm may later
- // on do CSE on L1 and L2, which will result in incorrect tagged types.
- //
- // The C1->L1 transformation only happens if the argument of
- // __builtin_btf_type_id() is a LValue. So Let us put whether
- // the argument is an LValue or not into generated IR. This should
- // prevent potential CSE from causing debuginfo type loss.
- //
- // The generated IR intrinsics will hence look like
- // L1: llvm.bpf.btf.type.id(&v, 1, flag) !di_type_for_{v};
- // L2: llvm.bpf.btf.type.id(&v, 0, flag) !di_type_for_{&v};
- Constant *CV = ConstantInt::get(IntTy, IsLValue);
- llvm::Function *FnBtfTypeId = llvm::Intrinsic::getDeclaration(
- &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id,
- {FieldVal->getType(), CV->getType()});
- CallInst *Fn = Builder.CreateCall(FnBtfTypeId, {FieldVal, CV, FlagValue});
+ llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
+ CallInst *Fn =
+ Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
return Fn;
}
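// Editorial usage sketch (assumption, not part of this diff): the enumerator
// operand is expected to arrive as a dereferenced cast of the enum constant,
// matching the UnaryOperator/CStyleCastExpr/DeclRefExpr unwrapping above,
// e.g. roughly:
//
//   enum Version { V1 = 1, V2 = 2 };
//   // FLAG stands in for a hypothetical relocation-kind constant.
//   __u64 exists = __builtin_preserve_enum_value(*(enum Version *)V2, FLAG);
//   __u32 kind   = __builtin_preserve_type_info(*(struct task_struct *)0, FLAG);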
@@ -11109,7 +11436,8 @@ static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
llvm::PointerType::getUnqual(Ops[1]->getType()));
Value *MaskVec = getMaskVecValue(
- CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements());
+ CGF, Ops[2],
+ cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
}
@@ -11121,7 +11449,8 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
llvm::PointerType::getUnqual(Ops[1]->getType()));
Value *MaskVec = getMaskVecValue(
- CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements());
+ CGF, Ops[2],
+ cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
return CGF.Builder.CreateMaskedLoad(Ptr, Alignment, MaskVec, Ops[1]);
}
@@ -11135,7 +11464,8 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
+ Value *MaskVec = getMaskVecValue(
+ CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
ResultTy);
@@ -11145,7 +11475,7 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
ArrayRef<Value *> Ops,
bool IsCompress) {
- auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
+ auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
@@ -11157,7 +11487,7 @@ static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
static Value *EmitX86CompressStore(CodeGenFunction &CGF,
ArrayRef<Value *> Ops) {
- auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
+ auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
llvm::Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
@@ -11193,7 +11523,7 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
// Funnel shifts amounts are treated as modulo and types are all power-of-2 so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
- unsigned NumElts = cast<llvm::VectorType>(Ty)->getNumElements();
+ unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
}
@@ -11252,7 +11582,7 @@ static Value *EmitX86Select(CodeGenFunction &CGF,
return Op0;
Mask = getMaskVecValue(
- CGF, Mask, cast<llvm::VectorType>(Op0->getType())->getNumElements());
+ CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}
@@ -11299,7 +11629,7 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
assert((Ops.size() == 2 || Ops.size() == 4) &&
"Unexpected number of arguments");
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
@@ -11353,25 +11683,6 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
}
-static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
-
- llvm::Type *Ty = Ops[0]->getType();
- Value *Zero = llvm::Constant::getNullValue(Ty);
- Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
- Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
- Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
- return Res;
-}
-
-static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
- ArrayRef<Value *> Ops) {
- Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
- Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
-
- assert(Ops.size() == 2);
- return Res;
-}
-
// Lowers X86 FMA intrinsics to IR.
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
unsigned BuiltinID, bool IsAddSub) {
@@ -11576,18 +11887,15 @@ static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
llvm::Type *DstTy) {
- unsigned NumberOfElements = cast<llvm::VectorType>(DstTy)->getNumElements();
+ unsigned NumberOfElements =
+ cast<llvm::FixedVectorType>(DstTy)->getNumElements();
Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
-// Emit addition or subtraction with signed/unsigned saturation.
-static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops, bool IsSigned,
- bool IsAddition) {
- Intrinsic::ID IID =
- IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
- : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
+// Emit binary intrinsic with the same type used in result/args.
+static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops, Intrinsic::ID IID) {
llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
}
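// Editorial sketch (assumption, not part of this diff): with the saturating
// add/sub helper folded into this generic wrapper, callers are expected to
// pick the intrinsic ID at the call site, e.g. roughly:
//
//   case X86::BI__builtin_ia32_paddusb128:
//     return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat);
//   case X86::BI__builtin_ia32_psubsw256:
//     return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat);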
@@ -11612,14 +11920,14 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
}
- unsigned NumDstElts = cast<llvm::VectorType>(DstTy)->getNumElements();
+ unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
Value *Src = Ops[0];
// Extract the subvector.
- if (NumDstElts != cast<llvm::VectorType>(Src->getType())->getNumElements()) {
+ if (NumDstElts !=
+ cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size");
- Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()),
- ArrayRef<int>{0, 1, 2, 3});
+ Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
}
// Bitcast from vXi16 to vXf16.
@@ -11790,7 +12098,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
if (BuiltinID == X86::BI__builtin_cpu_init)
return EmitX86CpuInit();
+ // Handle MSVC intrinsics before argument evaluation to prevent double
+ // evaluation.
+ if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
+ return EmitMSVCBuiltinExpr(*MsvcIntId, E);
+
SmallVector<Value*, 4> Ops;
+ bool IsMaskFCmp = false;
// Find out if any arguments are required to be integer constant expressions.
unsigned ICEArguments = 0;
@@ -11807,10 +12121,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// If this is required to be a constant, constant fold it so that we know
// that the generated intrinsic gets a ConstantInt.
- llvm::APSInt Result;
- bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
- assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
- Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
}
// These exist so that the builtin that takes an immediate can be bounds
@@ -11916,7 +12228,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vec_ext_v8si:
case X86::BI__builtin_ia32_vec_ext_v4di: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
Index &= NumElts - 1;
// These builtins exist so we can ensure the index is an ICE and in range.
@@ -11932,7 +12244,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vec_set_v8si:
case X86::BI__builtin_ia32_vec_set_v4di: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
Index &= NumElts - 1;
// These builtins exist so we can ensure the index is an ICE and in range.
@@ -12358,9 +12670,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
break;
}
- unsigned MinElts =
- std::min(cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(),
- cast<llvm::VectorType>(Ops[2]->getType())->getNumElements());
+ unsigned MinElts = std::min(
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
+ cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
Function *Intr = CGM.getIntrinsic(IID);
return Builder.CreateCall(Intr, Ops);
@@ -12467,9 +12779,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
break;
}
- unsigned MinElts =
- std::min(cast<llvm::VectorType>(Ops[2]->getType())->getNumElements(),
- cast<llvm::VectorType>(Ops[3]->getType())->getNumElements());
+ unsigned MinElts = std::min(
+ cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
+ cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
Function *Intr = CGM.getIntrinsic(IID);
return Builder.CreateCall(Intr, Ops);
@@ -12491,10 +12803,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_extracti64x2_256_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask: {
- auto *DstTy = cast<llvm::VectorType>(ConvertType(E->getType()));
+ auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
unsigned NumElts = DstTy->getNumElements();
unsigned SrcNumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
unsigned SubVectors = SrcNumElts / NumElts;
unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
@@ -12506,7 +12818,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[i] = i + Index;
Value *Res = Builder.CreateShuffleVector(Ops[0],
- UndefValue::get(Ops[0]->getType()),
makeArrayRef(Indices, NumElts),
"extract");
@@ -12532,9 +12843,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_insertf64x2_512:
case X86::BI__builtin_ia32_inserti64x2_512: {
unsigned DstNumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
unsigned SrcNumElts =
- cast<llvm::VectorType>(Ops[1]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
unsigned SubVectors = DstNumElts / SrcNumElts;
unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
@@ -12546,7 +12857,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
Value *Op1 = Builder.CreateShuffleVector(Ops[1],
- UndefValue::get(Ops[1]->getType()),
makeArrayRef(Indices, DstNumElts),
"widen");
@@ -12599,7 +12909,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pblendd128:
case X86::BI__builtin_ia32_pblendd256: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
int Indices[16];
@@ -12616,7 +12926,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
unsigned NumElts = Ty->getNumElements();
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
@@ -12632,15 +12942,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Indices[l + i] = l + i;
}
- return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
- makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
"pshuflw");
}
case X86::BI__builtin_ia32_pshufhw:
case X86::BI__builtin_ia32_pshufhw256:
case X86::BI__builtin_ia32_pshufhw512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
unsigned NumElts = Ty->getNumElements();
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
@@ -12656,8 +12965,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
- makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
"pshufhw");
}
case X86::BI__builtin_ia32_pshufd:
@@ -12670,7 +12978,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vpermilpd512:
case X86::BI__builtin_ia32_vpermilps512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
unsigned NumElts = Ty->getNumElements();
unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
@@ -12686,8 +12994,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
}
- return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
- makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
"permil");
}
case X86::BI__builtin_ia32_shufpd:
@@ -12697,7 +13004,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
unsigned NumElts = Ty->getNumElements();
unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
@@ -12725,7 +13032,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_permdi512:
case X86::BI__builtin_ia32_permdf512: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
- auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
unsigned NumElts = Ty->getNumElements();
// These intrinsics operate on 256-bit lanes of four 64-bit elements.
@@ -12734,8 +13041,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
for (unsigned i = 0; i != 4; ++i)
Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
- return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
- makeArrayRef(Indices, NumElts),
+ return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
"perm");
}
case X86::BI__builtin_ia32_palignr128:
@@ -12744,7 +13050,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
assert(NumElts % 16 == 0);
// If palignr is shifting the pair of vectors more than the size of two
@@ -12782,7 +13088,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
// Mask the shift amount to width of two vectors.
@@ -12805,7 +13111,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_shuf_i32x4:
case X86::BI__builtin_ia32_shuf_i64x2: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- auto *Ty = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
unsigned NumElts = Ty->getNumElements();
unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
unsigned NumLaneElts = NumElts / NumLanes;
@@ -12832,7 +13138,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_permti256: {
unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
// This takes a very simple approach since there are two lanes and a
// shuffle can have 2 inputs. So we reserve the first input for the first
@@ -12870,7 +13176,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
// Builtin type is vXi64 so multiply by 8 to get bytes.
unsigned NumElts = ResultType->getNumElements() * 8;
@@ -12900,7 +13206,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_psrldqi256_byteshift:
case X86::BI__builtin_ia32_psrldqi512_byteshift: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
- auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType());
+ auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
// Builtin type is vXi64 so multiply by 8 to get bytes.
unsigned NumElts = ResultType->getNumElements() * 8;
@@ -13342,9 +13648,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pabsb512:
case X86::BI__builtin_ia32_pabsw512:
case X86::BI__builtin_ia32_pabsd512:
- case X86::BI__builtin_ia32_pabsq512:
- return EmitX86Abs(*this, Ops);
-
+ case X86::BI__builtin_ia32_pabsq512: {
+ Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
case X86::BI__builtin_ia32_pmaxsb128:
case X86::BI__builtin_ia32_pmaxsw128:
case X86::BI__builtin_ia32_pmaxsd128:
@@ -13357,7 +13664,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pmaxsw512:
case X86::BI__builtin_ia32_pmaxsd512:
case X86::BI__builtin_ia32_pmaxsq512:
- return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax);
case X86::BI__builtin_ia32_pmaxub128:
case X86::BI__builtin_ia32_pmaxuw128:
case X86::BI__builtin_ia32_pmaxud128:
@@ -13370,7 +13677,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pmaxuw512:
case X86::BI__builtin_ia32_pmaxud512:
case X86::BI__builtin_ia32_pmaxuq512:
- return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax);
case X86::BI__builtin_ia32_pminsb128:
case X86::BI__builtin_ia32_pminsw128:
case X86::BI__builtin_ia32_pminsd128:
@@ -13383,7 +13690,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pminsw512:
case X86::BI__builtin_ia32_pminsd512:
case X86::BI__builtin_ia32_pminsq512:
- return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin);
case X86::BI__builtin_ia32_pminub128:
case X86::BI__builtin_ia32_pminuw128:
case X86::BI__builtin_ia32_pminud128:
@@ -13396,7 +13703,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pminuw512:
case X86::BI__builtin_ia32_pminud512:
case X86::BI__builtin_ia32_pminuq512:
- return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin);
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
@@ -13470,6 +13777,68 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Ops 0 and 1 are swapped.
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+ // Reductions
+ case X86::BI__builtin_ia32_reduce_add_d512:
+ case X86::BI__builtin_ia32_reduce_add_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_and_d512:
+ case X86::BI__builtin_ia32_reduce_and_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_fadd_pd512:
+ case X86::BI__builtin_ia32_reduce_fadd_ps512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+ return Builder.CreateCall(F, {Ops[0], Ops[1]});
+ }
+ case X86::BI__builtin_ia32_reduce_fmul_pd512:
+ case X86::BI__builtin_ia32_reduce_fmul_ps512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+ return Builder.CreateCall(F, {Ops[0], Ops[1]});
+ }
+ case X86::BI__builtin_ia32_reduce_mul_d512:
+ case X86::BI__builtin_ia32_reduce_mul_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_or_d512:
+ case X86::BI__builtin_ia32_reduce_or_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_or, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_smax_d512:
+ case X86::BI__builtin_ia32_reduce_smax_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_smax, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_smin_d512:
+ case X86::BI__builtin_ia32_reduce_smin_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_smin, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_umax_d512:
+ case X86::BI__builtin_ia32_reduce_umax_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_umax, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
+ case X86::BI__builtin_ia32_reduce_umin_d512:
+ case X86::BI__builtin_ia32_reduce_umin_q512: {
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::vector_reduce_umin, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0]});
+ }
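For context, the integer reductions above take only the source vector, while the fadd/fmul forms pass Ops[0] as an explicit start value ahead of the vector in Ops[1]. A minimal scalar reference model of that semantics, written purely for illustration (the helper names are hypothetical and not part of the patch):

#include <array>
#include <cstdint>

// Scalar model of llvm.vector.reduce.add over <8 x i64>.
static int64_t reduce_add_q512_ref(const std::array<int64_t, 8> &V) {
  int64_t Acc = 0;
  for (int64_t E : V)
    Acc += E;
  return Acc;
}

// Scalar model of llvm.vector.reduce.fadd: Ops[0] seeds the ordered reduction.
static double reduce_fadd_pd512_ref(double Start,
                                    const std::array<double, 8> &V) {
  double Acc = Start;
  for (double E : V)
    Acc += E;
  return Acc;
}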
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -13547,7 +13916,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclasspd256_mask:
case X86::BI__builtin_ia32_fpclasspd512_mask: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
Value *MaskIn = Ops[2];
Ops.erase(&Ops[2]);
@@ -13585,7 +13954,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vp2intersect_d_256:
case X86::BI__builtin_ia32_vp2intersect_d_128: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
Intrinsic::ID ID;
switch (BuiltinID) {
@@ -13644,7 +14013,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
Value *MaskIn = Ops[2];
Ops.erase(&Ops[2]);
@@ -13691,21 +14060,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpordps:
case X86::BI__builtin_ia32_cmpordpd:
return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
- case X86::BI__builtin_ia32_cmpps:
- case X86::BI__builtin_ia32_cmpps256:
- case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256:
case X86::BI__builtin_ia32_cmpps128_mask:
case X86::BI__builtin_ia32_cmpps256_mask:
case X86::BI__builtin_ia32_cmpps512_mask:
case X86::BI__builtin_ia32_cmppd128_mask:
case X86::BI__builtin_ia32_cmppd256_mask:
- case X86::BI__builtin_ia32_cmppd512_mask: {
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ IsMaskFCmp = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256: {
// Lowering vector comparisons to fcmp instructions, while
// ignoring signalling behaviour requested
// ignoring rounding mode requested
- // This is is only possible as long as FENV_ACCESS is not implemented.
- // See also: https://reviews.llvm.org/D45616
+ // This is only possible if fp-model is not strict and FENV_ACCESS is off.
 // The third argument is the comparison condition, an integer in the
// range [0, 31]
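For reference, a partial sketch of how the low comparison immediates decode into fcmp predicates. The values follow the AVX _CMP_* encoding; only the first eight of the 32 encodings are shown and signaling behaviour is ignored, so treat this as illustrative rather than the full table used in the builtin handler:

#include "llvm/IR/InstrTypes.h"

static llvm::CmpInst::Predicate decodeLowCmpImm(unsigned Imm) {
  switch (Imm & 0x7) {
  case 0x0: return llvm::CmpInst::FCMP_OEQ; // _CMP_EQ_OQ
  case 0x1: return llvm::CmpInst::FCMP_OLT; // _CMP_LT_OS
  case 0x2: return llvm::CmpInst::FCMP_OLE; // _CMP_LE_OS
  case 0x3: return llvm::CmpInst::FCMP_UNO; // _CMP_UNORD_Q
  case 0x4: return llvm::CmpInst::FCMP_UNE; // _CMP_NEQ_UQ
  case 0x5: return llvm::CmpInst::FCMP_UGE; // _CMP_NLT_US
  case 0x6: return llvm::CmpInst::FCMP_UGT; // _CMP_NLE_US
  default:  return llvm::CmpInst::FCMP_ORD; // _CMP_ORD_Q
  }
}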
@@ -13745,8 +14115,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// If the predicate is true or false and we're using constrained intrinsics,
// we don't have a compare intrinsic we can use. Just use the legacy X86
// specific intrinsic.
- if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) &&
- Builder.getIsFPConstrained()) {
+ // If the intrinsic is mask enabled and we're using constrained intrinsics,
+ // use the legacy X86 specific intrinsic.
+ if (Builder.getIsFPConstrained() &&
+ (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
+ IsMaskFCmp)) {
Intrinsic::ID IID;
switch (BuiltinID) {
@@ -13764,36 +14137,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
IID = Intrinsic::x86_avx_cmp_pd_256;
break;
case X86::BI__builtin_ia32_cmpps512_mask:
- IID = Intrinsic::x86_avx512_cmp_ps_512;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
break;
case X86::BI__builtin_ia32_cmppd512_mask:
- IID = Intrinsic::x86_avx512_cmp_pd_512;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
break;
case X86::BI__builtin_ia32_cmpps128_mask:
- IID = Intrinsic::x86_avx512_cmp_ps_128;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
break;
case X86::BI__builtin_ia32_cmpps256_mask:
- IID = Intrinsic::x86_avx512_cmp_ps_256;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
break;
case X86::BI__builtin_ia32_cmppd128_mask:
- IID = Intrinsic::x86_avx512_cmp_pd_128;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
break;
case X86::BI__builtin_ia32_cmppd256_mask:
- IID = Intrinsic::x86_avx512_cmp_pd_256;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
break;
}
Function *Intr = CGM.getIntrinsic(IID);
- if (cast<llvm::VectorType>(Intr->getReturnType())
- ->getElementType()
- ->isIntegerTy(1)) {
+ if (IsMaskFCmp) {
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
- Value *MaskIn = Ops[3];
- Ops.erase(&Ops[3]);
-
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
+ Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
Value *Cmp = Builder.CreateCall(Intr, Ops);
- return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
}
return Builder.CreateCall(Intr, Ops);
@@ -13801,16 +14170,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Builtins without the _mask suffix return a vector of integers
// of the same width as the input vectors
- switch (BuiltinID) {
- case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd512_mask:
- case X86::BI__builtin_ia32_cmpps128_mask:
- case X86::BI__builtin_ia32_cmpps256_mask:
- case X86::BI__builtin_ia32_cmppd128_mask:
- case X86::BI__builtin_ia32_cmppd256_mask: {
- // FIXME: Support SAE.
+ if (IsMaskFCmp) {
+ // We ignore SAE if strict FP is disabled. We only keep precise
+ // exception behavior under strict FP.
unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
Value *Cmp;
if (IsSignaling)
Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
@@ -13818,9 +14182,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
}
- default:
- return getVectorFCmpIR(Pred, IsSignaling);
- }
+
+ return getVectorFCmpIR(Pred, IsSignaling);
}
// SSE scalar comparison intrinsics
@@ -13869,7 +14232,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
Ops[2] = getMaskVecValue(
*this, Ops[2],
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements());
+ cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
@@ -13935,25 +14298,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
case X86::BI__shiftleft128:
case X86::BI__shiftright128: {
- // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
- // llvm::Function *F = CGM.getIntrinsic(
- // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
- // Int64Ty);
- // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
- // return Builder.CreateCall(F, Ops);
- llvm::Type *Int128Ty = Builder.getInt128Ty();
- Value *HighPart128 =
- Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64);
- Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty);
- Value *Val = Builder.CreateOr(HighPart128, LowPart128);
- Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
- llvm::ConstantInt::get(Int128Ty, 0x3f));
- Value *Res;
- if (BuiltinID == X86::BI__shiftleft128)
- Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
- else
- Res = Builder.CreateLShr(Val, Amt);
- return Builder.CreateTrunc(Res, Int64Ty);
+ llvm::Function *F = CGM.getIntrinsic(
+ BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
+ Int64Ty);
+ // Flip low/high ops and zero-extend amount to matching type.
+ // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
+ // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
+ std::swap(Ops[0], Ops[1]);
+ Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+ return Builder.CreateCall(F, Ops);
}
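The funnel-shift lowering above preserves the semantics of the open-coded 128-bit sequence it replaces; a small host-side reference model, given only to illustrate why fshl/fshr with swapped operands is equivalent (relies on the Clang/GCC __int128 extension):

#include <cstdint>

static uint64_t shiftleft128_ref(uint64_t Low, uint64_t High, uint8_t Amt) {
  unsigned __int128 Val = ((unsigned __int128)High << 64) | Low;
  return (uint64_t)((Val << (Amt & 63)) >> 64); // == fshl(High, Low, Amt)
}

static uint64_t shiftright128_ref(uint64_t Low, uint64_t High, uint8_t Amt) {
  unsigned __int128 Val = ((unsigned __int128)High << 64) | Low;
  return (uint64_t)(Val >> (Amt & 63));         // == fshr(High, Low, Amt)
}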
case X86::BI_ReadWriteBarrier:
case X86::BI_ReadBarrier:
@@ -13961,65 +14314,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
llvm::SyncScope::SingleThread);
}
- case X86::BI_BitScanForward:
- case X86::BI_BitScanForward64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
- case X86::BI_BitScanReverse:
- case X86::BI_BitScanReverse64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
-
- case X86::BI_InterlockedAnd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
- case X86::BI_InterlockedExchange64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
- case X86::BI_InterlockedExchangeAdd64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
- case X86::BI_InterlockedExchangeSub64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
- case X86::BI_InterlockedOr64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
- case X86::BI_InterlockedXor64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
- case X86::BI_InterlockedDecrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
- case X86::BI_InterlockedIncrement64:
- return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
- case X86::BI_InterlockedCompareExchange128: {
- // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
- // instead it takes pointers to 64bit ints for Destination and
- // ComparandResult, and exchange is taken as two 64bit ints (high & low).
- // The previous value is written to ComparandResult, and success is
- // returned.
-
- llvm::Type *Int128Ty = Builder.getInt128Ty();
- llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
-
- Value *Destination =
- Builder.CreateBitCast(Ops[0], Int128PtrTy);
- Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
- Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
- Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
- getContext().toCharUnitsFromBits(128));
-
- Value *Exchange = Builder.CreateOr(
- Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
- ExchangeLow128);
-
- Value *Comparand = Builder.CreateLoad(ComparandResult);
-
- AtomicCmpXchgInst *CXI =
- Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
- CXI->setVolatile(true);
-
- // Write the result back to the inout pointer.
- Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
-
- // Get the success boolean and zero extend it to i8.
- Value *Success = Builder.CreateExtractValue(CXI, 1);
- return Builder.CreateZExt(Success, ConvertType(E->getType()));
- }
case X86::BI_AddressOfReturnAddress: {
Function *F =
@@ -14076,28 +14370,124 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_paddsw256:
case X86::BI__builtin_ia32_paddsb128:
case X86::BI__builtin_ia32_paddsw128:
- return EmitX86AddSubSatExpr(*this, Ops, true, true);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat);
case X86::BI__builtin_ia32_paddusb512:
case X86::BI__builtin_ia32_paddusw512:
case X86::BI__builtin_ia32_paddusb256:
case X86::BI__builtin_ia32_paddusw256:
case X86::BI__builtin_ia32_paddusb128:
case X86::BI__builtin_ia32_paddusw128:
- return EmitX86AddSubSatExpr(*this, Ops, false, true);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat);
case X86::BI__builtin_ia32_psubsb512:
case X86::BI__builtin_ia32_psubsw512:
case X86::BI__builtin_ia32_psubsb256:
case X86::BI__builtin_ia32_psubsw256:
case X86::BI__builtin_ia32_psubsb128:
case X86::BI__builtin_ia32_psubsw128:
- return EmitX86AddSubSatExpr(*this, Ops, true, false);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat);
case X86::BI__builtin_ia32_psubusb512:
case X86::BI__builtin_ia32_psubusw512:
case X86::BI__builtin_ia32_psubusb256:
case X86::BI__builtin_ia32_psubusw256:
case X86::BI__builtin_ia32_psubusb128:
case X86::BI__builtin_ia32_psubusw128:
- return EmitX86AddSubSatExpr(*this, Ops, false, false);
+ return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat);
+ case X86::BI__builtin_ia32_encodekey128_u32: {
+ Intrinsic::ID IID = Intrinsic::x86_encodekey128;
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
+
+ for (int i = 0; i < 6; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Call, i + 1);
+ Value *Ptr = Builder.CreateConstGEP1_32(Ops[2], i * 16);
+ Ptr = Builder.CreateBitCast(
+ Ptr, llvm::PointerType::getUnqual(Extract->getType()));
+ Builder.CreateAlignedStore(Extract, Ptr, Align(1));
+ }
+
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_encodekey256_u32: {
+ Intrinsic::ID IID = Intrinsic::x86_encodekey256;
+
+ Value *Call =
+ Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
+
+ for (int i = 0; i < 7; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Call, i + 1);
+ Value *Ptr = Builder.CreateConstGEP1_32(Ops[3], i * 16);
+ Ptr = Builder.CreateBitCast(
+ Ptr, llvm::PointerType::getUnqual(Extract->getType()));
+ Builder.CreateAlignedStore(Extract, Ptr, Align(1));
+ }
+
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ case X86::BI__builtin_ia32_aesdec256kl_u8: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ IID = Intrinsic::x86_aesenc128kl;
+ break;
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ IID = Intrinsic::x86_aesdec128kl;
+ break;
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ IID = Intrinsic::x86_aesenc256kl;
+ break;
+ case X86::BI__builtin_ia32_aesdec256kl_u8:
+ IID = Intrinsic::x86_aesdec256kl;
+ break;
+ }
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
+
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[0]);
+
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ IID = Intrinsic::x86_aesencwide128kl;
+ break;
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ IID = Intrinsic::x86_aesdecwide128kl;
+ break;
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ IID = Intrinsic::x86_aesencwide256kl;
+ break;
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ IID = Intrinsic::x86_aesdecwide256kl;
+ break;
+ }
+
+ Value *InOps[9];
+ InOps[0] = Ops[2];
+ for (int i = 0; i != 8; ++i) {
+ Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i);
+ InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16));
+ }
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
+
+ for (int i = 0; i != 8; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Call, i + 1);
+ Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i);
+ Builder.CreateAlignedStore(Extract, Ptr, Align(16));
+ }
+
+ return Builder.CreateExtractValue(Call, 0);
+ }
}
}
@@ -14284,6 +14674,63 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
return Builder.CreateCall(F, {X, Undef});
}
+ case PPC::BI__builtin_altivec_vec_replace_elt:
+ case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+ // The third argument of vec_replace_elt and vec_replace_unaligned must
+ // be a compile-time constant and is lowered to either the vinsw or the
+ // vinsd instruction.
+ ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+ assert(ArgCI &&
+ "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Function *F = nullptr;
+ Value *Call = nullptr;
+ int64_t ConstArg = ArgCI->getSExtValue();
+ unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
+ bool Is32Bit = false;
+ assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
+ // The input to vec_replace_elt is an element index, not a byte index.
+ if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
+ ConstArg *= ArgWidth / 8;
+ if (ArgWidth == 32) {
+ Is32Bit = true;
+ // When the second argument is 32 bits, it can either be an integer or
+ // a float. The vinsw intrinsic is used in this case.
+ F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+ // Fix the constant according to endianness.
+ if (getTarget().isLittleEndian())
+ ConstArg = 12 - ConstArg;
+ } else {
+ // When the second argument is 64 bits, it can either be a long long or
+ // a double. The vinsd intrinsic is used in this case.
+ F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+ // Fix the constant for little endian.
+ if (getTarget().isLittleEndian())
+ ConstArg = 8 - ConstArg;
+ }
+ Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+ // Depending on ArgWidth, the input vector could be a float or a double.
+ // If the input vector is a float type, bitcast the inputs to integers. Or,
+ // if the input vector is a double, bitcast the inputs to 64-bit integers.
+ if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
+ Ops[0] = Builder.CreateBitCast(
+ Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
+ : llvm::FixedVectorType::get(Int64Ty, 2));
+ Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
+ }
+ // Emit the call to vinsw or vinsd.
+ Call = Builder.CreateCall(F, Ops);
+ // Depending on the builtin, bitcast to the appropriate result type.
+ if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+ !Ops[1]->getType()->isIntegerTy())
+ return Builder.CreateBitCast(Call, ResultType);
+ else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+ Ops[1]->getType()->isIntegerTy())
+ return Call;
+ else
+ return Builder.CreateBitCast(Call,
+ llvm::FixedVectorType::get(Int8Ty, 16));
+ }
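The index handling above reduces to a small arithmetic rule; an illustrative helper (not part of the patch) that computes the byte offset handed to vinsw/vinsd for vec_replace_elt:

#include <cstdint>

// ElemIdx is the user-supplied element index, ElemWidth is 32 or 64. On
// little-endian targets the offset is mirrored across the 16-byte register.
static int64_t vinsByteOffset(int64_t ElemIdx, unsigned ElemWidth,
                              bool IsLittleEndian) {
  int64_t ByteIdx = ElemIdx * (ElemWidth / 8);
  if (IsLittleEndian)
    ByteIdx = (ElemWidth == 32 ? 12 : 8) - ByteIdx;
  return ByteIdx;
}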
case PPC::BI__builtin_altivec_vpopcntb:
case PPC::BI__builtin_altivec_vpopcnth:
case PPC::BI__builtin_altivec_vpopcntw:
@@ -14329,8 +14776,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
BuiltinID == PPC::BI__builtin_vsx_xvrspic)
ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_nearbyint
- : Intrinsic::nearbyint;
+ ? Intrinsic::experimental_constrained_rint
+ : Intrinsic::rint;
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
BuiltinID == PPC::BI__builtin_vsx_xvrspip)
ID = Builder.getIsFPConstrained()
@@ -14565,6 +15012,77 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractElement(Unpacked, Index);
}
+
+ // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
+ // Some of the MMA instructions accumulate their result into an existing
+ // accumulator whereas the others generate a new accumulator. So we need to
+ // use custom code generation to expand a builtin call with a pointer to a
+ // load (if the corresponding instruction accumulates its result) followed by
+ // the call to the intrinsic and a store of the result.
+#define CUSTOM_BUILTIN(Name, Types, Accumulate) \
+ case PPC::BI__builtin_##Name:
+#include "clang/Basic/BuiltinsPPC.def"
+ {
+ // The first argument of these two builtins is a pointer used to store their
+ // result. However, the llvm intrinsics return their result in multiple
+ // return values. So, here we emit code extracting these values from the
+ // intrinsic results and storing them using that pointer.
+ if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
+ BuiltinID == PPC::BI__builtin_vsx_disassemble_pair) {
+ unsigned NumVecs = 2;
+ auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
+ if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
+ NumVecs = 4;
+ Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
+ }
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic);
+ Address Addr = EmitPointerWithAlignment(E->getArg(1));
+ Value *Vec = Builder.CreateLoad(Addr);
+ Value *Call = Builder.CreateCall(F, {Vec});
+ llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
+ Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo());
+ for (unsigned i=0; i<NumVecs; i++) {
+ Value *Vec = Builder.CreateExtractValue(Call, i);
+ llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
+ Value *GEP = Builder.CreateInBoundsGEP(Ptr, Index);
+ Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
+ }
+ return Call;
+ }
+ bool Accumulate;
+ switch (BuiltinID) {
+ #define CUSTOM_BUILTIN(Name, Types, Acc) \
+ case PPC::BI__builtin_##Name: \
+ ID = Intrinsic::ppc_##Name; \
+ Accumulate = Acc; \
+ break;
+ #include "clang/Basic/BuiltinsPPC.def"
+ }
+ if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
+ BuiltinID == PPC::BI__builtin_vsx_stxvp) {
+ if (BuiltinID == PPC::BI__builtin_vsx_lxvp) {
+ Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
+ Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
+ } else {
+ Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
+ Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
+ }
+ Ops.pop_back();
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops, "");
+ }
+ SmallVector<Value*, 4> CallOps;
+ if (Accumulate) {
+ Address Addr = EmitPointerWithAlignment(E->getArg(0));
+ Value *Acc = Builder.CreateLoad(Addr);
+ CallOps.push_back(Acc);
+ }
+ for (unsigned i=1; i<Ops.size(); i++)
+ CallOps.push_back(Ops[i]);
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ Value *Call = Builder.CreateCall(F, CallOps);
+ return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
+ }
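Usage-level sketch of the pattern the custom expansion above produces, assuming a Power10 target with the MMA builtins declared in altivec.h (the specific builtin name is an example, not taken from this patch): an accumulating outer product loads the existing accumulator through the pointer argument, calls the intrinsic, and stores the result back, while non-accumulating forms skip the initial load.

#include <altivec.h>

void f32ger_step(__vector_quad *Acc, vector unsigned char A,
                 vector unsigned char B) {
  // Expands to: load *Acc, call llvm.ppc.mma.xvf32gerpp(acc, A, B), store *Acc.
  __builtin_mma_xvf32gerpp(Acc, A, B);
}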
}
}
@@ -14608,6 +15126,22 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
llvm::MDNode::get(CGF.getLLVMContext(), None));
return LD;
}
+
+// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
+Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
+ const unsigned XOffset = 12;
+ auto *DP = EmitAMDGPUDispatchPtr(CGF);
+ // Indexing the HSA kernel_dispatch_packet struct.
+ auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
+ auto *GEP = CGF.Builder.CreateGEP(DP, Offset);
+ auto *DstTy =
+ CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
+ auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
+ auto *LD = CGF.Builder.CreateLoad(Address(Cast, CharUnits::fromQuantity(4)));
+ LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
+ llvm::MDNode::get(CGF.getLLVMContext(), None));
+ return LD;
+}
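The XOffset of 12 used above corresponds to the grid_size_x field of the HSA kernel dispatch packet; a hedged layout sketch (field order per the HSA specification, trimmed to what is needed here):

#include <cstddef>
#include <cstdint>

struct hsa_dispatch_packet_prefix {
  uint16_t header;           // offset 0
  uint16_t setup;            // offset 2
  uint16_t workgroup_size_x; // offset 4 (used by EmitAMDGPUWorkGroupSize)
  uint16_t workgroup_size_y; // offset 6
  uint16_t workgroup_size_z; // offset 8
  uint16_t reserved0;        // offset 10
  uint32_t grid_size_x;      // offset 12 -> Index == 0
  uint32_t grid_size_y;      // offset 16 -> Index == 1
  uint32_t grid_size_z;      // offset 20 -> Index == 2
};

static_assert(offsetof(hsa_dispatch_packet_prefix, grid_size_x) == 12,
              "matches XOffset in EmitAMDGPUGridSize");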
} // namespace
// For processing memory ordering and memory scope arguments of various
@@ -14811,6 +15345,32 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
return Builder.CreateCall(F, { Src0, Builder.getFalse() });
}
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
+ Intrinsic::ID Intrin;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ Intrin = Intrinsic::amdgcn_ds_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ Intrin = Intrinsic::amdgcn_ds_fmin;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+ Intrin = Intrinsic::amdgcn_ds_fmax;
+ break;
+ }
+ llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+ llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
+ llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
+ llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
+ llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
+ llvm::FunctionType *FTy = F->getFunctionType();
+ llvm::Type *PTy = FTy->getParamType(0);
+ Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
+ return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
+ }
case AMDGPU::BI__builtin_amdgcn_read_exec: {
CallInst *CI = cast<CallInst>(
EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
@@ -14842,6 +15402,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
return EmitAMDGPUWorkGroupSize(*this, 2);
+ // amdgcn grid size
+ case AMDGPU::BI__builtin_amdgcn_grid_size_x:
+ return EmitAMDGPUGridSize(*this, 0);
+ case AMDGPU::BI__builtin_amdgcn_grid_size_y:
+ return EmitAMDGPUGridSize(*this, 1);
+ case AMDGPU::BI__builtin_amdgcn_grid_size_z:
+ return EmitAMDGPUGridSize(*this, 2);
+
// r600 intrinsics
case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
@@ -15089,11 +15657,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
// Constant-fold the M4 and M5 mask arguments.
- llvm::APSInt M4, M5;
- bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
- bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
- assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
- (void)IsConstM4; (void)IsConstM5;
+ llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
+ llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
// Check whether this instance can be represented via a LLVM standard
// intrinsic. We only support some combinations of M4 and M5.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
@@ -15148,10 +15713,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
// Constant-fold the M4 mask argument.
- llvm::APSInt M4;
- bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
- assert(IsConstM4 && "Constant arg isn't actually constant?");
- (void)IsConstM4;
+ llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
// Check whether this instance can be represented via a LLVM standard
// intrinsic. We only support some values of M4.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
@@ -15185,10 +15747,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
// Constant-fold the M4 mask argument.
- llvm::APSInt M4;
- bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
- assert(IsConstM4 && "Constant arg isn't actually constant?");
- (void)IsConstM4;
+ llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
// Check whether this instance can be represented via a LLVM standard
// intrinsic. We only support some values of M4.
Intrinsic::ID ID = Intrinsic::not_intrinsic;
@@ -15855,10 +16414,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
Address Dst = EmitPointerWithAlignment(E->getArg(0));
Value *Src = EmitScalarExpr(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
- llvm::APSInt isColMajorArg;
- if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
+ Optional<llvm::APSInt> isColMajorArg =
+ E->getArg(3)->getIntegerConstantExpr(getContext());
+ if (!isColMajorArg)
return nullptr;
- bool isColMajor = isColMajorArg.getSExtValue();
+ bool isColMajor = isColMajorArg->getSExtValue();
NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
unsigned IID = isColMajor ? II.IID_col : II.IID_row;
if (IID == 0)
@@ -15899,10 +16459,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
Value *Dst = EmitScalarExpr(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *Ldm = EmitScalarExpr(E->getArg(2));
- llvm::APSInt isColMajorArg;
- if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
+ Optional<llvm::APSInt> isColMajorArg =
+ E->getArg(3)->getIntegerConstantExpr(getContext());
+ if (!isColMajorArg)
return nullptr;
- bool isColMajor = isColMajorArg.getSExtValue();
+ bool isColMajor = isColMajorArg->getSExtValue();
NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
unsigned IID = isColMajor ? II.IID_col : II.IID_row;
if (IID == 0)
@@ -15949,16 +16510,20 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
Address SrcA = EmitPointerWithAlignment(E->getArg(1));
Address SrcB = EmitPointerWithAlignment(E->getArg(2));
Address SrcC = EmitPointerWithAlignment(E->getArg(3));
- llvm::APSInt LayoutArg;
- if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
+ Optional<llvm::APSInt> LayoutArg =
+ E->getArg(4)->getIntegerConstantExpr(getContext());
+ if (!LayoutArg)
return nullptr;
- int Layout = LayoutArg.getSExtValue();
+ int Layout = LayoutArg->getSExtValue();
if (Layout < 0 || Layout > 3)
return nullptr;
llvm::APSInt SatfArg;
if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1)
SatfArg = 0; // .b1 does not have satf argument.
- else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
+ else if (Optional<llvm::APSInt> OptSatfArg =
+ E->getArg(5)->getIntegerConstantExpr(getContext()))
+ SatfArg = *OptSatfArg;
+ else
return nullptr;
bool Satf = SatfArg.getSExtValue();
NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
@@ -16106,16 +16671,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_memory_size: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *I = EmitScalarExpr(E->getArg(0));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
return Builder.CreateCall(Callee, I);
}
case WebAssembly::BI__builtin_wasm_memory_grow: {
llvm::Type *ResultType = ConvertType(E->getType());
- Value *Args[] = {
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1))
- };
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
+ Value *Args[] = {EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1))};
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
return Builder.CreateCall(Callee, Args);
}
case WebAssembly::BI__builtin_wasm_tls_size: {
@@ -16138,28 +16703,28 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
return Builder.CreateCall(Callee, {Tag, Obj});
}
- case WebAssembly::BI__builtin_wasm_rethrow_in_catch: {
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch);
+ case WebAssembly::BI__builtin_wasm_rethrow: {
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
return Builder.CreateCall(Callee);
}
- case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
+ case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
Value *Addr = EmitScalarExpr(E->getArg(0));
Value *Expected = EmitScalarExpr(E->getArg(1));
Value *Timeout = EmitScalarExpr(E->getArg(2));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
}
- case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
+ case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
Value *Addr = EmitScalarExpr(E->getArg(0));
Value *Expected = EmitScalarExpr(E->getArg(1));
Value *Timeout = EmitScalarExpr(E->getArg(2));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
}
- case WebAssembly::BI__builtin_wasm_atomic_notify: {
+ case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
Value *Addr = EmitScalarExpr(E->getArg(0));
Value *Count = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
return Builder.CreateCall(Callee, {Addr, Count});
}
case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
@@ -16190,7 +16755,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
- {ResT, Src->getType()});
+ {ResT, Src->getType()});
return Builder.CreateCall(Callee, {Src});
}
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
@@ -16201,7 +16766,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
- {ResT, Src->getType()});
+ {ResT, Src->getType()});
return Builder.CreateCall(Callee, {Src});
}
case WebAssembly::BI__builtin_wasm_min_f32:
@@ -16210,8 +16775,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_min_f64x2: {
Value *LHS = EmitScalarExpr(E->getArg(0));
Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::minimum,
- ConvertType(E->getType()));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
case WebAssembly::BI__builtin_wasm_max_f32:
@@ -16220,8 +16785,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_max_f64x2: {
Value *LHS = EmitScalarExpr(E->getArg(0));
Value *RHS = EmitScalarExpr(E->getArg(1));
- Function *Callee = CGM.getIntrinsic(Intrinsic::maximum,
- ConvertType(E->getType()));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
case WebAssembly::BI__builtin_wasm_pmin_f32x4:
@@ -16287,9 +16852,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
- llvm::APSInt LaneConst;
- if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
- llvm_unreachable("Constant arg isn't actually constant?");
+ llvm::APSInt LaneConst =
+ *E->getArg(1)->getIntegerConstantExpr(getContext());
Value *Vec = EmitScalarExpr(E->getArg(0));
Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
Value *Extract = Builder.CreateExtractElement(Vec, Lane);
@@ -16315,9 +16879,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
- llvm::APSInt LaneConst;
- if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
- llvm_unreachable("Constant arg isn't actually constant?");
+ llvm::APSInt LaneConst =
+ *E->getArg(1)->getIntegerConstantExpr(getContext());
Value *Vec = EmitScalarExpr(E->getArg(0));
Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
Value *Val = EmitScalarExpr(E->getArg(2));
@@ -16430,12 +16993,95 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_q15mulr_saturate_s_i16x8: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_q15mulr_saturate_signed);
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2:
+ IntNo = Intrinsic::wasm_extmul_low_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2:
+ IntNo = Intrinsic::wasm_extmul_low_unsigned;
+ break;
+ case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2:
+ IntNo = Intrinsic::wasm_extmul_high_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4:
+ case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2:
+ IntNo = Intrinsic::wasm_extmul_high_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexptected builtin ID");
+ }
+
+ Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
+ IntNo = Intrinsic::wasm_extadd_pairwise_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
+ IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexptected builtin ID");
+ }
+
+ Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, Vec);
+ }
case WebAssembly::BI__builtin_wasm_bitselect: {
Value *V1 = EmitScalarExpr(E->getArg(0));
Value *V2 = EmitScalarExpr(E->getArg(1));
Value *C = EmitScalarExpr(E->getArg(2));
- Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
- ConvertType(E->getType()));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {V1, V2, C});
+ }
+ case WebAssembly::BI__builtin_wasm_signselect_i8x16:
+ case WebAssembly::BI__builtin_wasm_signselect_i16x8:
+ case WebAssembly::BI__builtin_wasm_signselect_i32x4:
+ case WebAssembly::BI__builtin_wasm_signselect_i64x2: {
+ Value *V1 = EmitScalarExpr(E->getArg(0));
+ Value *V2 = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_signselect, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {V1, V2, C});
}
case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
@@ -16444,6 +17090,17 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_popcnt);
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_eq_i64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_eq);
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
case WebAssembly::BI__builtin_wasm_any_true_i8x16:
case WebAssembly::BI__builtin_wasm_any_true_i16x8:
case WebAssembly::BI__builtin_wasm_any_true_i32x4:
@@ -16475,7 +17132,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
}
case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
- case WebAssembly::BI__builtin_wasm_bitmask_i32x4: {
+ case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
+ case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
Function *Callee =
CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
@@ -16539,39 +17197,124 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
return Builder.CreateCall(Callee, {Low, High});
}
- case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
+ case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i64x2:
+ case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i64x2:
+ case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i64x2:
+ case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
unsigned IntNo;
switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i64x2:
IntNo = Intrinsic::wasm_widen_low_signed;
break;
- case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i64x2:
IntNo = Intrinsic::wasm_widen_high_signed;
break;
- case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i64x2:
IntNo = Intrinsic::wasm_widen_low_unsigned;
break;
- case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
- case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8:
+ case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i64x2:
IntNo = Intrinsic::wasm_widen_high_unsigned;
break;
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo);
+ return Builder.CreateCall(Callee, Vec);
+ }
+ case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
+ case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
+ IntNo = Intrinsic::wasm_convert_low_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2:
+ IntNo = Intrinsic::wasm_convert_low_unsigned;
+ break;
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo);
+ return Builder.CreateCall(Callee, Vec);
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_s_f64x2_i32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_u_f64x2_i32x4: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_s_f64x2_i32x4:
+ IntNo = Intrinsic::wasm_trunc_saturate_zero_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_u_f64x2_i32x4:
+ IntNo = Intrinsic::wasm_trunc_saturate_zero_unsigned;
+ break;
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo);
+ return Builder.CreateCall(Callee, Vec);
+ }
+ case WebAssembly::BI__builtin_wasm_demote_zero_f64x2_f32x4: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_demote_zero);
+ return Builder.CreateCall(Callee, Vec);
+ }
+ case WebAssembly::BI__builtin_wasm_promote_low_f32x4_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_promote_low);
+ return Builder.CreateCall(Callee, Vec);
+ }
+ case WebAssembly::BI__builtin_wasm_load32_zero: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
+ return Builder.CreateCall(Callee, {Ptr});
+ }
+ case WebAssembly::BI__builtin_wasm_load64_zero: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero);
+ return Builder.CreateCall(Callee, {Ptr});
+ }
+ case WebAssembly::BI__builtin_wasm_load8_lane:
+ case WebAssembly::BI__builtin_wasm_load16_lane:
+ case WebAssembly::BI__builtin_wasm_load32_lane:
+ case WebAssembly::BI__builtin_wasm_load64_lane:
+ case WebAssembly::BI__builtin_wasm_store8_lane:
+ case WebAssembly::BI__builtin_wasm_store16_lane:
+ case WebAssembly::BI__builtin_wasm_store32_lane:
+ case WebAssembly::BI__builtin_wasm_store64_lane: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Vec = EmitScalarExpr(E->getArg(1));
+ Optional<llvm::APSInt> LaneIdxConst =
+ E->getArg(2)->getIntegerConstantExpr(getContext());
+ assert(LaneIdxConst && "Constant arg isn't actually constant?");
+ Value *LaneIdx = llvm::ConstantInt::get(getLLVMContext(), *LaneIdxConst);
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_load8_lane:
+ IntNo = Intrinsic::wasm_load8_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_load16_lane:
+ IntNo = Intrinsic::wasm_load16_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_load32_lane:
+ IntNo = Intrinsic::wasm_load32_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_load64_lane:
+ IntNo = Intrinsic::wasm_load64_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_store8_lane:
+ IntNo = Intrinsic::wasm_store8_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_store16_lane:
+ IntNo = Intrinsic::wasm_store16_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_store32_lane:
+ IntNo = Intrinsic::wasm_store32_lane;
+ break;
+ case WebAssembly::BI__builtin_wasm_store64_lane:
+ IntNo = Intrinsic::wasm_store64_lane;
+ break;
default:
llvm_unreachable("unexpected builtin ID");
}
- Function *Callee =
- CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
- return Builder.CreateCall(Callee, Vec);
+ Function *Callee = CGM.getIntrinsic(IntNo);
+ return Builder.CreateCall(Callee, {Ptr, Vec, LaneIdx});
}
case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
Value *Ops[18];
@@ -16579,14 +17322,24 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
while (OpIdx < 18) {
- llvm::APSInt LaneConst;
- if (!E->getArg(OpIdx)->isIntegerConstantExpr(LaneConst, getContext()))
- llvm_unreachable("Constant arg isn't actually constant?");
- Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Optional<llvm::APSInt> LaneConst =
+ E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
+ assert(LaneConst && "Constant arg isn't actually constant?");
+ Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
}
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
return Builder.CreateCall(Callee, Ops);
}
+ case WebAssembly::BI__builtin_wasm_prefetch_t: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_prefetch_t);
+ return Builder.CreateCall(Callee, Ptr);
+ }
+ case WebAssembly::BI__builtin_wasm_prefetch_nt: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_prefetch_nt);
+ return Builder.CreateCall(Callee, Ptr);
+ }
default:
return nullptr;
}