Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 2363
1 file changed, 1558 insertions, 805 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8994b939093e..113541bd5024 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -78,7 +78,8 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, } if (CGF.CGM.stopAutoInit()) return; - CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); + auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); + I->addAnnotationMetadata("auto-init"); } /// getBuiltinLibFunction - Given a builtin id for a function like @@ -303,6 +304,10 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering::Monotonic : SuccessOrdering; + // The atomic instruction is marked volatile for consistency with MSVC. This + // blocks the few atomics optimizations that LLVM has. If we want to optimize + // _Interlocked* operations in the future, we will have to remove the volatile + // marker. auto *Result = CGF.Builder.CreateAtomicCmpXchg( Destination, Comparand, Exchange, SuccessOrdering, FailureOrdering); @@ -310,6 +315,68 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, return CGF.Builder.CreateExtractValue(Result, 0); } +// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are +// prototyped like this: +// +// unsigned char _InterlockedCompareExchange128...( +// __int64 volatile * _Destination, +// __int64 _ExchangeHigh, +// __int64 _ExchangeLow, +// __int64 * _ComparandResult); +static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, + const CallExpr *E, + AtomicOrdering SuccessOrdering) { + assert(E->getNumArgs() == 4); + llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2)); + llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3)); + + assert(Destination->getType()->isPointerTy()); + assert(!ExchangeHigh->getType()->isPointerTy()); + assert(!ExchangeLow->getType()->isPointerTy()); + assert(ComparandPtr->getType()->isPointerTy()); + + // For Release ordering, the failure ordering should be Monotonic. + auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release + ? AtomicOrdering::Monotonic + : SuccessOrdering; + + // Convert to i128 pointers and values. + llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128); + llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); + Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy); + Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy), + CGF.getContext().toCharUnitsFromBits(128)); + + // (((i128)hi) << 64) | ((i128)lo) + ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); + ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty); + ExchangeHigh = + CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64)); + llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow); + + // Load the comparand for the instruction. + llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult); + + auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + SuccessOrdering, FailureOrdering); + + // The atomic instruction is marked volatile for consistency with MSVC. This + // blocks the few atomics optimizations that LLVM has. If we want to optimize + // _Interlocked* operations in the future, we will have to remove the volatile + // marker. 
+ CXI->setVolatile(true); + + // Store the result as an outparameter. + CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0), + ComparandResult); + + // Get the success boolean and zero extend it to i8. + Value *Success = CGF.Builder.CreateExtractValue(CXI, 1); + return CGF.Builder.CreateZExt(Success, CGF.Int8Ty); +} + static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { assert(E->getArg(0)->getType()->isPointerType()); @@ -373,6 +440,7 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); if (CGF.Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); } else { @@ -390,6 +458,7 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); if (CGF.Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -408,6 +477,7 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); if (CGF.Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { @@ -489,6 +559,7 @@ emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); if (CGF.Builder.getIsFPConstrained()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {ResultType, Src0->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); @@ -805,10 +876,15 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, AsmOS << "bt"; if (Action) AsmOS << Action; - AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}"; + AsmOS << SizeSuffix << " $2, ($1)"; // Build the constraints. FIXME: We should support immediates when possible. 
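For reference, the rewritten bit-test builtins now read the carry flag through an asm flag-output constraint instead of an explicit setc, which is what the "={@ccc}" in the constraint string built just below encodes. A rough source-level analogue, assuming x86 GCC/Clang flag-output syntax (function and variable names here are illustrative only, not part of the patch):

    unsigned char bittestandset64(long long *Base, long long Idx) {
      unsigned char Carry;
      __asm__("btsq %2, (%1)"
              : "=@ccc"(Carry)        // carry flag captured via flag-output constraint
              : "r"(Base), "r"(Idx)
              : "cc", "memory");      // memory clobber, matching the new ~{memory}
      return Carry;
    }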
- std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}"; + std::string Constraints = "={@ccc},r,r,~{cc},~{memory}"; + std::string MachineClobbers = CGF.getTarget().getClobbers(); + if (!MachineClobbers.empty()) { + Constraints += ','; + Constraints += MachineClobbers; + } llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(E->getArg(1)->getType())); @@ -987,6 +1063,10 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedCompareExchange_acq, _InterlockedCompareExchange_rel, _InterlockedCompareExchange_nf, + _InterlockedCompareExchange128, + _InterlockedCompareExchange128_acq, + _InterlockedCompareExchange128_rel, + _InterlockedCompareExchange128_nf, _InterlockedOr_acq, _InterlockedOr_rel, _InterlockedOr_nf, @@ -1005,16 +1085,352 @@ enum class CodeGenFunction::MSVCIntrin { __fastfail, }; +static Optional<CodeGenFunction::MSVCIntrin> +translateArmToMsvcIntrin(unsigned BuiltinID) { + using MSVCIntrin = CodeGenFunction::MSVCIntrin; + switch (BuiltinID) { + default: + return None; + case ARM::BI_BitScanForward: + case ARM::BI_BitScanForward64: + return MSVCIntrin::_BitScanForward; + case ARM::BI_BitScanReverse: + case ARM::BI_BitScanReverse64: + return MSVCIntrin::_BitScanReverse; + case ARM::BI_InterlockedAnd64: + return MSVCIntrin::_InterlockedAnd; + case ARM::BI_InterlockedExchange64: + return MSVCIntrin::_InterlockedExchange; + case ARM::BI_InterlockedExchangeAdd64: + return MSVCIntrin::_InterlockedExchangeAdd; + case ARM::BI_InterlockedExchangeSub64: + return MSVCIntrin::_InterlockedExchangeSub; + case ARM::BI_InterlockedOr64: + return MSVCIntrin::_InterlockedOr; + case ARM::BI_InterlockedXor64: + return MSVCIntrin::_InterlockedXor; + case ARM::BI_InterlockedDecrement64: + return MSVCIntrin::_InterlockedDecrement; + case ARM::BI_InterlockedIncrement64: + return MSVCIntrin::_InterlockedIncrement; + case ARM::BI_InterlockedExchangeAdd8_acq: + case ARM::BI_InterlockedExchangeAdd16_acq: + case ARM::BI_InterlockedExchangeAdd_acq: + case ARM::BI_InterlockedExchangeAdd64_acq: + return MSVCIntrin::_InterlockedExchangeAdd_acq; + case ARM::BI_InterlockedExchangeAdd8_rel: + case ARM::BI_InterlockedExchangeAdd16_rel: + case ARM::BI_InterlockedExchangeAdd_rel: + case ARM::BI_InterlockedExchangeAdd64_rel: + return MSVCIntrin::_InterlockedExchangeAdd_rel; + case ARM::BI_InterlockedExchangeAdd8_nf: + case ARM::BI_InterlockedExchangeAdd16_nf: + case ARM::BI_InterlockedExchangeAdd_nf: + case ARM::BI_InterlockedExchangeAdd64_nf: + return MSVCIntrin::_InterlockedExchangeAdd_nf; + case ARM::BI_InterlockedExchange8_acq: + case ARM::BI_InterlockedExchange16_acq: + case ARM::BI_InterlockedExchange_acq: + case ARM::BI_InterlockedExchange64_acq: + return MSVCIntrin::_InterlockedExchange_acq; + case ARM::BI_InterlockedExchange8_rel: + case ARM::BI_InterlockedExchange16_rel: + case ARM::BI_InterlockedExchange_rel: + case ARM::BI_InterlockedExchange64_rel: + return MSVCIntrin::_InterlockedExchange_rel; + case ARM::BI_InterlockedExchange8_nf: + case ARM::BI_InterlockedExchange16_nf: + case ARM::BI_InterlockedExchange_nf: + case ARM::BI_InterlockedExchange64_nf: + return MSVCIntrin::_InterlockedExchange_nf; + case ARM::BI_InterlockedCompareExchange8_acq: + case ARM::BI_InterlockedCompareExchange16_acq: + case ARM::BI_InterlockedCompareExchange_acq: + case ARM::BI_InterlockedCompareExchange64_acq: + return MSVCIntrin::_InterlockedCompareExchange_acq; + case ARM::BI_InterlockedCompareExchange8_rel: + case ARM::BI_InterlockedCompareExchange16_rel: + case 
ARM::BI_InterlockedCompareExchange_rel: + case ARM::BI_InterlockedCompareExchange64_rel: + return MSVCIntrin::_InterlockedCompareExchange_rel; + case ARM::BI_InterlockedCompareExchange8_nf: + case ARM::BI_InterlockedCompareExchange16_nf: + case ARM::BI_InterlockedCompareExchange_nf: + case ARM::BI_InterlockedCompareExchange64_nf: + return MSVCIntrin::_InterlockedCompareExchange_nf; + case ARM::BI_InterlockedOr8_acq: + case ARM::BI_InterlockedOr16_acq: + case ARM::BI_InterlockedOr_acq: + case ARM::BI_InterlockedOr64_acq: + return MSVCIntrin::_InterlockedOr_acq; + case ARM::BI_InterlockedOr8_rel: + case ARM::BI_InterlockedOr16_rel: + case ARM::BI_InterlockedOr_rel: + case ARM::BI_InterlockedOr64_rel: + return MSVCIntrin::_InterlockedOr_rel; + case ARM::BI_InterlockedOr8_nf: + case ARM::BI_InterlockedOr16_nf: + case ARM::BI_InterlockedOr_nf: + case ARM::BI_InterlockedOr64_nf: + return MSVCIntrin::_InterlockedOr_nf; + case ARM::BI_InterlockedXor8_acq: + case ARM::BI_InterlockedXor16_acq: + case ARM::BI_InterlockedXor_acq: + case ARM::BI_InterlockedXor64_acq: + return MSVCIntrin::_InterlockedXor_acq; + case ARM::BI_InterlockedXor8_rel: + case ARM::BI_InterlockedXor16_rel: + case ARM::BI_InterlockedXor_rel: + case ARM::BI_InterlockedXor64_rel: + return MSVCIntrin::_InterlockedXor_rel; + case ARM::BI_InterlockedXor8_nf: + case ARM::BI_InterlockedXor16_nf: + case ARM::BI_InterlockedXor_nf: + case ARM::BI_InterlockedXor64_nf: + return MSVCIntrin::_InterlockedXor_nf; + case ARM::BI_InterlockedAnd8_acq: + case ARM::BI_InterlockedAnd16_acq: + case ARM::BI_InterlockedAnd_acq: + case ARM::BI_InterlockedAnd64_acq: + return MSVCIntrin::_InterlockedAnd_acq; + case ARM::BI_InterlockedAnd8_rel: + case ARM::BI_InterlockedAnd16_rel: + case ARM::BI_InterlockedAnd_rel: + case ARM::BI_InterlockedAnd64_rel: + return MSVCIntrin::_InterlockedAnd_rel; + case ARM::BI_InterlockedAnd8_nf: + case ARM::BI_InterlockedAnd16_nf: + case ARM::BI_InterlockedAnd_nf: + case ARM::BI_InterlockedAnd64_nf: + return MSVCIntrin::_InterlockedAnd_nf; + case ARM::BI_InterlockedIncrement16_acq: + case ARM::BI_InterlockedIncrement_acq: + case ARM::BI_InterlockedIncrement64_acq: + return MSVCIntrin::_InterlockedIncrement_acq; + case ARM::BI_InterlockedIncrement16_rel: + case ARM::BI_InterlockedIncrement_rel: + case ARM::BI_InterlockedIncrement64_rel: + return MSVCIntrin::_InterlockedIncrement_rel; + case ARM::BI_InterlockedIncrement16_nf: + case ARM::BI_InterlockedIncrement_nf: + case ARM::BI_InterlockedIncrement64_nf: + return MSVCIntrin::_InterlockedIncrement_nf; + case ARM::BI_InterlockedDecrement16_acq: + case ARM::BI_InterlockedDecrement_acq: + case ARM::BI_InterlockedDecrement64_acq: + return MSVCIntrin::_InterlockedDecrement_acq; + case ARM::BI_InterlockedDecrement16_rel: + case ARM::BI_InterlockedDecrement_rel: + case ARM::BI_InterlockedDecrement64_rel: + return MSVCIntrin::_InterlockedDecrement_rel; + case ARM::BI_InterlockedDecrement16_nf: + case ARM::BI_InterlockedDecrement_nf: + case ARM::BI_InterlockedDecrement64_nf: + return MSVCIntrin::_InterlockedDecrement_nf; + } + llvm_unreachable("must return from switch"); +} + +static Optional<CodeGenFunction::MSVCIntrin> +translateAarch64ToMsvcIntrin(unsigned BuiltinID) { + using MSVCIntrin = CodeGenFunction::MSVCIntrin; + switch (BuiltinID) { + default: + return None; + case AArch64::BI_BitScanForward: + case AArch64::BI_BitScanForward64: + return MSVCIntrin::_BitScanForward; + case AArch64::BI_BitScanReverse: + case AArch64::BI_BitScanReverse64: + return MSVCIntrin::_BitScanReverse; + 
case AArch64::BI_InterlockedAnd64: + return MSVCIntrin::_InterlockedAnd; + case AArch64::BI_InterlockedExchange64: + return MSVCIntrin::_InterlockedExchange; + case AArch64::BI_InterlockedExchangeAdd64: + return MSVCIntrin::_InterlockedExchangeAdd; + case AArch64::BI_InterlockedExchangeSub64: + return MSVCIntrin::_InterlockedExchangeSub; + case AArch64::BI_InterlockedOr64: + return MSVCIntrin::_InterlockedOr; + case AArch64::BI_InterlockedXor64: + return MSVCIntrin::_InterlockedXor; + case AArch64::BI_InterlockedDecrement64: + return MSVCIntrin::_InterlockedDecrement; + case AArch64::BI_InterlockedIncrement64: + return MSVCIntrin::_InterlockedIncrement; + case AArch64::BI_InterlockedExchangeAdd8_acq: + case AArch64::BI_InterlockedExchangeAdd16_acq: + case AArch64::BI_InterlockedExchangeAdd_acq: + case AArch64::BI_InterlockedExchangeAdd64_acq: + return MSVCIntrin::_InterlockedExchangeAdd_acq; + case AArch64::BI_InterlockedExchangeAdd8_rel: + case AArch64::BI_InterlockedExchangeAdd16_rel: + case AArch64::BI_InterlockedExchangeAdd_rel: + case AArch64::BI_InterlockedExchangeAdd64_rel: + return MSVCIntrin::_InterlockedExchangeAdd_rel; + case AArch64::BI_InterlockedExchangeAdd8_nf: + case AArch64::BI_InterlockedExchangeAdd16_nf: + case AArch64::BI_InterlockedExchangeAdd_nf: + case AArch64::BI_InterlockedExchangeAdd64_nf: + return MSVCIntrin::_InterlockedExchangeAdd_nf; + case AArch64::BI_InterlockedExchange8_acq: + case AArch64::BI_InterlockedExchange16_acq: + case AArch64::BI_InterlockedExchange_acq: + case AArch64::BI_InterlockedExchange64_acq: + return MSVCIntrin::_InterlockedExchange_acq; + case AArch64::BI_InterlockedExchange8_rel: + case AArch64::BI_InterlockedExchange16_rel: + case AArch64::BI_InterlockedExchange_rel: + case AArch64::BI_InterlockedExchange64_rel: + return MSVCIntrin::_InterlockedExchange_rel; + case AArch64::BI_InterlockedExchange8_nf: + case AArch64::BI_InterlockedExchange16_nf: + case AArch64::BI_InterlockedExchange_nf: + case AArch64::BI_InterlockedExchange64_nf: + return MSVCIntrin::_InterlockedExchange_nf; + case AArch64::BI_InterlockedCompareExchange8_acq: + case AArch64::BI_InterlockedCompareExchange16_acq: + case AArch64::BI_InterlockedCompareExchange_acq: + case AArch64::BI_InterlockedCompareExchange64_acq: + return MSVCIntrin::_InterlockedCompareExchange_acq; + case AArch64::BI_InterlockedCompareExchange8_rel: + case AArch64::BI_InterlockedCompareExchange16_rel: + case AArch64::BI_InterlockedCompareExchange_rel: + case AArch64::BI_InterlockedCompareExchange64_rel: + return MSVCIntrin::_InterlockedCompareExchange_rel; + case AArch64::BI_InterlockedCompareExchange8_nf: + case AArch64::BI_InterlockedCompareExchange16_nf: + case AArch64::BI_InterlockedCompareExchange_nf: + case AArch64::BI_InterlockedCompareExchange64_nf: + return MSVCIntrin::_InterlockedCompareExchange_nf; + case AArch64::BI_InterlockedCompareExchange128: + return MSVCIntrin::_InterlockedCompareExchange128; + case AArch64::BI_InterlockedCompareExchange128_acq: + return MSVCIntrin::_InterlockedCompareExchange128_acq; + case AArch64::BI_InterlockedCompareExchange128_nf: + return MSVCIntrin::_InterlockedCompareExchange128_nf; + case AArch64::BI_InterlockedCompareExchange128_rel: + return MSVCIntrin::_InterlockedCompareExchange128_rel; + case AArch64::BI_InterlockedOr8_acq: + case AArch64::BI_InterlockedOr16_acq: + case AArch64::BI_InterlockedOr_acq: + case AArch64::BI_InterlockedOr64_acq: + return MSVCIntrin::_InterlockedOr_acq; + case AArch64::BI_InterlockedOr8_rel: + case 
AArch64::BI_InterlockedOr16_rel: + case AArch64::BI_InterlockedOr_rel: + case AArch64::BI_InterlockedOr64_rel: + return MSVCIntrin::_InterlockedOr_rel; + case AArch64::BI_InterlockedOr8_nf: + case AArch64::BI_InterlockedOr16_nf: + case AArch64::BI_InterlockedOr_nf: + case AArch64::BI_InterlockedOr64_nf: + return MSVCIntrin::_InterlockedOr_nf; + case AArch64::BI_InterlockedXor8_acq: + case AArch64::BI_InterlockedXor16_acq: + case AArch64::BI_InterlockedXor_acq: + case AArch64::BI_InterlockedXor64_acq: + return MSVCIntrin::_InterlockedXor_acq; + case AArch64::BI_InterlockedXor8_rel: + case AArch64::BI_InterlockedXor16_rel: + case AArch64::BI_InterlockedXor_rel: + case AArch64::BI_InterlockedXor64_rel: + return MSVCIntrin::_InterlockedXor_rel; + case AArch64::BI_InterlockedXor8_nf: + case AArch64::BI_InterlockedXor16_nf: + case AArch64::BI_InterlockedXor_nf: + case AArch64::BI_InterlockedXor64_nf: + return MSVCIntrin::_InterlockedXor_nf; + case AArch64::BI_InterlockedAnd8_acq: + case AArch64::BI_InterlockedAnd16_acq: + case AArch64::BI_InterlockedAnd_acq: + case AArch64::BI_InterlockedAnd64_acq: + return MSVCIntrin::_InterlockedAnd_acq; + case AArch64::BI_InterlockedAnd8_rel: + case AArch64::BI_InterlockedAnd16_rel: + case AArch64::BI_InterlockedAnd_rel: + case AArch64::BI_InterlockedAnd64_rel: + return MSVCIntrin::_InterlockedAnd_rel; + case AArch64::BI_InterlockedAnd8_nf: + case AArch64::BI_InterlockedAnd16_nf: + case AArch64::BI_InterlockedAnd_nf: + case AArch64::BI_InterlockedAnd64_nf: + return MSVCIntrin::_InterlockedAnd_nf; + case AArch64::BI_InterlockedIncrement16_acq: + case AArch64::BI_InterlockedIncrement_acq: + case AArch64::BI_InterlockedIncrement64_acq: + return MSVCIntrin::_InterlockedIncrement_acq; + case AArch64::BI_InterlockedIncrement16_rel: + case AArch64::BI_InterlockedIncrement_rel: + case AArch64::BI_InterlockedIncrement64_rel: + return MSVCIntrin::_InterlockedIncrement_rel; + case AArch64::BI_InterlockedIncrement16_nf: + case AArch64::BI_InterlockedIncrement_nf: + case AArch64::BI_InterlockedIncrement64_nf: + return MSVCIntrin::_InterlockedIncrement_nf; + case AArch64::BI_InterlockedDecrement16_acq: + case AArch64::BI_InterlockedDecrement_acq: + case AArch64::BI_InterlockedDecrement64_acq: + return MSVCIntrin::_InterlockedDecrement_acq; + case AArch64::BI_InterlockedDecrement16_rel: + case AArch64::BI_InterlockedDecrement_rel: + case AArch64::BI_InterlockedDecrement64_rel: + return MSVCIntrin::_InterlockedDecrement_rel; + case AArch64::BI_InterlockedDecrement16_nf: + case AArch64::BI_InterlockedDecrement_nf: + case AArch64::BI_InterlockedDecrement64_nf: + return MSVCIntrin::_InterlockedDecrement_nf; + } + llvm_unreachable("must return from switch"); +} + +static Optional<CodeGenFunction::MSVCIntrin> +translateX86ToMsvcIntrin(unsigned BuiltinID) { + using MSVCIntrin = CodeGenFunction::MSVCIntrin; + switch (BuiltinID) { + default: + return None; + case clang::X86::BI_BitScanForward: + case clang::X86::BI_BitScanForward64: + return MSVCIntrin::_BitScanForward; + case clang::X86::BI_BitScanReverse: + case clang::X86::BI_BitScanReverse64: + return MSVCIntrin::_BitScanReverse; + case clang::X86::BI_InterlockedAnd64: + return MSVCIntrin::_InterlockedAnd; + case clang::X86::BI_InterlockedCompareExchange128: + return MSVCIntrin::_InterlockedCompareExchange128; + case clang::X86::BI_InterlockedExchange64: + return MSVCIntrin::_InterlockedExchange; + case clang::X86::BI_InterlockedExchangeAdd64: + return MSVCIntrin::_InterlockedExchangeAdd; + case 
clang::X86::BI_InterlockedExchangeSub64: + return MSVCIntrin::_InterlockedExchangeSub; + case clang::X86::BI_InterlockedOr64: + return MSVCIntrin::_InterlockedOr; + case clang::X86::BI_InterlockedXor64: + return MSVCIntrin::_InterlockedXor; + case clang::X86::BI_InterlockedDecrement64: + return MSVCIntrin::_InterlockedDecrement; + case clang::X86::BI_InterlockedIncrement64: + return MSVCIntrin::_InterlockedIncrement; + } + llvm_unreachable("must return from switch"); +} + +// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated. Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E) { switch (BuiltinID) { case MSVCIntrin::_BitScanForward: case MSVCIntrin::_BitScanReverse: { + Address IndexAddress(EmitPointerWithAlignment(E->getArg(0))); Value *ArgValue = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = ArgValue->getType(); llvm::Type *IndexType = - EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); + IndexAddress.getPointer()->getType()->getPointerElementType(); llvm::Type *ResultType = ConvertType(E->getType()); Value *ArgZero = llvm::Constant::getNullValue(ArgType); @@ -1033,7 +1449,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Result->addIncoming(ResZero, Begin); Builder.SetInsertPoint(NotZero); - Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); if (BuiltinID == MSVCIntrin::_BitScanForward) { Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); @@ -1092,6 +1507,15 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedCompareExchange_nf: return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedCompareExchange128: + return EmitAtomicCmpXchg128ForMSIntrin( + *this, E, AtomicOrdering::SequentiallyConsistent); + case MSVCIntrin::_InterlockedCompareExchange128_acq: + return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedCompareExchange128_rel: + return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedCompareExchange128_nf: + return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedOr_acq: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, AtomicOrdering::Acquire); @@ -1408,6 +1832,47 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { return RValue::get(BufAddr.getPointer()); } +static bool isSpecialUnsignedMultiplySignedResult( + unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, + WidthAndSignedness ResultInfo) { + return BuiltinID == Builtin::BI__builtin_mul_overflow && + Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width && + !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed; +} + +static RValue EmitCheckedUnsignedMultiplySignedResult( + CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, + const clang::Expr *Op2, WidthAndSignedness Op2Info, + const clang::Expr *ResultArg, QualType ResultQTy, + WidthAndSignedness ResultInfo) { + assert(isSpecialUnsignedMultiplySignedResult( + Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) && + "Cannot specialize this multiply"); + + llvm::Value *V1 = CGF.EmitScalarExpr(Op1); + llvm::Value *V2 = CGF.EmitScalarExpr(Op2); + + llvm::Value *HasOverflow; + llvm::Value *Result = EmitOverflowIntrinsic( + CGF, 
llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow); + + // The intrinsic call will detect overflow when the value is > UINT_MAX, + // however, since the original builtin had a signed result, we need to report + // an overflow when the result is greater than INT_MAX. + auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width); + llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax); + + llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue); + HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow); + + bool isVolatile = + ResultArg->getType()->getPointeeType().isVolatileQualified(); + Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); + CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, + isVolatile); + return RValue::get(HasOverflow); +} + /// Determine if a binop is a checked mixed-sign multiply we can specialize. static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, @@ -1642,6 +2107,78 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } +// Map math builtins for long-double to f128 version. +static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) { + switch (BuiltinID) { +#define MUTATE_LDBL(func) \ + case Builtin::BI__builtin_##func##l: \ + return Builtin::BI__builtin_##func##f128; + MUTATE_LDBL(sqrt) + MUTATE_LDBL(cbrt) + MUTATE_LDBL(fabs) + MUTATE_LDBL(log) + MUTATE_LDBL(log2) + MUTATE_LDBL(log10) + MUTATE_LDBL(log1p) + MUTATE_LDBL(logb) + MUTATE_LDBL(exp) + MUTATE_LDBL(exp2) + MUTATE_LDBL(expm1) + MUTATE_LDBL(fdim) + MUTATE_LDBL(hypot) + MUTATE_LDBL(ilogb) + MUTATE_LDBL(pow) + MUTATE_LDBL(fmin) + MUTATE_LDBL(fmax) + MUTATE_LDBL(ceil) + MUTATE_LDBL(trunc) + MUTATE_LDBL(rint) + MUTATE_LDBL(nearbyint) + MUTATE_LDBL(round) + MUTATE_LDBL(floor) + MUTATE_LDBL(lround) + MUTATE_LDBL(llround) + MUTATE_LDBL(lrint) + MUTATE_LDBL(llrint) + MUTATE_LDBL(fmod) + MUTATE_LDBL(modf) + MUTATE_LDBL(nan) + MUTATE_LDBL(nans) + MUTATE_LDBL(inf) + MUTATE_LDBL(fma) + MUTATE_LDBL(sin) + MUTATE_LDBL(cos) + MUTATE_LDBL(tan) + MUTATE_LDBL(sinh) + MUTATE_LDBL(cosh) + MUTATE_LDBL(tanh) + MUTATE_LDBL(asin) + MUTATE_LDBL(acos) + MUTATE_LDBL(atan) + MUTATE_LDBL(asinh) + MUTATE_LDBL(acosh) + MUTATE_LDBL(atanh) + MUTATE_LDBL(atan2) + MUTATE_LDBL(erf) + MUTATE_LDBL(erfc) + MUTATE_LDBL(ldexp) + MUTATE_LDBL(frexp) + MUTATE_LDBL(huge_val) + MUTATE_LDBL(copysign) + MUTATE_LDBL(nextafter) + MUTATE_LDBL(nexttoward) + MUTATE_LDBL(remainder) + MUTATE_LDBL(remquo) + MUTATE_LDBL(scalbln) + MUTATE_LDBL(scalbn) + MUTATE_LDBL(tgamma) + MUTATE_LDBL(lgamma) +#undef MUTATE_LDBL + default: + return BuiltinID; + } +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -1658,13 +2195,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Result.Val.getFloat())); } + // If current long-double semantics is IEEE 128-bit, replace math builtins + // of long-double with f128 equivalent. + // TODO: This mutation should also be applied to other targets other than PPC, + // after backend supports IEEE 128-bit style libcalls. + if (getTarget().getTriple().isPPC64() && + &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) + BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + + // If the builtin has been declared explicitly with an assembler label, + // disable the specialized emitting below. 
Ideally we should communicate the + // rename in IR, or at least avoid generating the intrinsic calls that are + // likely to get lowered to the renamed library functions. + const unsigned BuiltinIDIfNoAsmLabel = + FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID; + // There are LLVM math intrinsics/instructions corresponding to math library // functions except the LLVM op will never set errno while the math library // might. Also, math builtins have the same semantics as their math library // twins. Thus, we can transform math library and builtin calls to their // LLVM counterparts if the call is marked 'const' (known to never set errno). if (FD->hasAttr<ConstAttr>()) { - switch (BuiltinID) { + switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIceil: case Builtin::BIceilf: case Builtin::BIceill: @@ -1672,6 +2224,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ceilf: case Builtin::BI__builtin_ceilf16: case Builtin::BI__builtin_ceill: + case Builtin::BI__builtin_ceilf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::ceil, Intrinsic::experimental_constrained_ceil)); @@ -1693,6 +2246,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_cosf: case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: + case Builtin::BI__builtin_cosf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos)); @@ -1704,6 +2258,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_expf: case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: + case Builtin::BI__builtin_expf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp, Intrinsic::experimental_constrained_exp)); @@ -1715,6 +2270,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp2f: case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: + case Builtin::BI__builtin_exp2f128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2)); @@ -1736,6 +2292,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_floorf: case Builtin::BI__builtin_floorf16: case Builtin::BI__builtin_floorl: + case Builtin::BI__builtin_floorf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::floor, Intrinsic::experimental_constrained_floor)); @@ -1747,6 +2304,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmaf: case Builtin::BI__builtin_fmaf16: case Builtin::BI__builtin_fmal: + case Builtin::BI__builtin_fmaf128: return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::fma, Intrinsic::experimental_constrained_fma)); @@ -1758,6 +2316,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmaxf: case Builtin::BI__builtin_fmaxf16: case Builtin::BI__builtin_fmaxl: + case Builtin::BI__builtin_fmaxf128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::maxnum, Intrinsic::experimental_constrained_maxnum)); @@ -1769,6 +2328,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fminf: case Builtin::BI__builtin_fminf16: case 
Builtin::BI__builtin_fminl: + case Builtin::BI__builtin_fminf128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::minnum, Intrinsic::experimental_constrained_minnum)); @@ -1781,7 +2341,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmod: case Builtin::BI__builtin_fmodf: case Builtin::BI__builtin_fmodf16: - case Builtin::BI__builtin_fmodl: { + case Builtin::BI__builtin_fmodl: + case Builtin::BI__builtin_fmodf128: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *Arg1 = EmitScalarExpr(E->getArg(0)); Value *Arg2 = EmitScalarExpr(E->getArg(1)); return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); @@ -1794,6 +2356,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_logf: case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: + case Builtin::BI__builtin_logf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log, Intrinsic::experimental_constrained_log)); @@ -1805,6 +2368,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log10f: case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: + case Builtin::BI__builtin_log10f128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10)); @@ -1816,6 +2380,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log2f: case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: + case Builtin::BI__builtin_log2f128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2)); @@ -1826,6 +2391,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_nearbyint: case Builtin::BI__builtin_nearbyintf: case Builtin::BI__builtin_nearbyintl: + case Builtin::BI__builtin_nearbyintf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::nearbyint, Intrinsic::experimental_constrained_nearbyint)); @@ -1837,6 +2403,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_powf: case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: + case Builtin::BI__builtin_powf128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow)); @@ -1848,6 +2415,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_rintf: case Builtin::BI__builtin_rintf16: case Builtin::BI__builtin_rintl: + case Builtin::BI__builtin_rintf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint)); @@ -1859,6 +2427,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_roundf: case Builtin::BI__builtin_roundf16: case Builtin::BI__builtin_roundl: + case Builtin::BI__builtin_roundf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::round, Intrinsic::experimental_constrained_round)); @@ -1870,6 +2439,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sinf: case Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: + case Builtin::BI__builtin_sinf128: 
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin)); @@ -1881,6 +2451,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: + case Builtin::BI__builtin_sqrtf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt)); @@ -1892,6 +2463,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_truncf: case Builtin::BI__builtin_truncf16: case Builtin::BI__builtin_truncl: + case Builtin::BI__builtin_truncf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::trunc, Intrinsic::experimental_constrained_trunc)); @@ -1902,6 +2474,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_lround: case Builtin::BI__builtin_lroundf: case Builtin::BI__builtin_lroundl: + case Builtin::BI__builtin_lroundf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::lround, Intrinsic::experimental_constrained_lround)); @@ -1912,6 +2485,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_llround: case Builtin::BI__builtin_llroundf: case Builtin::BI__builtin_llroundl: + case Builtin::BI__builtin_llroundf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::llround, Intrinsic::experimental_constrained_llround)); @@ -1922,6 +2496,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_lrint: case Builtin::BI__builtin_lrintf: case Builtin::BI__builtin_lrintl: + case Builtin::BI__builtin_lrintf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::lrint, Intrinsic::experimental_constrained_lrint)); @@ -1932,6 +2507,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_llrint: case Builtin::BI__builtin_llrintf: case Builtin::BI__builtin_llrintl: + case Builtin::BI__builtin_llrintf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::llrint, Intrinsic::experimental_constrained_llrint)); @@ -1941,7 +2517,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } - switch (BuiltinID) { + switch (BuiltinIDIfNoAsmLabel) { default: break; case Builtin::BI__builtin___CFStringMakeConstantString: case Builtin::BI__builtin___NSStringMakeConstantString: @@ -1978,6 +2554,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs"); return RValue::get(Result); } + case Builtin::BI__builtin_complex: { + Value *Real = EmitScalarExpr(E->getArg(0)); + Value *Imag = EmitScalarExpr(E->getArg(1)); + return RValue::getComplex({Real, Imag}); + } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: case Builtin::BI__builtin_conjl: @@ -2373,6 +2954,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_isunordered: { // Ordered comparisons: we know the arguments to these are matching scalar // floating point values. + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. 
Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); @@ -2401,6 +2984,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); } case Builtin::BI__builtin_isnan: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *V = EmitScalarExpr(E->getArg(0)); V = Builder.CreateFCmpUNO(V, V, "cmp"); return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); @@ -2464,6 +3049,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // isinf(x) --> fabs(x) == infinity // isfinite(x) --> fabs(x) != infinity // x != NaN via the ordered compare in either case. + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *V = EmitScalarExpr(E->getArg(0)); Value *Fabs = EmitFAbs(*this, V); Constant *Infinity = ConstantFP::getInfinity(V->getType()); @@ -2476,6 +3063,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_isinf_sign: { // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0 + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *Arg = EmitScalarExpr(E->getArg(0)); Value *AbsArg = EmitFAbs(*this, Arg); Value *IsInf = Builder.CreateFCmpOEQ( @@ -2493,6 +3082,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_isnormal: { // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *V = EmitScalarExpr(E->getArg(0)); Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); @@ -2521,6 +3112,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_fpclassify: { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -3386,7 +3979,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // non-wide string literal, potentially casted, so the cast<> is safe. 
const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); - return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); + return RValue::get( + EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr)); } case Builtin::BI__builtin_addcb: case Builtin::BI__builtin_addcs: @@ -3479,6 +4073,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, RightInfo, ResultArg, ResultQTy, ResultInfo); + if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo, + ResultInfo)) + return EmitCheckedUnsignedMultiplySignedResult( + *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy, + ResultInfo); + WidthAndSignedness EncompassingInfo = EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); @@ -3754,11 +4354,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_abnormal_termination: return RValue::get(EmitSEHAbnormalTermination()); case Builtin::BI_setjmpex: - if (getTarget().getTriple().isOSMSVCRT()) + if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && + E->getArg(0)->getType()->isPointerType()) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); break; case Builtin::BI_setjmp: - if (getTarget().getTriple().isOSMSVCRT()) { + if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && + E->getArg(0)->getType()->isPointerType()) { if (getTarget().getTriple().getArch() == llvm::Triple::x86) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) @@ -3838,8 +4440,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, BCast, PacketSize, PacketAlign})); + EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, BCast, PacketSize, PacketAlign})); } else { assert(4 == E->getNumArgs() && "Illegal number of parameters to pipe function"); @@ -3857,9 +4459,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // it to i32. if (Arg2->getType() != Int32Ty) Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); - return RValue::get(Builder.CreateCall( - CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); + return RValue::get( + EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); } } // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write @@ -3900,9 +4502,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // it to i32. 
if (Arg1->getType() != Int32Ty) Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, Arg1, PacketSize, PacketAlign})); + return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, Arg1, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write // functions @@ -3938,9 +4539,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, Arg1, PacketSize, PacketAlign})); + return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, Arg1, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions case Builtin::BIget_pipe_num_packets: @@ -3963,8 +4563,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, PacketSize, PacketAlign})); + return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + {Arg0, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.9 - Address space qualifier functions. @@ -3986,7 +4586,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); auto NewCall = - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); + EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); return RValue::get(Builder.CreateBitOrPointerCast(NewCall, ConvertType(E->getType()))); } @@ -4029,8 +4629,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); auto RTCall = - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), - {Queue, Flags, Range, Kernel, Block}); + EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), + {Queue, Flags, Range, Kernel, Block}); RTCall->setAttributes(ByValAttrSet); return RValue::get(RTCall); } @@ -4089,7 +4689,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); auto Call = RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); + EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); if (TmpSize) EmitLifetimeEnd(TmpSize, TmpPtr); return Call; @@ -4147,8 +4747,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); } // Has event info and variadics // Pass the number of variadics to the runtime function too. 
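The repeated switch from Builder.CreateCall to EmitRuntimeCall for these OpenCL helpers matters because, in upstream Clang, EmitRuntimeCall also stamps the module's runtime calling convention onto the call site so it matches the declaration produced by CreateRuntimeFunction. A minimal sketch of the difference, assuming that behaviour (illustrative only):

    // Before: the call site gets the default calling convention.
    llvm::CallInst *Old = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args);

    // After: EmitRuntimeCall additionally does the equivalent of
    //   Call->setCallingConv(getRuntimeCC());
    // so the call agrees with the runtime function's declared convention.
    llvm::CallInst *New = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args);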
@@ -4164,8 +4764,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); auto Call = - RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); if (TmpSize) EmitLifetimeEnd(TmpSize, TmpPtr); return Call; @@ -4181,7 +4781,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); - return RValue::get(Builder.CreateCall( + return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, false), @@ -4195,7 +4795,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); - return RValue::get(Builder.CreateCall( + return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, false), @@ -4216,7 +4816,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange ? "__get_kernel_max_sub_group_size_for_ndrange_impl" : "__get_kernel_sub_group_count_for_ndrange_impl"; - return RValue::get(Builder.CreateCall( + return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( llvm::FunctionType::get( IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, @@ -4419,11 +5019,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we // know that the generated intrinsic gets a ConstantInt. 
- llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); - (void)IsConst; - ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); + ArgValue = llvm::ConstantInt::get( + getLLVMContext(), + *E->getArg(i)->getIntegerConstantExpr(getContext())); } // If the intrinsic arg type is different from the builtin arg type @@ -4523,6 +5121,7 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::x86_64: return CGF->EmitX86BuiltinExpr(BuiltinID, E); case llvm::Triple::ppc: + case llvm::Triple::ppcle: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: return CGF->EmitPPCBuiltinExpr(BuiltinID, E); @@ -4558,11 +5157,11 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, getTarget().getTriple().getArch()); } -static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, - NeonTypeFlags TypeFlags, - bool HasLegalHalfType = true, - bool V1Ty = false, - bool AllowBFloatArgsAndRet = true) { +static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF, + NeonTypeFlags TypeFlags, + bool HasLegalHalfType = true, + bool V1Ty = false, + bool AllowBFloatArgsAndRet = true) { int IsQuad = TypeFlags.isQuad(); switch (TypeFlags.getEltType()) { case NeonTypeFlags::Int8: @@ -5075,6 +5674,14 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vcltzq_v), NEONMAP1(vclz_v, ctlz, Add1ArgType), NEONMAP1(vclzq_v, ctlz, Add1ArgType), + NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), @@ -5248,6 +5855,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), @@ -5265,6 +5874,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), @@ -5423,6 +6034,10 @@ static const ARMVectorIntrinsicInfo 
AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), @@ -5583,8 +6198,8 @@ static Value *EmitCommonNeonSISDBuiltinExpr( Value *Result = CGF.EmitNeonCall(F, Ops, s); llvm::Type *ResultType = CGF.ConvertType(E->getType()); - if (ResultType->getPrimitiveSizeInBits() < - Result->getType()->getPrimitiveSizeInBits()) + if (ResultType->getPrimitiveSizeInBits().getFixedSize() < + Result->getType()->getPrimitiveSizeInBits().getFixedSize()) return CGF.Builder.CreateExtractElement(Result, C0); return CGF.Builder.CreateBitCast(Result, ResultType, s); @@ -5596,21 +6211,22 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch) { // Get the last argument, which specifies the vector type. - llvm::APSInt NeonTypeConst; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) + Optional<llvm::APSInt> NeonTypeConst = + Arg->getIntegerConstantExpr(getContext()); + if (!NeonTypeConst) return nullptr; // Determine the type of this overloaded NEON intrinsic. 
- NeonTypeFlags Type(NeonTypeConst.getZExtValue()); + NeonTypeFlags Type(NeonTypeConst->getZExtValue()); bool Usgn = Type.isUnsigned(); bool Quad = Type.isQuad(); const bool HasLegalHalfType = getTarget().hasLegalHalfType(); const bool AllowBFloatArgsAndRet = getTargetHooks().getABIInfo().allowBFloatArgsAndRet(); - llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType, false, - AllowBFloatArgsAndRet); + llvm::FixedVectorType *VTy = + GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -5633,7 +6249,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v) NumElements = NumElements * 2; if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v) - NumElements = NumElements / 2; + NumElements = NumElements.divideCoefficientBy(2); Ops[0] = Builder.CreateBitCast(Ops[0], VTy); return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements); @@ -5651,8 +6267,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); case NEON::BI__builtin_neon_vaddhn_v: { - llvm::VectorType *SrcTy = - llvm::VectorType::getExtendedElementVectorType(VTy); + llvm::FixedVectorType *SrcTy = + llvm::FixedVectorType::getExtendedElementVectorType(VTy); // %sum = add <4 x i32> %lhs, %rhs Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); @@ -5924,14 +6540,16 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vmovl_v: { - llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); + llvm::FixedVectorType *DTy = + llvm::FixedVectorType::getTruncatedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], DTy); if (Usgn) return Builder.CreateZExt(Ops[0], Ty, "vmovl"); return Builder.CreateSExt(Ops[0], Ty, "vmovl"); } case NEON::BI__builtin_neon_vmovn_v: { - llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); + llvm::FixedVectorType *QTy = + llvm::FixedVectorType::getExtendedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], QTy); return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); } @@ -5977,7 +6595,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vqdmulh_lane_v: case NEON::BI__builtin_neon_vqrdmulhq_lane_v: case NEON::BI__builtin_neon_vqrdmulh_lane_v: { - auto *RTy = cast<llvm::VectorType>(Ty); + auto *RTy = cast<llvm::FixedVectorType>(Ty); if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v) RTy = llvm::FixedVectorType::get(RTy->getElementType(), @@ -6026,7 +6644,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n"); case NEON::BI__builtin_neon_vshll_n_v: { - llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); + llvm::FixedVectorType *SrcTy = + llvm::FixedVectorType::getTruncatedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); if (Usgn) Ops[0] = Builder.CreateZExt(Ops[0], VTy); @@ -6036,7 +6655,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); } case NEON::BI__builtin_neon_vshrn_n_v: { - llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); + llvm::FixedVectorType *SrcTy = + llvm::FixedVectorType::getExtendedElementVectorType(VTy); 
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); if (Usgn) @@ -6085,8 +6705,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } case NEON::BI__builtin_neon_vsubhn_v: { - llvm::VectorType *SrcTy = - llvm::VectorType::getExtendedElementVectorType(VTy); + llvm::FixedVectorType *SrcTy = + llvm::FixedVectorType::getExtendedElementVectorType(VTy); // %sum = add <4 x i32> %lhs, %rhs Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); @@ -6225,28 +6845,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vbfdot_v: case NEON::BI__builtin_neon_vbfdotq_v: { llvm::Type *InputTy = - llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot"); } - case NEON::BI__builtin_neon_vbfmmlaq_v: { - llvm::Type *InputTy = - llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); - llvm::Type *Tys[2] = { Ty, InputTy }; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmmla"); - } - case NEON::BI__builtin_neon_vbfmlalbq_v: { - llvm::Type *InputTy = - llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); - llvm::Type *Tys[2] = { Ty, InputTy }; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalb"); - } - case NEON::BI__builtin_neon_vbfmlaltq_v: { - llvm::Type *InputTy = - llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); - llvm::Type *Tys[2] = { Ty, InputTy }; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalt"); - } case NEON::BI__builtin_neon___a32_vcvt_bf16_v: { llvm::Type *Tys[1] = { Ty }; Function *F = CGM.getIntrinsic(Int, Tys); @@ -6298,7 +6900,7 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, // Build a vector containing sequential number like (0, 1, 2, ..., 15) SmallVector<int, 16> Indices; - llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); + auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { Indices.push_back(2*i); Indices.push_back(2*i+1); @@ -6810,6 +7412,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, AccessKind); } + // Handle MSVC intrinsics before argument evaluation to prevent double + // evaluation. + if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) + return EmitMSVCBuiltinExpr(*MsvcIntId, E); + // Deal with MVE builtins if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) return Result; @@ -6898,10 +7505,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. 
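    // getIntegerConstantExpr returns Optional<llvm::APSInt>, so the old
    // out-parameter pattern (isIntegerConstantExpr plus an assert) collapses
    // into a single dereference below; Sema has already verified that the
    // argument is an integer constant expression, which is what made the old
    // assert safe.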
- llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), + *E->getArg(i)->getIntegerConstantExpr(getContext()))); } } @@ -6971,150 +7577,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); } - case ARM::BI_BitScanForward: - case ARM::BI_BitScanForward64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); - case ARM::BI_BitScanReverse: - case ARM::BI_BitScanReverse64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); - - case ARM::BI_InterlockedAnd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); - case ARM::BI_InterlockedExchange64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); - case ARM::BI_InterlockedExchangeAdd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); - case ARM::BI_InterlockedExchangeSub64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); - case ARM::BI_InterlockedOr64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); - case ARM::BI_InterlockedXor64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); - case ARM::BI_InterlockedDecrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); - case ARM::BI_InterlockedIncrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); - case ARM::BI_InterlockedExchangeAdd8_acq: - case ARM::BI_InterlockedExchangeAdd16_acq: - case ARM::BI_InterlockedExchangeAdd_acq: - case ARM::BI_InterlockedExchangeAdd64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); - case ARM::BI_InterlockedExchangeAdd8_rel: - case ARM::BI_InterlockedExchangeAdd16_rel: - case ARM::BI_InterlockedExchangeAdd_rel: - case ARM::BI_InterlockedExchangeAdd64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); - case ARM::BI_InterlockedExchangeAdd8_nf: - case ARM::BI_InterlockedExchangeAdd16_nf: - case ARM::BI_InterlockedExchangeAdd_nf: - case ARM::BI_InterlockedExchangeAdd64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); - case ARM::BI_InterlockedExchange8_acq: - case ARM::BI_InterlockedExchange16_acq: - case ARM::BI_InterlockedExchange_acq: - case ARM::BI_InterlockedExchange64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); - case ARM::BI_InterlockedExchange8_rel: - case ARM::BI_InterlockedExchange16_rel: - case ARM::BI_InterlockedExchange_rel: - case ARM::BI_InterlockedExchange64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); - case ARM::BI_InterlockedExchange8_nf: - case ARM::BI_InterlockedExchange16_nf: - case ARM::BI_InterlockedExchange_nf: - case ARM::BI_InterlockedExchange64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); - case ARM::BI_InterlockedCompareExchange8_acq: - case ARM::BI_InterlockedCompareExchange16_acq: - case ARM::BI_InterlockedCompareExchange_acq: - case ARM::BI_InterlockedCompareExchange64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); - case ARM::BI_InterlockedCompareExchange8_rel: - case ARM::BI_InterlockedCompareExchange16_rel: - case ARM::BI_InterlockedCompareExchange_rel: - case ARM::BI_InterlockedCompareExchange64_rel: - return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); - case ARM::BI_InterlockedCompareExchange8_nf: - case ARM::BI_InterlockedCompareExchange16_nf: - case ARM::BI_InterlockedCompareExchange_nf: - case ARM::BI_InterlockedCompareExchange64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); - case ARM::BI_InterlockedOr8_acq: - case ARM::BI_InterlockedOr16_acq: - case ARM::BI_InterlockedOr_acq: - case ARM::BI_InterlockedOr64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); - case ARM::BI_InterlockedOr8_rel: - case ARM::BI_InterlockedOr16_rel: - case ARM::BI_InterlockedOr_rel: - case ARM::BI_InterlockedOr64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); - case ARM::BI_InterlockedOr8_nf: - case ARM::BI_InterlockedOr16_nf: - case ARM::BI_InterlockedOr_nf: - case ARM::BI_InterlockedOr64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); - case ARM::BI_InterlockedXor8_acq: - case ARM::BI_InterlockedXor16_acq: - case ARM::BI_InterlockedXor_acq: - case ARM::BI_InterlockedXor64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); - case ARM::BI_InterlockedXor8_rel: - case ARM::BI_InterlockedXor16_rel: - case ARM::BI_InterlockedXor_rel: - case ARM::BI_InterlockedXor64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); - case ARM::BI_InterlockedXor8_nf: - case ARM::BI_InterlockedXor16_nf: - case ARM::BI_InterlockedXor_nf: - case ARM::BI_InterlockedXor64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); - case ARM::BI_InterlockedAnd8_acq: - case ARM::BI_InterlockedAnd16_acq: - case ARM::BI_InterlockedAnd_acq: - case ARM::BI_InterlockedAnd64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); - case ARM::BI_InterlockedAnd8_rel: - case ARM::BI_InterlockedAnd16_rel: - case ARM::BI_InterlockedAnd_rel: - case ARM::BI_InterlockedAnd64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); - case ARM::BI_InterlockedAnd8_nf: - case ARM::BI_InterlockedAnd16_nf: - case ARM::BI_InterlockedAnd_nf: - case ARM::BI_InterlockedAnd64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); - case ARM::BI_InterlockedIncrement16_acq: - case ARM::BI_InterlockedIncrement_acq: - case ARM::BI_InterlockedIncrement64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); - case ARM::BI_InterlockedIncrement16_rel: - case ARM::BI_InterlockedIncrement_rel: - case ARM::BI_InterlockedIncrement64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); - case ARM::BI_InterlockedIncrement16_nf: - case ARM::BI_InterlockedIncrement_nf: - case ARM::BI_InterlockedIncrement64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); - case ARM::BI_InterlockedDecrement16_acq: - case ARM::BI_InterlockedDecrement_acq: - case ARM::BI_InterlockedDecrement64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); - case ARM::BI_InterlockedDecrement16_rel: - case ARM::BI_InterlockedDecrement_rel: - case ARM::BI_InterlockedDecrement64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); - case ARM::BI_InterlockedDecrement16_nf: - case ARM::BI_InterlockedDecrement_nf: - case ARM::BI_InterlockedDecrement64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); } // Get the last argument, which specifies the vector type. 
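    // That trailing argument is itself an integer constant expression packing
    // the NeonTypeFlags for the overloaded builtin: the element type together
    // with the unsigned and quad (128-bit) bits read back via
    // Type.isUnsigned() and Type.isQuad(). The exact bit layout lives in
    // NeonTypeFlags and is not repeated here.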
assert(HasExtraArg); - llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) + Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext()); + if (!Result) return nullptr; if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || @@ -7127,7 +7596,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ty = DoubleTy; // Determine whether this is an unsigned conversion or not. - bool usgn = Result.getZExtValue() == 1; + bool usgn = Result->getZExtValue() == 1; unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; // Call the appropriate intrinsic. @@ -7136,14 +7605,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); + NeonTypeFlags Type = Result->getZExtValue(); bool usgn = Type.isUnsigned(); bool rightShift = false; - llvm::VectorType *VTy = GetNeonType(this, Type, - getTarget().hasLegalHalfType(), - false, - getTarget().hasBFloat16Type()); + llvm::FixedVectorType *VTy = + GetNeonType(this, Type, getTarget().hasLegalHalfType(), false, + getTarget().hasBFloat16Type()); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -7280,11 +7748,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, template<typename Integer> static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { - llvm::APSInt IntVal; - bool IsConst = E->isIntegerConstantExpr(IntVal, Context); - assert(IsConst && "Sema should have checked this was a constant"); - (void)IsConst; - return IntVal.getExtValue(); + return E->getIntegerConstantExpr(Context)->getExtValue(); } static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, @@ -7355,11 +7819,10 @@ static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) // or odds, as desired). SmallVector<int, 16> Indices; unsigned InputElements = - cast<llvm::VectorType>(V->getType())->getNumElements(); + cast<llvm::FixedVectorType>(V->getType())->getNumElements(); for (unsigned i = 0; i < InputElements; i += 2) Indices.push_back(i + Odd); - return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), - Indices); + return Builder.CreateShuffleVector(V, Indices); } static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, @@ -7368,7 +7831,7 @@ static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, assert(V0->getType() == V1->getType() && "Can't zip different vector types"); SmallVector<int, 16> Indices; unsigned InputElements = - cast<llvm::VectorType>(V0->getType())->getNumElements(); + cast<llvm::FixedVectorType>(V0->getType())->getNumElements(); for (unsigned i = 0; i < InputElements; i++) { Indices.push_back(i); Indices.push_back(i + InputElements); @@ -7400,8 +7863,7 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, unsigned Mask = ReverseWidth / LaneSize - 1; for (unsigned i = 0; i < Elements; i++) Indices.push_back(i ^ Mask); - return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), - Indices); + return Builder.CreateShuffleVector(V, Indices); } Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, @@ -7557,14 +8019,14 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID assert(E->getNumArgs() >= 3); // Get the last argument, which specifies the vector type. 
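  // Note on the type changes in this area: NEON vectors are always
  // fixed-width, so these casts move from llvm::VectorType to
  // llvm::FixedVectorType, the variant that still exposes getNumElements()
  // after LLVM's split into fixed and scalable vector types. Scalable (SVE)
  // vectors instead go through getElementCount().getKnownMinValue(), as in
  // the SVE builtins below.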
- llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) + Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(CGF.getContext()); + if (!Result) return nullptr; // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *Ty = GetNeonType(&CGF, Type); + NeonTypeFlags Type = Result->getZExtValue(); + llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type); if (!Ty) return nullptr; @@ -8240,15 +8702,15 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - llvm::APSInt Result; - if (!E->getArg(i)->isIntegerConstantExpr(Result, getContext())) - llvm_unreachable("Expected argument to be a constant"); + Optional<llvm::APSInt> Result = + E->getArg(i)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); // Immediates for SVE llvm intrinsics are always 32bit. We can safely // truncate because the immediate has been range checked and no valid // immediate requires more than a handful of bits. - Result = Result.extOrTrunc(32); - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + *Result = Result->extOrTrunc(32); + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); } } @@ -8465,7 +8927,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svlen_u64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast<llvm::VectorType>(getSVEType(TF)); - auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min); + auto *NumEls = + llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue()); Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); return Builder.CreateMul(NumEls, Builder.CreateCall(F)); @@ -8485,8 +8948,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svtbl2_f64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast<llvm::VectorType>(getSVEType(TF)); - auto TupleTy = llvm::VectorType::get(VTy->getElementType(), - VTy->getElementCount() * 2); + auto TupleTy = llvm::VectorType::getDoubleElementsVectorType(VTy); Function *FExtr = CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy}); Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)}); @@ -8597,6 +9059,46 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); } + if (BuiltinID == AArch64::BI__builtin_arm_ld64b || + BuiltinID == AArch64::BI__builtin_arm_st64b || + BuiltinID == AArch64::BI__builtin_arm_st64bv || + BuiltinID == AArch64::BI__builtin_arm_st64bv0) { + llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0)); + llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1)); + + if (BuiltinID == AArch64::BI__builtin_arm_ld64b) { + // Load from the address via an LLVM intrinsic, receiving a + // tuple of 8 i64 words, and store each one to ValPtr. 
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b); + llvm::Value *Val = Builder.CreateCall(F, MemAddr); + llvm::Value *ToRet; + for (size_t i = 0; i < 8; i++) { + llvm::Value *ValOffsetPtr = Builder.CreateGEP(ValPtr, Builder.getInt32(i)); + Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8)); + ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr); + } + return ToRet; + } else { + // Load 8 i64 words from ValPtr, and store them to the address + // via an LLVM intrinsic. + SmallVector<llvm::Value *, 9> Args; + Args.push_back(MemAddr); + for (size_t i = 0; i < 8; i++) { + llvm::Value *ValOffsetPtr = Builder.CreateGEP(ValPtr, Builder.getInt32(i)); + Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8)); + Args.push_back(Builder.CreateLoad(Addr)); + } + + auto Intr = (BuiltinID == AArch64::BI__builtin_arm_st64b + ? Intrinsic::aarch64_st64b + : BuiltinID == AArch64::BI__builtin_arm_st64bv + ? Intrinsic::aarch64_st64bv + : Intrinsic::aarch64_st64bv0); + Function *F = CGM.getIntrinsic(Intr); + return Builder.CreateCall(F, Args); + } + } + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); @@ -8918,6 +9420,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } + // Handle MSVC intrinsics before argument evaluation to prevent double + // evaluation. + if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) + return EmitMSVCBuiltinExpr(*MsvcIntId, E); + // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; @@ -8952,11 +9459,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); - (void)IsConst; - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), + *E->getArg(i)->getIntegerConstantExpr(getContext()))); } } @@ -8971,12 +9476,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Result; } - llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); NeonTypeFlags Type(0); - if (Arg->isIntegerConstantExpr(Result, getContext())) + if (Optional<llvm::APSInt> Result = Arg->getIntegerConstantExpr(getContext())) // Determine the type of this overloaded NEON intrinsic. - Type = NeonTypeFlags(Result.getZExtValue()); + Type = NeonTypeFlags(Result->getZExtValue()); bool usgn = Type.isUnsigned(); bool quad = Type.isQuad(); @@ -8999,21 +9503,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); } - case NEON::BI__builtin_neon_vcvts_u32_f32: - case NEON::BI__builtin_neon_vcvtd_u64_f64: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvts_s32_f32: - case NEON::BI__builtin_neon_vcvtd_s64_f64: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; - llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; - llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; - Ops[0] = Builder.CreateBitCast(Ops[0], FTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], InTy); - return Builder.CreateFPToSI(Ops[0], InTy); - } case NEON::BI__builtin_neon_vcvts_f32_u32: case NEON::BI__builtin_neon_vcvtd_f64_u64: usgn = true; @@ -9051,44 +9540,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateUIToFP(Ops[0], FTy); return Builder.CreateSIToFP(Ops[0], FTy); } - case NEON::BI__builtin_neon_vcvth_u16_f16: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvth_s16_f16: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Int16Ty); - return Builder.CreateFPToSI(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vcvth_u32_f16: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvth_s32_f16: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Int32Ty); - return Builder.CreateFPToSI(Ops[0], Int32Ty); - } - case NEON::BI__builtin_neon_vcvth_u64_f16: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvth_s64_f16: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Int64Ty); - return Builder.CreateFPToSI(Ops[0], Int64Ty); - } case NEON::BI__builtin_neon_vcvtah_u16_f16: case NEON::BI__builtin_neon_vcvtmh_u16_f16: case NEON::BI__builtin_neon_vcvtnh_u16_f16: case NEON::BI__builtin_neon_vcvtph_u16_f16: + case NEON::BI__builtin_neon_vcvth_u16_f16: case NEON::BI__builtin_neon_vcvtah_s16_f16: case NEON::BI__builtin_neon_vcvtmh_s16_f16: case NEON::BI__builtin_neon_vcvtnh_s16_f16: - case NEON::BI__builtin_neon_vcvtph_s16_f16: { + case NEON::BI__builtin_neon_vcvtph_s16_f16: + case NEON::BI__builtin_neon_vcvth_s16_f16: { unsigned Int; llvm::Type* InTy = Int32Ty; llvm::Type* FTy = HalfTy; @@ -9104,6 +9565,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fcvtnu; break; case NEON::BI__builtin_neon_vcvtph_u16_f16: Int = Intrinsic::aarch64_neon_fcvtpu; break; + case NEON::BI__builtin_neon_vcvth_u16_f16: + Int = Intrinsic::aarch64_neon_fcvtzu; break; case NEON::BI__builtin_neon_vcvtah_s16_f16: Int = Intrinsic::aarch64_neon_fcvtas; break; case NEON::BI__builtin_neon_vcvtmh_s16_f16: @@ -9112,6 +9575,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fcvtns; break; case NEON::BI__builtin_neon_vcvtph_s16_f16: Int = Intrinsic::aarch64_neon_fcvtps; break; + case NEON::BI__builtin_neon_vcvth_s16_f16: + Int = Intrinsic::aarch64_neon_fcvtzs; break; } Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); return Builder.CreateTrunc(Ops[0], Int16Ty); @@ -9661,142 +10126,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); } - case AArch64::BI_BitScanForward: - case AArch64::BI_BitScanForward64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); - case AArch64::BI_BitScanReverse: - case AArch64::BI_BitScanReverse64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); - case AArch64::BI_InterlockedAnd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); - case AArch64::BI_InterlockedExchange64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); - 
case AArch64::BI_InterlockedExchangeAdd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); - case AArch64::BI_InterlockedExchangeSub64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); - case AArch64::BI_InterlockedOr64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); - case AArch64::BI_InterlockedXor64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); - case AArch64::BI_InterlockedDecrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); - case AArch64::BI_InterlockedIncrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); - case AArch64::BI_InterlockedExchangeAdd8_acq: - case AArch64::BI_InterlockedExchangeAdd16_acq: - case AArch64::BI_InterlockedExchangeAdd_acq: - case AArch64::BI_InterlockedExchangeAdd64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); - case AArch64::BI_InterlockedExchangeAdd8_rel: - case AArch64::BI_InterlockedExchangeAdd16_rel: - case AArch64::BI_InterlockedExchangeAdd_rel: - case AArch64::BI_InterlockedExchangeAdd64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); - case AArch64::BI_InterlockedExchangeAdd8_nf: - case AArch64::BI_InterlockedExchangeAdd16_nf: - case AArch64::BI_InterlockedExchangeAdd_nf: - case AArch64::BI_InterlockedExchangeAdd64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); - case AArch64::BI_InterlockedExchange8_acq: - case AArch64::BI_InterlockedExchange16_acq: - case AArch64::BI_InterlockedExchange_acq: - case AArch64::BI_InterlockedExchange64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); - case AArch64::BI_InterlockedExchange8_rel: - case AArch64::BI_InterlockedExchange16_rel: - case AArch64::BI_InterlockedExchange_rel: - case AArch64::BI_InterlockedExchange64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); - case AArch64::BI_InterlockedExchange8_nf: - case AArch64::BI_InterlockedExchange16_nf: - case AArch64::BI_InterlockedExchange_nf: - case AArch64::BI_InterlockedExchange64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); - case AArch64::BI_InterlockedCompareExchange8_acq: - case AArch64::BI_InterlockedCompareExchange16_acq: - case AArch64::BI_InterlockedCompareExchange_acq: - case AArch64::BI_InterlockedCompareExchange64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); - case AArch64::BI_InterlockedCompareExchange8_rel: - case AArch64::BI_InterlockedCompareExchange16_rel: - case AArch64::BI_InterlockedCompareExchange_rel: - case AArch64::BI_InterlockedCompareExchange64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); - case AArch64::BI_InterlockedCompareExchange8_nf: - case AArch64::BI_InterlockedCompareExchange16_nf: - case AArch64::BI_InterlockedCompareExchange_nf: - case AArch64::BI_InterlockedCompareExchange64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); - case AArch64::BI_InterlockedOr8_acq: - case AArch64::BI_InterlockedOr16_acq: - case AArch64::BI_InterlockedOr_acq: - case AArch64::BI_InterlockedOr64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); - case AArch64::BI_InterlockedOr8_rel: - case AArch64::BI_InterlockedOr16_rel: - case AArch64::BI_InterlockedOr_rel: - case AArch64::BI_InterlockedOr64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); - case AArch64::BI_InterlockedOr8_nf: - case 
AArch64::BI_InterlockedOr16_nf: - case AArch64::BI_InterlockedOr_nf: - case AArch64::BI_InterlockedOr64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); - case AArch64::BI_InterlockedXor8_acq: - case AArch64::BI_InterlockedXor16_acq: - case AArch64::BI_InterlockedXor_acq: - case AArch64::BI_InterlockedXor64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); - case AArch64::BI_InterlockedXor8_rel: - case AArch64::BI_InterlockedXor16_rel: - case AArch64::BI_InterlockedXor_rel: - case AArch64::BI_InterlockedXor64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); - case AArch64::BI_InterlockedXor8_nf: - case AArch64::BI_InterlockedXor16_nf: - case AArch64::BI_InterlockedXor_nf: - case AArch64::BI_InterlockedXor64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); - case AArch64::BI_InterlockedAnd8_acq: - case AArch64::BI_InterlockedAnd16_acq: - case AArch64::BI_InterlockedAnd_acq: - case AArch64::BI_InterlockedAnd64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); - case AArch64::BI_InterlockedAnd8_rel: - case AArch64::BI_InterlockedAnd16_rel: - case AArch64::BI_InterlockedAnd_rel: - case AArch64::BI_InterlockedAnd64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); - case AArch64::BI_InterlockedAnd8_nf: - case AArch64::BI_InterlockedAnd16_nf: - case AArch64::BI_InterlockedAnd_nf: - case AArch64::BI_InterlockedAnd64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); - case AArch64::BI_InterlockedIncrement16_acq: - case AArch64::BI_InterlockedIncrement_acq: - case AArch64::BI_InterlockedIncrement64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); - case AArch64::BI_InterlockedIncrement16_rel: - case AArch64::BI_InterlockedIncrement_rel: - case AArch64::BI_InterlockedIncrement64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); - case AArch64::BI_InterlockedIncrement16_nf: - case AArch64::BI_InterlockedIncrement_nf: - case AArch64::BI_InterlockedIncrement64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); - case AArch64::BI_InterlockedDecrement16_acq: - case AArch64::BI_InterlockedDecrement_acq: - case AArch64::BI_InterlockedDecrement64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); - case AArch64::BI_InterlockedDecrement16_rel: - case AArch64::BI_InterlockedDecrement_rel: - case AArch64::BI_InterlockedDecrement64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); - case AArch64::BI_InterlockedDecrement16_nf: - case AArch64::BI_InterlockedDecrement_nf: - case AArch64::BI_InterlockedDecrement64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); case AArch64::BI_InterlockedAdd: { Value *Arg0 = EmitScalarExpr(E->getArg(0)); @@ -9808,7 +10137,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } } - llvm::VectorType *VTy = GetNeonType(this, Type); + llvm::FixedVectorType *VTy = GetNeonType(this, Type); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -9869,13 +10198,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); } case NEON::BI__builtin_neon_vfma_laneq_v: { - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); + auto *VTy = cast<llvm::FixedVectorType>(Ty); // v1f64 fma should be mapped to Neon scalar f64 fma if (VTy && VTy->getElementType() == DoubleTy) { Ops[0] = Builder.CreateBitCast(Ops[0], 
DoubleTy); Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, true)); + llvm::FixedVectorType *VTy = + GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); Value *Result; @@ -10152,10 +10481,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvtq_u64_v: case NEON::BI__builtin_neon_vcvtq_s16_v: case NEON::BI__builtin_neon_vcvtq_u16_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Ty); - return Builder.CreateFPToSI(Ops[0], Ty); + Int = + usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs; + llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); } case NEON::BI__builtin_neon_vcvta_s16_v: case NEON::BI__builtin_neon_vcvta_u16_v: @@ -10243,8 +10572,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) Quad = true; Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); - llvm::Type *VTy = GetNeonType(this, - NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); + llvm::FixedVectorType *VTy = + GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); @@ -10760,8 +11089,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld2q_lane_v: { llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); - Ops.push_back(Ops[1]); - Ops.erase(Ops.begin()+1); + std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); @@ -10774,8 +11102,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld3q_lane_v: { llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); - Ops.push_back(Ops[1]); - Ops.erase(Ops.begin()+1); + std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); @@ -10789,8 +11116,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld4q_lane_v: { llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); - Ops.push_back(Ops[1]); - Ops.erase(Ops.begin()+1); + std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); @@ -10803,16 +11129,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vst2_v: case NEON::BI__builtin_neon_vst2q_v: { - Ops.push_back(Ops[0]); - Ops.erase(Ops.begin()); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), Ops, ""); } case 
NEON::BI__builtin_neon_vst2_lane_v: case NEON::BI__builtin_neon_vst2q_lane_v: { - Ops.push_back(Ops[0]); - Ops.erase(Ops.begin()); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), @@ -10820,16 +11144,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vst3_v: case NEON::BI__builtin_neon_vst3q_v: { - Ops.push_back(Ops[0]); - Ops.erase(Ops.begin()); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst3_lane_v: case NEON::BI__builtin_neon_vst3q_lane_v: { - Ops.push_back(Ops[0]); - Ops.erase(Ops.begin()); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), @@ -10837,16 +11159,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vst4_v: case NEON::BI__builtin_neon_vst4q_v: { - Ops.push_back(Ops[0]); - Ops.erase(Ops.begin()); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst4_lane_v: case NEON::BI__builtin_neon_vst4q_lane_v: { - Ops.push_back(Ops[0]); - Ops.erase(Ops.begin()); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), @@ -10956,9 +11276,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { assert((BuiltinID == BPF::BI__builtin_preserve_field_info || - BuiltinID == BPF::BI__builtin_btf_type_id) && + BuiltinID == BPF::BI__builtin_btf_type_id || + BuiltinID == BPF::BI__builtin_preserve_type_info || + BuiltinID == BPF::BI__builtin_preserve_enum_value) && "unexpected BPF builtin"); + // A sequence number, injected into IR builtin functions, to + // prevent CSE, since the only difference between two such calls + // may just be the debuginfo metadata. + static uint32_t BuiltinSeqNum; + switch (BuiltinID) { default: llvm_unreachable("Unexpected BPF builtin"); @@ -10989,65 +11316,65 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, {FieldAddr->getType()}); return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); } - case BPF::BI__builtin_btf_type_id: { - Value *FieldVal = nullptr; - - // The LValue cannot be converted Value in order to be used as the function - // parameter. If it is a structure, it is the "alloca" result of the LValue - // (a pointer) is used in the parameter. If it is a simple type, - // the value will be loaded from its corresponding "alloca" and used as - // the parameter. In our case, let us just get a pointer of the LValue - // since we do not really use the parameter. The purpose of parameter - // is to prevent the generated IR llvm.bpf.btf.type.id intrinsic call, - // which carries metadata, from being changed.
- bool IsLValue = E->getArg(0)->isLValue(); - if (IsLValue) - FieldVal = EmitLValue(E->getArg(0)).getPointer(*this); - else - FieldVal = EmitScalarExpr(E->getArg(0)); + case BPF::BI__builtin_btf_type_id: + case BPF::BI__builtin_preserve_type_info: { + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using builtin function without -g"); + return nullptr; + } + + const Expr *Arg0 = E->getArg(0); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( + Arg0->getType(), Arg0->getExprLoc()); + + ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); + Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); + llvm::Function *FnDecl; + if (BuiltinID == BPF::BI__builtin_btf_type_id) + FnDecl = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {}); + else + FnDecl = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {}); + CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue}); + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + return Fn; + } + case BPF::BI__builtin_preserve_enum_value: { if (!getDebugInfo()) { - CGM.Error(E->getExprLoc(), "using __builtin_btf_type_id() without -g"); + CGM.Error(E->getExprLoc(), "using builtin function without -g"); return nullptr; } - // Generate debuginfo type for the first argument. - llvm::DIType *DbgInfo = - getDebugInfo()->getOrCreateStandaloneType(E->getArg(0)->getType(), - E->getArg(0)->getExprLoc()); + const Expr *Arg0 = E->getArg(0); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( + Arg0->getType(), Arg0->getExprLoc()); + + // Find enumerator + const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens()); + const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr()); + const auto *DR = cast<DeclRefExpr>(CE->getSubExpr()); + const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl()); + + auto &InitVal = Enumerator->getInitVal(); + std::string InitValStr; + if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX)) + InitValStr = std::to_string(InitVal.getSExtValue()); + else + InitValStr = std::to_string(InitVal.getZExtValue()); + std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr; + Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr); ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); + Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); - // Built the IR for the btf_type_id intrinsic. - // - // In the above, we converted LValue argument to a pointer to LValue. - // For example, the following - // int v; - // C1: __builtin_btf_type_id(v, flag); - // will be converted to - // L1: llvm.bpf.btf.type.id(&v, flag) - // This makes it hard to differentiate from - // C2: __builtin_btf_type_id(&v, flag); - // to - // L2: llvm.bpf.btf.type.id(&v, flag) - // - // If both C1 and C2 are present in the code, the llvm may later - // on do CSE on L1 and L2, which will result in incorrect tagged types. - // - // The C1->L1 transformation only happens if the argument of - // __builtin_btf_type_id() is a LValue. So Let us put whether - // the argument is an LValue or not into generated IR. This should - // prevent potential CSE from causing debuginfo type loss. 
- // - // The generated IR intrinsics will hence look like - // L1: llvm.bpf.btf.type.id(&v, 1, flag) !di_type_for_{v}; - // L2: llvm.bpf.btf.type.id(&v, 0, flag) !di_type_for_{&v}; - Constant *CV = ConstantInt::get(IntTy, IsLValue); - llvm::Function *FnBtfTypeId = llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, - {FieldVal->getType(), CV->getType()}); - CallInst *Fn = Builder.CreateCall(FnBtfTypeId, {FieldVal, CV, FlagValue}); + llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {}); + CallInst *Fn = + Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue}); Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } @@ -11109,7 +11436,8 @@ static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, llvm::PointerType::getUnqual(Ops[1]->getType())); Value *MaskVec = getMaskVecValue( - CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements()); + CGF, Ops[2], + cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements()); return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec); } @@ -11121,7 +11449,8 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops, llvm::PointerType::getUnqual(Ops[1]->getType())); Value *MaskVec = getMaskVecValue( - CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements()); + CGF, Ops[2], + cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements()); return CGF.Builder.CreateMaskedLoad(Ptr, Alignment, MaskVec, Ops[1]); } @@ -11135,7 +11464,8 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(PtrTy)); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); + Value *MaskVec = getMaskVecValue( + CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements()); llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload, ResultTy); @@ -11145,7 +11475,7 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, static Value *EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef<Value *> Ops, bool IsCompress) { - auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); + auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType()); Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); @@ -11157,7 +11487,7 @@ static Value *EmitX86CompressExpand(CodeGenFunction &CGF, static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { - auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); + auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType()); llvm::Type *PtrTy = ResultTy->getElementType(); // Cast the pointer to element type. @@ -11193,7 +11523,7 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, // Funnel shifts amounts are treated as modulo and types are all power-of-2 so // we only care about the lowest log2 bits anyway. 
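  // As a reminder of the semantics relied on here: llvm.fshl(a, b, n)
  // computes (a << n) | (b lshr (BW - n)) for n % BW != 0 (a rotate when
  // a == b), and llvm.fshr is the mirror image; the shift amount is defined
  // modulo the bit width, which is why only the low log2(BW) bits of Amt
  // matter.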
if (Amt->getType() != Ty) { - unsigned NumElts = cast<llvm::VectorType>(Ty)->getNumElements(); + unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements(); Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); } @@ -11252,7 +11582,7 @@ static Value *EmitX86Select(CodeGenFunction &CGF, return Op0; Mask = getMaskVecValue( - CGF, Mask, cast<llvm::VectorType>(Op0->getType())->getNumElements()); + CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements()); return CGF.Builder.CreateSelect(Mask, Op0, Op1); } @@ -11299,7 +11629,7 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, assert((Ops.size() == 2 || Ops.size() == 4) && "Unexpected number of arguments"); unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); Value *Cmp; if (CC == 3) { @@ -11353,25 +11683,6 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, return EmitX86Select(CGF, Ops[2], Res, Ops[1]); } -static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { - - llvm::Type *Ty = Ops[0]->getType(); - Value *Zero = llvm::Constant::getNullValue(Ty); - Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); - Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); - Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); - return Res; -} - -static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, - ArrayRef<Value *> Ops) { - Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); - Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); - - assert(Ops.size() == 2); - return Res; -} - // Lowers X86 FMA intrinsics to IR. static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, unsigned BuiltinID, bool IsAddSub) { @@ -11576,18 +11887,15 @@ static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy) { - unsigned NumberOfElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumberOfElements = + cast<llvm::FixedVectorType>(DstTy)->getNumElements(); Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } -// Emit addition or subtraction with signed/unsigned saturation. -static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, - ArrayRef<Value *> Ops, bool IsSigned, - bool IsAddition) { - Intrinsic::ID IID = - IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) - : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); +// Emit binary intrinsic with the same type used in result/args. +static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, Intrinsic::ID IID) { llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); } @@ -11612,14 +11920,14 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]}); } - unsigned NumDstElts = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements(); Value *Src = Ops[0]; // Extract the subvector. 
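  // For the 128-bit cvtph forms the source arrives as a full <8 x i16> in
  // which only the low four lanes carry data, so a shuffle with mask
  // {0, 1, 2, 3} keeps just those lanes before the bitcast to <4 x half>
  // and the extension to float below.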
- if (NumDstElts != cast<llvm::VectorType>(Src->getType())->getNumElements()) { + if (NumDstElts != + cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) { assert(NumDstElts == 4 && "Unexpected vector size"); - Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()), - ArrayRef<int>{0, 1, 2, 3}); + Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3}); } // Bitcast from vXi16 to vXf16. @@ -11790,7 +12098,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, if (BuiltinID == X86::BI__builtin_cpu_init) return EmitX86CpuInit(); + // Handle MSVC intrinsics before argument evaluation to prevent double + // evaluation. + if (Optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID)) + return EmitMSVCBuiltinExpr(*MsvcIntId, E); + SmallVector<Value*, 4> Ops; + bool IsMaskFCmp = false; // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; @@ -11807,10 +12121,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); } // These exist so that the builtin that takes an immediate can be bounds @@ -11916,7 +12228,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vec_ext_v8si: case X86::BI__builtin_ia32_vec_ext_v4di: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue(); Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. @@ -11932,7 +12244,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vec_set_v8si: case X86::BI__builtin_ia32_vec_set_v4di: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. 
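The MSVC-compatible intrinsics are now routed through translateX86ToMsvcIntrin (and the analogous ARM/AArch64 helpers above) before the generic argument loop runs, so that EmitMSVCBuiltinExpr stays the only place that evaluates the call's arguments; otherwise an argument with side effects, e.g. _InterlockedExchangeAdd(&a[i++], 1), would be emitted twice. The helpers themselves are not part of these hunks; presumably they are plain switches from the target builtin ID to the shared MSVCIntrin enumeration, roughly along these lines (a sketch, not code from the patch):

static Optional<MSVCIntrin> translateArmToMsvcIntrin(unsigned BuiltinID) {
  switch (BuiltinID) {
  default:
    return None;
  case ARM::BI_BitScanForward:
  case ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case ARM::BI_InterlockedExchangeAdd8_acq:
  case ARM::BI_InterlockedExchangeAdd16_acq:
  case ARM::BI_InterlockedExchangeAdd_acq:
  case ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  // ... one group per case removed from the old switches above ...
  }
}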
@@ -12358,9 +12670,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, break; } - unsigned MinElts = - std::min(cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(), - cast<llvm::VectorType>(Ops[2]->getType())->getNumElements()); + unsigned MinElts = std::min( + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(), + cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements()); Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); @@ -12467,9 +12779,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, break; } - unsigned MinElts = - std::min(cast<llvm::VectorType>(Ops[2]->getType())->getNumElements(), - cast<llvm::VectorType>(Ops[3]->getType())->getNumElements()); + unsigned MinElts = std::min( + cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(), + cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements()); Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); @@ -12491,10 +12803,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_extracti64x2_256_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: { - auto *DstTy = cast<llvm::VectorType>(ConvertType(E->getType())); + auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType())); unsigned NumElts = DstTy->getNumElements(); unsigned SrcNumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); unsigned SubVectors = SrcNumElts / NumElts; unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); @@ -12506,7 +12818,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[i] = i + Index; Value *Res = Builder.CreateShuffleVector(Ops[0], - UndefValue::get(Ops[0]->getType()), makeArrayRef(Indices, NumElts), "extract"); @@ -12532,9 +12843,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_insertf64x2_512: case X86::BI__builtin_ia32_inserti64x2_512: { unsigned DstNumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); unsigned SrcNumElts = - cast<llvm::VectorType>(Ops[1]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements(); unsigned SubVectors = DstNumElts / SrcNumElts; unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); @@ -12546,7 +12857,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[i] = (i >= SrcNumElts) ? 
SrcNumElts + (i % SrcNumElts) : i; Value *Op1 = Builder.CreateShuffleVector(Ops[1], - UndefValue::get(Ops[1]->getType()), makeArrayRef(Indices, DstNumElts), "widen"); @@ -12599,7 +12909,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pblendd128: case X86::BI__builtin_ia32_pblendd256: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); int Indices[16]; @@ -12616,7 +12926,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); // Splat the 8-bits of immediate 4 times to help the loop wrap around. @@ -12632,15 +12942,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Indices[l + i] = l + i; } - return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), - makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), "pshuflw"); } case X86::BI__builtin_ia32_pshufhw: case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); // Splat the 8-bits of immediate 4 times to help the loop wrap around. @@ -12656,8 +12965,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), - makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), "pshufhw"); } case X86::BI__builtin_ia32_pshufd: @@ -12670,7 +12978,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vpermilpd512: case X86::BI__builtin_ia32_vpermilps512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; @@ -12686,8 +12994,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), - makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), "permil"); } case X86::BI__builtin_ia32_shufpd: @@ -12697,7 +13004,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; @@ -12725,7 +13032,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_permdi512: case X86::BI__builtin_ia32_permdf512: { unsigned Imm = 
cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); // These intrinsics operate on 256-bit lanes of four 64-bit elements. @@ -12734,8 +13041,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); - return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), - makeArrayRef(Indices, NumElts), + return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts), "perm"); } case X86::BI__builtin_ia32_palignr128: @@ -12744,7 +13050,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); assert(NumElts % 16 == 0); // If palignr is shifting the pair of vectors more than the size of two @@ -12782,7 +13088,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; // Mask the shift amount to width of two vectors. @@ -12805,7 +13111,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_shuf_i32x4: case X86::BI__builtin_ia32_shuf_i64x2: { unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; unsigned NumLaneElts = NumElts / NumLanes; @@ -12832,7 +13138,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_permti256: { unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); // This takes a very simple approach since there are two lanes and a // shuffle can have 2 inputs. So we reserve the first input for the first @@ -12870,7 +13176,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; - auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType()); + auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType()); // Builtin type is vXi64 so multiply by 8 to get bytes. unsigned NumElts = ResultType->getNumElements() * 8; @@ -12900,7 +13206,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; - auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType()); + auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType()); // Builtin type is vXi64 so multiply by 8 to get bytes. 
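    // (That gives 16, 32 and 64 bytes for the 128-, 256- and 512-bit forms;
    // as with the underlying instruction, the byte shift is applied within
    // each 128-bit lane.)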
unsigned NumElts = ResultType->getNumElements() * 8; @@ -13342,9 +13648,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pabsb512: case X86::BI__builtin_ia32_pabsw512: case X86::BI__builtin_ia32_pabsd512: - case X86::BI__builtin_ia32_pabsq512: - return EmitX86Abs(*this, Ops); - + case X86::BI__builtin_ia32_pabsq512: { + Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: @@ -13357,7 +13664,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pmaxsw512: case X86::BI__builtin_ia32_pmaxsd512: case X86::BI__builtin_ia32_pmaxsq512: - return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax); case X86::BI__builtin_ia32_pmaxub128: case X86::BI__builtin_ia32_pmaxuw128: case X86::BI__builtin_ia32_pmaxud128: @@ -13370,7 +13677,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pmaxuw512: case X86::BI__builtin_ia32_pmaxud512: case X86::BI__builtin_ia32_pmaxuq512: - return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax); case X86::BI__builtin_ia32_pminsb128: case X86::BI__builtin_ia32_pminsw128: case X86::BI__builtin_ia32_pminsd128: @@ -13383,7 +13690,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pminsw512: case X86::BI__builtin_ia32_pminsd512: case X86::BI__builtin_ia32_pminsq512: - return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin); case X86::BI__builtin_ia32_pminub128: case X86::BI__builtin_ia32_pminuw128: case X86::BI__builtin_ia32_pminud128: @@ -13396,7 +13703,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pminuw512: case X86::BI__builtin_ia32_pminud512: case X86::BI__builtin_ia32_pminuq512: - return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin); case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: @@ -13470,6 +13777,68 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ops 0 and 1 are swapped. 
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); + // Reductions + case X86::BI__builtin_ia32_reduce_add_d512: + case X86::BI__builtin_ia32_reduce_add_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_and_d512: + case X86::BI__builtin_ia32_reduce_and_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_fadd_pd512: + case X86::BI__builtin_ia32_reduce_fadd_ps512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); + return Builder.CreateCall(F, {Ops[0], Ops[1]}); + } + case X86::BI__builtin_ia32_reduce_fmul_pd512: + case X86::BI__builtin_ia32_reduce_fmul_ps512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); + return Builder.CreateCall(F, {Ops[0], Ops[1]}); + } + case X86::BI__builtin_ia32_reduce_mul_d512: + case X86::BI__builtin_ia32_reduce_mul_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_or_d512: + case X86::BI__builtin_ia32_reduce_or_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_or, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_smax_d512: + case X86::BI__builtin_ia32_reduce_smax_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_smax, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_smin_d512: + case X86::BI__builtin_ia32_reduce_smin_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_smin, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_umax_d512: + case X86::BI__builtin_ia32_reduce_umax_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_umax, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_reduce_umin_d512: + case X86::BI__builtin_ia32_reduce_umin_q512: { + Function *F = + CGM.getIntrinsic(Intrinsic::vector_reduce_umin, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0]}); + } + // 3DNow! 
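For reference, the new reduce builtins above map directly onto the target-independent llvm.vector.reduce.* intrinsics: the integer forms take only the vector operand, while the fadd/fmul forms also take a scalar start value, which is why they pass {Ops[0], Ops[1]}. A rough scalar model of that semantics, with plain arrays standing in for the 512-bit vectors; reduceAdd and reduceFAdd are illustrative names only:

    #include <cstddef>
    #include <cstdint>

    // Integer reduction: the equivalent of llvm.vector.reduce.add over 16 x i32.
    int32_t reduceAdd(const int32_t (&V)[16]) {
      int32_t Sum = 0;
      for (size_t I = 0; I != 16; ++I)
        Sum += V[I];
      return Sum;
    }

    // FP reduction: llvm.vector.reduce.fadd takes a start value as its first
    // operand, then folds the lanes into it.
    double reduceFAdd(double Start, const double (&V)[8]) {
      double Acc = Start;
      for (size_t I = 0; I != 8; ++I)
        Acc += V[I];
      return Acc;
    }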
case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { @@ -13547,7 +13916,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); Value *MaskIn = Ops[2]; Ops.erase(&Ops[2]); @@ -13585,7 +13954,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vp2intersect_d_256: case X86::BI__builtin_ia32_vp2intersect_d_128: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); Intrinsic::ID ID; switch (BuiltinID) { @@ -13644,7 +14013,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vpshufbitqmb256_mask: case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); Value *MaskIn = Ops[2]; Ops.erase(&Ops[2]); @@ -13691,21 +14060,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordps: case X86::BI__builtin_ia32_cmpordpd: return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); - case X86::BI__builtin_ia32_cmpps: - case X86::BI__builtin_ia32_cmpps256: - case X86::BI__builtin_ia32_cmppd: - case X86::BI__builtin_ia32_cmppd256: case X86::BI__builtin_ia32_cmpps128_mask: case X86::BI__builtin_ia32_cmpps256_mask: case X86::BI__builtin_ia32_cmpps512_mask: case X86::BI__builtin_ia32_cmppd128_mask: case X86::BI__builtin_ia32_cmppd256_mask: - case X86::BI__builtin_ia32_cmppd512_mask: { + case X86::BI__builtin_ia32_cmppd512_mask: + IsMaskFCmp = true; + LLVM_FALLTHROUGH; + case X86::BI__builtin_ia32_cmpps: + case X86::BI__builtin_ia32_cmpps256: + case X86::BI__builtin_ia32_cmppd: + case X86::BI__builtin_ia32_cmppd256: { // Lowering vector comparisons to fcmp instructions, while // ignoring signalling behaviour requested // ignoring rounding mode requested - // This is is only possible as long as FENV_ACCESS is not implemented. - // See also: https://reviews.llvm.org/D45616 + // This is only possible if fp-model is not strict and FENV_ACCESS is off. // The third argument is the comparison condition, and integer in the // range [0, 31] @@ -13745,8 +14115,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // If the predicate is true or false and we're using constrained intrinsics, // we don't have a compare intrinsic we can use. Just use the legacy X86 // specific intrinsic. - if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) && - Builder.getIsFPConstrained()) { + // If the intrinsic is mask enabled and we're using constrained intrinsics, + // use the legacy X86 specific intrinsic. 
+ if (Builder.getIsFPConstrained() && + (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE || + IsMaskFCmp)) { Intrinsic::ID IID; switch (BuiltinID) { @@ -13764,36 +14137,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, IID = Intrinsic::x86_avx_cmp_pd_256; break; case X86::BI__builtin_ia32_cmpps512_mask: - IID = Intrinsic::x86_avx512_cmp_ps_512; + IID = Intrinsic::x86_avx512_mask_cmp_ps_512; break; case X86::BI__builtin_ia32_cmppd512_mask: - IID = Intrinsic::x86_avx512_cmp_pd_512; + IID = Intrinsic::x86_avx512_mask_cmp_pd_512; break; case X86::BI__builtin_ia32_cmpps128_mask: - IID = Intrinsic::x86_avx512_cmp_ps_128; + IID = Intrinsic::x86_avx512_mask_cmp_ps_128; break; case X86::BI__builtin_ia32_cmpps256_mask: - IID = Intrinsic::x86_avx512_cmp_ps_256; + IID = Intrinsic::x86_avx512_mask_cmp_ps_256; break; case X86::BI__builtin_ia32_cmppd128_mask: - IID = Intrinsic::x86_avx512_cmp_pd_128; + IID = Intrinsic::x86_avx512_mask_cmp_pd_128; break; case X86::BI__builtin_ia32_cmppd256_mask: - IID = Intrinsic::x86_avx512_cmp_pd_256; + IID = Intrinsic::x86_avx512_mask_cmp_pd_256; break; } Function *Intr = CGM.getIntrinsic(IID); - if (cast<llvm::VectorType>(Intr->getReturnType()) - ->getElementType() - ->isIntegerTy(1)) { + if (IsMaskFCmp) { unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); - Value *MaskIn = Ops[3]; - Ops.erase(&Ops[3]); - + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); + Ops[3] = getMaskVecValue(*this, Ops[3], NumElts); Value *Cmp = Builder.CreateCall(Intr, Ops); - return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); + return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr); } return Builder.CreateCall(Intr, Ops); @@ -13801,16 +14170,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Builtins without the _mask suffix return a vector of integers // of the same width as the input vectors - switch (BuiltinID) { - case X86::BI__builtin_ia32_cmpps512_mask: - case X86::BI__builtin_ia32_cmppd512_mask: - case X86::BI__builtin_ia32_cmpps128_mask: - case X86::BI__builtin_ia32_cmpps256_mask: - case X86::BI__builtin_ia32_cmppd128_mask: - case X86::BI__builtin_ia32_cmppd256_mask: { - // FIXME: Support SAE. + if (IsMaskFCmp) { + // We ignore SAE if strict FP is disabled. We only keep precise + // exception behavior under strict FP. 
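Conceptually, the *_mask compare builtins handled here return a bitmask rather than a vector of lanes: each lane comparison contributes one bit, and the incoming write mask (Ops[3]) clears the masked-off lanes. A toy scalar model of that behaviour, under the assumption that the result is simply the lane-wise compare ANDed with the write mask; packMaskedCmp is an illustrative helper, not clang code:

    #include <cstddef>
    #include <cstdint>

    // Models an 8-lane masked FP compare returning a k-register style bitmask:
    // bit I is set only if lane I passes the predicate and is enabled in MaskIn.
    uint8_t packMaskedCmp(const double (&A)[8], const double (&B)[8],
                          bool (*Pred)(double, double), uint8_t MaskIn) {
      uint8_t Result = 0;
      for (size_t I = 0; I != 8; ++I)
        if (Pred(A[I], B[I]))
          Result |= uint8_t(1u << I);
      return Result & MaskIn;
    }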
unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); Value *Cmp; if (IsSignaling) Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); @@ -13818,9 +14182,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); } - default: - return getVectorFCmpIR(Pred, IsSignaling); - } + + return getVectorFCmpIR(Pred, IsSignaling); } // SSE scalar comparison intrinsics @@ -13869,7 +14232,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { Ops[2] = getMaskVecValue( *this, Ops[2], - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements()); + cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements()); Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } @@ -13935,25 +14298,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI__shiftleft128: case X86::BI__shiftright128: { - // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this: - // llvm::Function *F = CGM.getIntrinsic( - // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr, - // Int64Ty); - // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); - // return Builder.CreateCall(F, Ops); - llvm::Type *Int128Ty = Builder.getInt128Ty(); - Value *HighPart128 = - Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64); - Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty); - Value *Val = Builder.CreateOr(HighPart128, LowPart128); - Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), - llvm::ConstantInt::get(Int128Ty, 0x3f)); - Value *Res; - if (BuiltinID == X86::BI__shiftleft128) - Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64); - else - Res = Builder.CreateLShr(Val, Amt); - return Builder.CreateTrunc(Res, Int64Ty); + llvm::Function *F = CGM.getIntrinsic( + BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr, + Int64Ty); + // Flip low/high ops and zero-extend amount to matching type. 
+ // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt) + // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt) + std::swap(Ops[0], Ops[1]); + Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); + return Builder.CreateCall(F, Ops); } case X86::BI_ReadWriteBarrier: case X86::BI_ReadBarrier: @@ -13961,65 +14314,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, llvm::SyncScope::SingleThread); } - case X86::BI_BitScanForward: - case X86::BI_BitScanForward64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); - case X86::BI_BitScanReverse: - case X86::BI_BitScanReverse64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); - - case X86::BI_InterlockedAnd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); - case X86::BI_InterlockedExchange64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); - case X86::BI_InterlockedExchangeAdd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); - case X86::BI_InterlockedExchangeSub64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); - case X86::BI_InterlockedOr64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); - case X86::BI_InterlockedXor64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); - case X86::BI_InterlockedDecrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); - case X86::BI_InterlockedIncrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); - case X86::BI_InterlockedCompareExchange128: { - // InterlockedCompareExchange128 doesn't directly refer to 128bit ints, - // instead it takes pointers to 64bit ints for Destination and - // ComparandResult, and exchange is taken as two 64bit ints (high & low). - // The previous value is written to ComparandResult, and success is - // returned. - - llvm::Type *Int128Ty = Builder.getInt128Ty(); - llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); - - Value *Destination = - Builder.CreateBitCast(Ops[0], Int128PtrTy); - Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty); - Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty); - Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy), - getContext().toCharUnitsFromBits(128)); - - Value *Exchange = Builder.CreateOr( - Builder.CreateShl(ExchangeHigh128, 64, "", false, false), - ExchangeLow128); - - Value *Comparand = Builder.CreateLoad(ComparandResult); - - AtomicCmpXchgInst *CXI = - Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); - CXI->setVolatile(true); - - // Write the result back to the inout pointer. - Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult); - - // Get the success boolean and zero extend it to i8. 
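The funnel-shift mapping used for __shiftleft128 above can be checked against the old open-coded i128 expansion. A small sketch under the usual assumptions (Clang/GCC's unsigned __int128 extension, shift amount taken modulo 64, as both the old lowering and fshl/fshr on i64 do); both function names are illustrative only:

    #include <cassert>
    #include <cstdint>

    // Old-style expansion: concatenate High:Low into an i128 and shift.
    uint64_t shiftleft128_ref(uint64_t Low, uint64_t High, uint8_t Amt) {
      unsigned __int128 Val = ((unsigned __int128)High << 64) | Low;
      return (uint64_t)((Val << (Amt & 0x3f)) >> 64);
    }

    // Funnel-shift formulation: fshl(High, Low, Amt) on i64 yields the high
    // 64 bits of (High:Low) << (Amt % 64), which is exactly the value above.
    uint64_t shiftleft128_fshl(uint64_t Low, uint64_t High, uint8_t Amt) {
      unsigned N = Amt & 0x3f;
      return N ? (High << N) | (Low >> (64 - N)) : High;
    }

    int main() {
      assert(shiftleft128_ref(0x0123456789abcdefULL, 0xfedcba9876543210ULL, 8) ==
             shiftleft128_fshl(0x0123456789abcdefULL, 0xfedcba9876543210ULL, 8));
    }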
- Value *Success = Builder.CreateExtractValue(CXI, 1); - return Builder.CreateZExt(Success, ConvertType(E->getType())); - } case X86::BI_AddressOfReturnAddress: { Function *F = @@ -14076,28 +14370,124 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_paddsw256: case X86::BI__builtin_ia32_paddsb128: case X86::BI__builtin_ia32_paddsw128: - return EmitX86AddSubSatExpr(*this, Ops, true, true); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat); case X86::BI__builtin_ia32_paddusb512: case X86::BI__builtin_ia32_paddusw512: case X86::BI__builtin_ia32_paddusb256: case X86::BI__builtin_ia32_paddusw256: case X86::BI__builtin_ia32_paddusb128: case X86::BI__builtin_ia32_paddusw128: - return EmitX86AddSubSatExpr(*this, Ops, false, true); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat); case X86::BI__builtin_ia32_psubsb512: case X86::BI__builtin_ia32_psubsw512: case X86::BI__builtin_ia32_psubsb256: case X86::BI__builtin_ia32_psubsw256: case X86::BI__builtin_ia32_psubsb128: case X86::BI__builtin_ia32_psubsw128: - return EmitX86AddSubSatExpr(*this, Ops, true, false); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat); case X86::BI__builtin_ia32_psubusb512: case X86::BI__builtin_ia32_psubusw512: case X86::BI__builtin_ia32_psubusb256: case X86::BI__builtin_ia32_psubusw256: case X86::BI__builtin_ia32_psubusb128: case X86::BI__builtin_ia32_psubusw128: - return EmitX86AddSubSatExpr(*this, Ops, false, false); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); + case X86::BI__builtin_ia32_encodekey128_u32: { + Intrinsic::ID IID = Intrinsic::x86_encodekey128; + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); + + for (int i = 0; i < 6; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[2], i * 16); + Ptr = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Extract->getType())); + Builder.CreateAlignedStore(Extract, Ptr, Align(1)); + } + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_encodekey256_u32: { + Intrinsic::ID IID = Intrinsic::x86_encodekey256; + + Value *Call = + Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); + + for (int i = 0; i < 7; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[3], i * 16); + Ptr = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Extract->getType())); + Builder.CreateAlignedStore(Extract, Ptr, Align(1)); + } + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_aesenc128kl_u8: + case X86::BI__builtin_ia32_aesdec128kl_u8: + case X86::BI__builtin_ia32_aesenc256kl_u8: + case X86::BI__builtin_ia32_aesdec256kl_u8: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_aesenc128kl_u8: + IID = Intrinsic::x86_aesenc128kl; + break; + case X86::BI__builtin_ia32_aesdec128kl_u8: + IID = Intrinsic::x86_aesdec128kl; + break; + case X86::BI__builtin_ia32_aesenc256kl_u8: + IID = Intrinsic::x86_aesenc256kl; + break; + case X86::BI__builtin_ia32_aesdec256kl_u8: + IID = Intrinsic::x86_aesdec256kl; + break; + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); + + Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[0]); + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_aesencwide128kl_u8: + case 
X86::BI__builtin_ia32_aesdecwide128kl_u8: + case X86::BI__builtin_ia32_aesencwide256kl_u8: + case X86::BI__builtin_ia32_aesdecwide256kl_u8: { + Intrinsic::ID IID; + switch (BuiltinID) { + case X86::BI__builtin_ia32_aesencwide128kl_u8: + IID = Intrinsic::x86_aesencwide128kl; + break; + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + IID = Intrinsic::x86_aesdecwide128kl; + break; + case X86::BI__builtin_ia32_aesencwide256kl_u8: + IID = Intrinsic::x86_aesencwide256kl; + break; + case X86::BI__builtin_ia32_aesdecwide256kl_u8: + IID = Intrinsic::x86_aesdecwide256kl; + break; + } + + Value *InOps[9]; + InOps[0] = Ops[2]; + for (int i = 0; i != 8; ++i) { + Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i); + InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16)); + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); + + for (int i = 0; i != 8; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i); + Builder.CreateAlignedStore(Extract, Ptr, Align(16)); + } + + return Builder.CreateExtractValue(Call, 0); + } } } @@ -14284,6 +14674,63 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); return Builder.CreateCall(F, {X, Undef}); } + case PPC::BI__builtin_altivec_vec_replace_elt: + case PPC::BI__builtin_altivec_vec_replace_unaligned: { + // The third argument of vec_replace_elt and vec_replace_unaligned must + // be a compile time constant and will be emitted either to the vinsw + // or vinsd instruction. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && + "Third Arg to vinsw/vinsd intrinsic must be a constant integer!"); + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Function *F = nullptr; + Value *Call = nullptr; + int64_t ConstArg = ArgCI->getSExtValue(); + unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits(); + bool Is32Bit = false; + assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width"); + // The input to vec_replace_elt is an element index, not a byte index. + if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt) + ConstArg *= ArgWidth / 8; + if (ArgWidth == 32) { + Is32Bit = true; + // When the second argument is 32 bits, it can either be an integer or + // a float. The vinsw intrinsic is used in this case. + F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw); + // Fix the constant according to endianess. + if (getTarget().isLittleEndian()) + ConstArg = 12 - ConstArg; + } else { + // When the second argument is 64 bits, it can either be a long long or + // a double. The vinsd intrinsic is used in this case. + F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd); + // Fix the constant for little endian. + if (getTarget().isLittleEndian()) + ConstArg = 8 - ConstArg; + } + Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg); + // Depending on ArgWidth, the input vector could be a float or a double. + // If the input vector is a float type, bitcast the inputs to integers. Or, + // if the input vector is a double, bitcast the inputs to 64-bit integers. + if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) { + Ops[0] = Builder.CreateBitCast( + Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4) + : llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty); + } + // Emit the call to vinsw or vinsd. + Call = Builder.CreateCall(F, Ops); + // Depending on the builtin, bitcast to the approriate result type. 
+ if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt && + !Ops[1]->getType()->isIntegerTy()) + return Builder.CreateBitCast(Call, ResultType); + else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt && + Ops[1]->getType()->isIntegerTy()) + return Call; + else + return Builder.CreateBitCast(Call, + llvm::FixedVectorType::get(Int8Ty, 16)); + } case PPC::BI__builtin_altivec_vpopcntb: case PPC::BI__builtin_altivec_vpopcnth: case PPC::BI__builtin_altivec_vpopcntw: @@ -14329,8 +14776,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || BuiltinID == PPC::BI__builtin_vsx_xvrspic) ID = Builder.getIsFPConstrained() - ? Intrinsic::experimental_constrained_nearbyint - : Intrinsic::nearbyint; + ? Intrinsic::experimental_constrained_rint + : Intrinsic::rint; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || BuiltinID == PPC::BI__builtin_vsx_xvrspip) ID = Builder.getIsFPConstrained() @@ -14565,6 +15012,77 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractElement(Unpacked, Index); } + + // The PPC MMA builtins take a pointer to a __vector_quad as an argument. + // Some of the MMA instructions accumulate their result into an existing + // accumulator whereas the others generate a new accumulator. So we need to + // use custom code generation to expand a builtin call with a pointer to a + // load (if the corresponding instruction accumulates its result) followed by + // the call to the intrinsic and a store of the result. +#define CUSTOM_BUILTIN(Name, Types, Accumulate) \ + case PPC::BI__builtin_##Name: +#include "clang/Basic/BuiltinsPPC.def" + { + // The first argument of these two builtins is a pointer used to store their + // result. However, the llvm intrinsics return their result in multiple + // return values. So, here we emit code extracting these values from the + // intrinsic results and storing them using that pointer. 
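The index fix-up in the vec_replace_elt lowering above is easy to misread, so a standalone model may help: the user supplies an element index (vec_replace_elt) or a byte index (vec_replace_unaligned), and vinsw/vinsd expect a big-endian byte offset, hence the 12 - N / 8 - N mirroring on little-endian targets. A sketch with a hypothetical helper name (vinsByteOffset is not part of the patch):

    #include <cassert>

    // ElemWidthBits is 32 (vinsw) or 64 (vinsd). IsElementIndex is true for
    // vec_replace_elt, false for vec_replace_unaligned (already a byte index).
    int vinsByteOffset(int Arg, unsigned ElemWidthBits, bool IsElementIndex,
                       bool IsLittleEndian) {
      if (IsElementIndex)
        Arg *= ElemWidthBits / 8;                    // element index -> byte index
      if (IsLittleEndian)
        Arg = (ElemWidthBits == 32 ? 12 : 8) - Arg;  // mirror within the 16 bytes
      return Arg;
    }

    int main() {
      // Replacing word element 1 on a little-endian target lands at byte offset 8.
      assert(vinsByteOffset(/*Arg=*/1, /*ElemWidthBits=*/32,
                            /*IsElementIndex=*/true, /*IsLittleEndian=*/true) == 8);
    }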
+ if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc || + BuiltinID == PPC::BI__builtin_vsx_disassemble_pair) { + unsigned NumVecs = 2; + auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair; + if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) { + NumVecs = 4; + Intrinsic = Intrinsic::ppc_mma_disassemble_acc; + } + llvm::Function *F = CGM.getIntrinsic(Intrinsic); + Address Addr = EmitPointerWithAlignment(E->getArg(1)); + Value *Vec = Builder.CreateLoad(Addr); + Value *Call = Builder.CreateCall(F, {Vec}); + llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16); + Value *Ptr = Builder.CreateBitCast(Ops[0], VTy->getPointerTo()); + for (unsigned i=0; i<NumVecs; i++) { + Value *Vec = Builder.CreateExtractValue(Call, i); + llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i); + Value *GEP = Builder.CreateInBoundsGEP(Ptr, Index); + Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16)); + } + return Call; + } + bool Accumulate; + switch (BuiltinID) { + #define CUSTOM_BUILTIN(Name, Types, Acc) \ + case PPC::BI__builtin_##Name: \ + ID = Intrinsic::ppc_##Name; \ + Accumulate = Acc; \ + break; + #include "clang/Basic/BuiltinsPPC.def" + } + if (BuiltinID == PPC::BI__builtin_vsx_lxvp || + BuiltinID == PPC::BI__builtin_vsx_stxvp) { + if (BuiltinID == PPC::BI__builtin_vsx_lxvp) { + Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); + Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); + } else { + Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); + Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); + } + Ops.pop_back(); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, ""); + } + SmallVector<Value*, 4> CallOps; + if (Accumulate) { + Address Addr = EmitPointerWithAlignment(E->getArg(0)); + Value *Acc = Builder.CreateLoad(Addr); + CallOps.push_back(Acc); + } + for (unsigned i=1; i<Ops.size(); i++) + CallOps.push_back(Ops[i]); + llvm::Function *F = CGM.getIntrinsic(ID); + Value *Call = Builder.CreateCall(F, CallOps); + return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64)); + } } } @@ -14608,6 +15126,22 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { llvm::MDNode::get(CGF.getLLVMContext(), None)); return LD; } + +// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. +Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { + const unsigned XOffset = 12; + auto *DP = EmitAMDGPUDispatchPtr(CGF); + // Indexing the HSA kernel_dispatch_packet struct. 
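Stepping back, the MMA expansion described above always has the same shape: optionally load the existing accumulator through the pointer argument, call the intrinsic, then store the resulting accumulator back through that pointer. A toy host-side model of that load/compute/store pattern; Quad, toyIntrinsic, and emitAccumulatingBuiltin are made-up stand-ins, not the actual __vector_quad type or PPC intrinsics:

    #include <cstring>

    // Stand-in for the 512-bit __vector_quad accumulator.
    struct Quad { unsigned char Bytes[64]; };

    // Stand-in for an MMA intrinsic that takes and returns an accumulator.
    Quad toyIntrinsic(const Quad &Acc) { Quad R = Acc; R.Bytes[0] ^= 1; return R; }

    // Expansion for an accumulating builtin: load, call, store back.
    void emitAccumulatingBuiltin(Quad *AccPtr) {
      Quad Acc;
      std::memcpy(&Acc, AccPtr, sizeof(Quad));     // load the current accumulator
      Quad Result = toyIntrinsic(Acc);             // call the intrinsic
      std::memcpy(AccPtr, &Result, sizeof(Quad));  // store the result back
    }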
+ auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4); + auto *GEP = CGF.Builder.CreateGEP(DP, Offset); + auto *DstTy = + CGF.Int32Ty->getPointerTo(GEP->getType()->getPointerAddressSpace()); + auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy); + auto *LD = CGF.Builder.CreateLoad(Address(Cast, CharUnits::fromQuantity(4))); + LD->setMetadata(llvm::LLVMContext::MD_invariant_load, + llvm::MDNode::get(CGF.getLLVMContext(), None)); + return LD; +} } // namespace // For processing memory ordering and memory scope arguments of various @@ -14811,6 +15345,32 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + case AMDGPU::BI__builtin_amdgcn_ds_fminf: + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { + Intrinsic::ID Intrin; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + Intrin = Intrinsic::amdgcn_ds_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_ds_fminf: + Intrin = Intrinsic::amdgcn_ds_fmin; + break; + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + Intrin = Intrinsic::amdgcn_ds_fmax; + break; + } + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); + llvm::Value *Src4 = EmitScalarExpr(E->getArg(4)); + llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() }); + llvm::FunctionType *FTy = F->getFunctionType(); + llvm::Type *PTy = FTy->getParamType(0); + Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy); + return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec")); @@ -14842,6 +15402,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_workgroup_size_z: return EmitAMDGPUWorkGroupSize(*this, 2); + // amdgcn grid size + case AMDGPU::BI__builtin_amdgcn_grid_size_x: + return EmitAMDGPUGridSize(*this, 0); + case AMDGPU::BI__builtin_amdgcn_grid_size_y: + return EmitAMDGPUGridSize(*this, 1); + case AMDGPU::BI__builtin_amdgcn_grid_size_z: + return EmitAMDGPUGridSize(*this, 2); + // r600 intrinsics case AMDGPU::BI__builtin_r600_recipsqrt_ieee: case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: @@ -15089,11 +15657,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); // Constant-fold the M4 and M5 mask arguments. - llvm::APSInt M4, M5; - bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); - bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); - assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); - (void)IsConstM4; (void)IsConstM5; + llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext()); + llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some combinations of M4 and M5. 
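The XOffset of 12 used by EmitAMDGPUGridSize above corresponds to the grid_size_x field of the standard HSA kernel dispatch packet, with grid_size_y and grid_size_z following at 4-byte strides. A sketch of the relevant packet prefix, assuming the usual HSA layout; the struct and field names here are for illustration and are not taken from the patch:

    #include <cstddef>
    #include <cstdint>

    // Leading fields of the HSA kernel_dispatch_packet, as assumed by the
    // hard-coded offsets (workgroup sizes at 4/6/8, grid sizes at 12/16/20).
    struct KernelDispatchPacketPrefix {
      uint16_t header;            // offset 0
      uint16_t setup;             // offset 2
      uint16_t workgroup_size_x;  // offset 4
      uint16_t workgroup_size_y;  // offset 6
      uint16_t workgroup_size_z;  // offset 8
      uint16_t reserved0;         // offset 10
      uint32_t grid_size_x;       // offset 12 (EmitAMDGPUGridSize, Index 0)
      uint32_t grid_size_y;       // offset 16
      uint32_t grid_size_z;       // offset 20
    };

    static_assert(offsetof(KernelDispatchPacketPrefix, grid_size_x) == 12, "");
    static_assert(offsetof(KernelDispatchPacketPrefix, grid_size_z) == 20, "");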
Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15148,10 +15713,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. - llvm::APSInt M4; - bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); - assert(IsConstM4 && "Constant arg isn't actually constant?"); - (void)IsConstM4; + llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15185,10 +15747,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. - llvm::APSInt M4; - bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); - assert(IsConstM4 && "Constant arg isn't actually constant?"); - (void)IsConstM4; + llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15855,10 +16414,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - llvm::APSInt isColMajorArg; - if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + Optional<llvm::APSInt> isColMajorArg = + E->getArg(3)->getIntegerConstantExpr(getContext()); + if (!isColMajorArg) return nullptr; - bool isColMajor = isColMajorArg.getSExtValue(); + bool isColMajor = isColMajorArg->getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? II.IID_col : II.IID_row; if (IID == 0) @@ -15899,10 +16459,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - llvm::APSInt isColMajorArg; - if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + Optional<llvm::APSInt> isColMajorArg = + E->getArg(3)->getIntegerConstantExpr(getContext()); + if (!isColMajorArg) return nullptr; - bool isColMajor = isColMajorArg.getSExtValue(); + bool isColMajor = isColMajorArg->getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? II.IID_col : II.IID_row; if (IID == 0) @@ -15949,16 +16510,20 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); Address SrcC = EmitPointerWithAlignment(E->getArg(3)); - llvm::APSInt LayoutArg; - if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) + Optional<llvm::APSInt> LayoutArg = + E->getArg(4)->getIntegerConstantExpr(getContext()); + if (!LayoutArg) return nullptr; - int Layout = LayoutArg.getSExtValue(); + int Layout = LayoutArg->getSExtValue(); if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) SatfArg = 0; // .b1 does not have satf argument. 
- else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + else if (Optional<llvm::APSInt> OptSatfArg = + E->getArg(5)->getIntegerConstantExpr(getContext())) + SatfArg = *OptSatfArg; + else return nullptr; bool Satf = SatfArg.getSExtValue(); NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); @@ -16106,16 +16671,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_memory_size: { llvm::Type *ResultType = ConvertType(E->getType()); Value *I = EmitScalarExpr(E->getArg(0)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); return Builder.CreateCall(Callee, I); } case WebAssembly::BI__builtin_wasm_memory_grow: { llvm::Type *ResultType = ConvertType(E->getType()); - Value *Args[] = { - EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(1)) - }; - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + Value *Args[] = {EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(1))}; + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); return Builder.CreateCall(Callee, Args); } case WebAssembly::BI__builtin_wasm_tls_size: { @@ -16138,28 +16703,28 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); return Builder.CreateCall(Callee, {Tag, Obj}); } - case WebAssembly::BI__builtin_wasm_rethrow_in_catch: { - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch); + case WebAssembly::BI__builtin_wasm_rethrow: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); return Builder.CreateCall(Callee); } - case WebAssembly::BI__builtin_wasm_atomic_wait_i32: { + case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } - case WebAssembly::BI__builtin_wasm_atomic_wait_i64: { + case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } - case WebAssembly::BI__builtin_wasm_atomic_notify: { + case WebAssembly::BI__builtin_wasm_memory_atomic_notify: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Count = EmitScalarExpr(E->getArg(1)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32: @@ -16190,7 +16755,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, - {ResT, Src->getType()}); + {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case 
WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32: @@ -16201,7 +16766,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, - {ResT, Src->getType()}); + {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_min_f32: @@ -16210,8 +16775,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Function *Callee = CGM.getIntrinsic(Intrinsic::minimum, - ConvertType(E->getType())); + Function *Callee = + CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_max_f32: @@ -16220,8 +16785,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Function *Callee = CGM.getIntrinsic(Intrinsic::maximum, - ConvertType(E->getType())); + Function *Callee = + CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_pmin_f32x4: @@ -16287,9 +16852,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: { - llvm::APSInt LaneConst; - if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt LaneConst = + *E->getArg(1)->getIntegerConstantExpr(getContext()); Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); Value *Extract = Builder.CreateExtractElement(Vec, Lane); @@ -16315,9 +16879,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: case WebAssembly::BI__builtin_wasm_replace_lane_f32x4: case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: { - llvm::APSInt LaneConst; - if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt LaneConst = + *E->getArg(1)->getIntegerConstantExpr(getContext()); Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); Value *Val = EmitScalarExpr(E->getArg(2)); @@ -16430,12 +16993,95 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_q15mulr_saturate_s_i16x8: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_q15mulr_saturate_signed); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8: + case 
WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2: + IntNo = Intrinsic::wasm_extmul_low_signed; + break; + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2: + IntNo = Intrinsic::wasm_extmul_low_unsigned; + break; + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2: + IntNo = Intrinsic::wasm_extmul_high_signed; + break; + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: + IntNo = Intrinsic::wasm_extmul_high_unsigned; + break; + default: + llvm_unreachable("unexptected builtin ID"); + } + + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: + IntNo = Intrinsic::wasm_extadd_pairwise_signed; + break; + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: + IntNo = Intrinsic::wasm_extadd_pairwise_unsigned; + break; + default: + llvm_unreachable("unexptected builtin ID"); + } + + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + return Builder.CreateCall(Callee, Vec); + } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, - ConvertType(E->getType())); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {V1, V2, C}); + } + case WebAssembly::BI__builtin_wasm_signselect_i8x16: + case WebAssembly::BI__builtin_wasm_signselect_i16x8: + case WebAssembly::BI__builtin_wasm_signselect_i32x4: + case WebAssembly::BI__builtin_wasm_signselect_i64x2: { + Value *V1 = EmitScalarExpr(E->getArg(0)); + Value *V2 = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + Function *Callee = + 
CGM.getIntrinsic(Intrinsic::wasm_signselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: { @@ -16444,6 +17090,17 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_popcnt_i8x16: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_popcnt); + return Builder.CreateCall(Callee, {Vec}); + } + case WebAssembly::BI__builtin_wasm_eq_i64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_eq); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_any_true_i8x16: case WebAssembly::BI__builtin_wasm_any_true_i16x8: case WebAssembly::BI__builtin_wasm_any_true_i32x4: @@ -16475,7 +17132,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } case WebAssembly::BI__builtin_wasm_bitmask_i8x16: case WebAssembly::BI__builtin_wasm_bitmask_i16x8: - case WebAssembly::BI__builtin_wasm_bitmask_i32x4: { + case WebAssembly::BI__builtin_wasm_bitmask_i32x4: + case WebAssembly::BI__builtin_wasm_bitmask_i64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); @@ -16539,39 +17197,124 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); return Builder.CreateCall(Callee, {Low, High}); } - case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8: - case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8: - case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8: - case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: { + case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i64x2: + case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i64x2: + case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i64x2: + case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); unsigned IntNo; switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i64x2: IntNo = Intrinsic::wasm_widen_low_signed; break; - case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i64x2: IntNo = Intrinsic::wasm_widen_high_signed; break; - case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i64x2: IntNo = Intrinsic::wasm_widen_low_unsigned; break; - case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16: - case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: + case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i64x2: IntNo = Intrinsic::wasm_widen_high_unsigned; break; + } + Function *Callee = CGM.getIntrinsic(IntNo); + return 
Builder.CreateCall(Callee, Vec); + } + case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2: + case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2: + IntNo = Intrinsic::wasm_convert_low_signed; + break; + case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: + IntNo = Intrinsic::wasm_convert_low_unsigned; + break; + } + Function *Callee = CGM.getIntrinsic(IntNo); + return Builder.CreateCall(Callee, Vec); + } + case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_s_f64x2_i32x4: + case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_u_f64x2_i32x4: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_s_f64x2_i32x4: + IntNo = Intrinsic::wasm_trunc_saturate_zero_signed; + break; + case WebAssembly::BI__builtin_wasm_trunc_saturate_zero_u_f64x2_i32x4: + IntNo = Intrinsic::wasm_trunc_saturate_zero_unsigned; + break; + } + Function *Callee = CGM.getIntrinsic(IntNo); + return Builder.CreateCall(Callee, Vec); + } + case WebAssembly::BI__builtin_wasm_demote_zero_f64x2_f32x4: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_demote_zero); + return Builder.CreateCall(Callee, Vec); + } + case WebAssembly::BI__builtin_wasm_promote_low_f32x4_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_promote_low); + return Builder.CreateCall(Callee, Vec); + } + case WebAssembly::BI__builtin_wasm_load32_zero: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero); + return Builder.CreateCall(Callee, {Ptr}); + } + case WebAssembly::BI__builtin_wasm_load64_zero: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero); + return Builder.CreateCall(Callee, {Ptr}); + } + case WebAssembly::BI__builtin_wasm_load8_lane: + case WebAssembly::BI__builtin_wasm_load16_lane: + case WebAssembly::BI__builtin_wasm_load32_lane: + case WebAssembly::BI__builtin_wasm_load64_lane: + case WebAssembly::BI__builtin_wasm_store8_lane: + case WebAssembly::BI__builtin_wasm_store16_lane: + case WebAssembly::BI__builtin_wasm_store32_lane: + case WebAssembly::BI__builtin_wasm_store64_lane: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Vec = EmitScalarExpr(E->getArg(1)); + Optional<llvm::APSInt> LaneIdxConst = + E->getArg(2)->getIntegerConstantExpr(getContext()); + assert(LaneIdxConst && "Constant arg isn't actually constant?"); + Value *LaneIdx = llvm::ConstantInt::get(getLLVMContext(), *LaneIdxConst); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_load8_lane: + IntNo = Intrinsic::wasm_load8_lane; + break; + case WebAssembly::BI__builtin_wasm_load16_lane: + IntNo = Intrinsic::wasm_load16_lane; + break; + case WebAssembly::BI__builtin_wasm_load32_lane: + IntNo = Intrinsic::wasm_load32_lane; + break; + case WebAssembly::BI__builtin_wasm_load64_lane: + IntNo = Intrinsic::wasm_load64_lane; + break; + case WebAssembly::BI__builtin_wasm_store8_lane: + IntNo = Intrinsic::wasm_store8_lane; + break; + case WebAssembly::BI__builtin_wasm_store16_lane: + IntNo = Intrinsic::wasm_store16_lane; + break; + case WebAssembly::BI__builtin_wasm_store32_lane: + IntNo = Intrinsic::wasm_store32_lane; + break; + case 
WebAssembly::BI__builtin_wasm_store64_lane: + IntNo = Intrinsic::wasm_store64_lane; + break; default: llvm_unreachable("unexpected builtin ID"); } - Function *Callee = - CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()}); - return Builder.CreateCall(Callee, Vec); + Function *Callee = CGM.getIntrinsic(IntNo); + return Builder.CreateCall(Callee, {Ptr, Vec, LaneIdx}); } case WebAssembly::BI__builtin_wasm_shuffle_v8x16: { Value *Ops[18]; @@ -16579,14 +17322,24 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); while (OpIdx < 18) { - llvm::APSInt LaneConst; - if (!E->getArg(OpIdx)->isIntegerConstantExpr(LaneConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); - Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + Optional<llvm::APSInt> LaneConst = + E->getArg(OpIdx)->getIntegerConstantExpr(getContext()); + assert(LaneConst && "Constant arg isn't actually constant?"); + Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst); } Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); return Builder.CreateCall(Callee, Ops); } + case WebAssembly::BI__builtin_wasm_prefetch_t: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_prefetch_t); + return Builder.CreateCall(Callee, Ptr); + } + case WebAssembly::BI__builtin_wasm_prefetch_nt: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_prefetch_nt); + return Builder.CreateCall(Callee, Ptr); + } default: return nullptr; } |
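Finally, for the v8x16 shuffle handled above, the sixteen constant operands are byte selectors into the 32 bytes formed by concatenating the two input vectors, which is why each operand must be an integer constant expression. A small host-side model of that selection; wasmShuffleBytes is an illustrative helper, not part of the patch:

    #include <array>
    #include <cstdint>

    // Models i8x16.shuffle: each of the 16 lane indices picks a byte from the
    // 32-byte concatenation of A (indices 0-15) and B (indices 16-31).
    std::array<uint8_t, 16> wasmShuffleBytes(const std::array<uint8_t, 16> &A,
                                             const std::array<uint8_t, 16> &B,
                                             const std::array<uint8_t, 16> &Lanes) {
      std::array<uint8_t, 16> Out{};
      for (int I = 0; I != 16; ++I) {
        uint8_t L = Lanes[I] & 0x1f;  // valid indices are 0..31
        Out[I] = L < 16 ? A[L] : B[L - 16];
      }
      return Out;
    }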