Diffstat (limited to 'lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 1787 |
1 file changed, 1505 insertions, 282 deletions
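Context for the diff below: the patch adds acquire/release/no-fence ("_acq"/"_rel"/"_nf") variants of the MSVC Interlocked intrinsics and a dedicated EmitAtomicCmpXchgForMSIntrin helper that swaps the Exchange and Comparand operands to match LLVM's cmpxchg operand order, using a relaxed failure ordering when the success ordering is release. As a rough illustration of the semantics being lowered (not part of the patch; the function name here is invented for the example), a release-ordered compare-exchange behaves like this C++11 sketch:

    #include <atomic>
    #include <cstdio>

    // Models _InterlockedCompareExchange_rel(Destination, Exchange, Comparand):
    // returns the value previously stored at *Destination.
    long InterlockedCompareExchangeRel(std::atomic<long> *Destination,
                                       long Exchange, long Comparand) {
      long Expected = Comparand;
      // Success ordering: release. Failure ordering: relaxed, mirroring the
      // Release -> Monotonic fallback chosen by EmitAtomicCmpXchgForMSIntrin.
      Destination->compare_exchange_strong(Expected, Exchange,
                                           std::memory_order_release,
                                           std::memory_order_relaxed);
      // On success Expected still holds Comparand (the old value); on failure
      // it has been updated to the current value, so it is the old value too.
      return Expected;
    }

    int main() {
      std::atomic<long> Value{42};
      long Old = InterlockedCompareExchangeRel(&Value, 7, 42);
      std::printf("old=%ld new=%ld\n", Old, Value.load());
      return 0;
    }

The _acq and _nf variants follow the same pattern with acquire and relaxed success orderings, respectively.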
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index e99121c46d9b6..a718f2f19aa65 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -21,10 +21,11 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/OSLog.h" +#include "clang/AST/OSLog.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -93,11 +94,11 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, return V; } -/// Utility to insert an atomic instruction based on Instrinsic::ID +/// Utility to insert an atomic instruction based on Intrinsic::ID /// and the expression node. -static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, - llvm::AtomicRMWInst::BinOp Kind, - const CallExpr *E) { +static Value *MakeBinaryAtomicValue( + CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, @@ -119,7 +120,7 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( - Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); + Kind, Args[0], Args[1], Ordering); return EmitFromInt(CGF, Result, T, ValueType); } @@ -151,7 +152,7 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF, return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); } -/// Utility to insert an atomic instruction based Instrinsic::ID and +/// Utility to insert an atomic instruction based Intrinsic::ID and /// the expression node, where the return value is the result of the /// operation. static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, @@ -200,6 +201,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, /// cmpxchg result or the old value. /// /// @returns result of cmpxchg, according to ReturnBool +/// +/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics +/// invoke the function EmitAtomicCmpXchgForMSIntrin. static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); @@ -230,6 +234,72 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, ValueType); } +/// This function should be invoked to emit atomic cmpxchg for Microsoft's +/// _InterlockedCompareExchange* intrinsics which have the following signature: +/// T _InterlockedCompareExchange(T volatile *Destination, +/// T Exchange, +/// T Comparand); +/// +/// Whereas the llvm 'cmpxchg' instruction has the following syntax: +/// cmpxchg *Destination, Comparand, Exchange. +/// So we need to swap Comparand and Exchange when invoking +/// CreateAtomicCmpXchg. That is the reason we could not use the above utility +/// function MakeAtomicCmpXchgValue since it expects the arguments to be +/// already swapped. 
+ +static +Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + assert(CGF.getContext().hasSameUnqualifiedType( + E->getType(), E->getArg(0)->getType()->getPointeeType())); + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), + E->getArg(1)->getType())); + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), + E->getArg(2)->getType())); + + auto *Destination = CGF.EmitScalarExpr(E->getArg(0)); + auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); + auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); + + // For Release ordering, the failure ordering should be Monotonic. + auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? + AtomicOrdering::Monotonic : + SuccessOrdering; + + auto *Result = CGF.Builder.CreateAtomicCmpXchg( + Destination, Comparand, Exchange, + SuccessOrdering, FailureOrdering); + Result->setVolatile(true); + return CGF.Builder.CreateExtractValue(Result, 0); +} + +static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + + auto *IntTy = CGF.ConvertType(E->getType()); + auto *Result = CGF.Builder.CreateAtomicRMW( + AtomicRMWInst::Add, + CGF.EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + Ordering); + return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); +} + +static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + + auto *IntTy = CGF.ConvertType(E->getType()); + auto *Result = CGF.Builder.CreateAtomicRMW( + AtomicRMWInst::Sub, + CGF.EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + Ordering); + return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -316,7 +386,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { - CGCallee callee = CGCallee::forDirect(calleeValue, FD); + CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); } @@ -461,7 +531,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(DIter != LocalDeclMap.end()); return EmitLoadOfScalar(DIter->second, /*volatile=*/false, - getContext().getSizeType(), E->getLocStart()); + getContext().getSizeType(), E->getBeginLoc()); } } @@ -485,7 +555,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, } namespace { -/// A struct to generically desribe a bit test intrinsic. +/// A struct to generically describe a bit test intrinsic. struct BitTest { enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set }; enum InterlockingKind : uint8_t { @@ -711,8 +781,11 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, } else { Name = SJKind == MSVCSetJmpKind::_setjmp ? 
"_setjmp" : "_setjmpex"; Arg1Ty = CGF.Int8PtrTy; - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), - llvm::ConstantInt::get(CGF.Int32Ty, 0)); + if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry)); + } else + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); } // Mark the call site and declaration with ReturnsTwice. @@ -745,6 +818,30 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, + _InterlockedExchangeAdd_acq, + _InterlockedExchangeAdd_rel, + _InterlockedExchangeAdd_nf, + _InterlockedExchange_acq, + _InterlockedExchange_rel, + _InterlockedExchange_nf, + _InterlockedCompareExchange_acq, + _InterlockedCompareExchange_rel, + _InterlockedCompareExchange_nf, + _InterlockedOr_acq, + _InterlockedOr_rel, + _InterlockedOr_nf, + _InterlockedXor_acq, + _InterlockedXor_rel, + _InterlockedXor_nf, + _InterlockedAnd_acq, + _InterlockedAnd_rel, + _InterlockedAnd_nf, + _InterlockedIncrement_acq, + _InterlockedIncrement_rel, + _InterlockedIncrement_nf, + _InterlockedDecrement_acq, + _InterlockedDecrement_rel, + _InterlockedDecrement_nf, __fastfail, }; @@ -811,25 +908,74 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); - - case MSVCIntrin::_InterlockedDecrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Sub, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); - } - case MSVCIntrin::_InterlockedIncrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); - } + case MSVCIntrin::_InterlockedExchangeAdd_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedExchangeAdd_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedExchangeAdd_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedExchange_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedExchange_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedExchange_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedCompareExchange_acq: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedCompareExchange_rel: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedCompareExchange_nf: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedOr_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Acquire); + case 
MSVCIntrin::_InterlockedOr_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedOr_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedXor_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedXor_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedXor_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedAnd_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedAnd_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedAnd_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedIncrement_acq: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedIncrement_rel: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedIncrement_nf: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedDecrement_acq: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedDecrement_rel: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedDecrement_nf: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); + + case MSVCIntrin::_InterlockedDecrement: + return EmitAtomicDecrementValue(*this, E); + case MSVCIntrin::_InterlockedIncrement: + return EmitAtomicIncrementValue(*this, E); case MSVCIntrin::__fastfail: { // Request immediate process termination from the kernel. The instruction @@ -923,35 +1069,42 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( if (llvm::Function *F = CGM.getModule().getFunction(Name)) return F; + llvm::SmallVector<QualType, 4> ArgTys; llvm::SmallVector<ImplicitParamDecl, 4> Params; Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, ImplicitParamDecl::Other); + ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { char Size = Layout.Items[I].getSizeByte(); if (!Size) continue; + QualType ArgTy = getOSLogArgType(Ctx, Size); Params.emplace_back( Ctx, nullptr, SourceLocation(), - &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), - getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); + &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, + ImplicitParamDecl::Other); + ArgTys.emplace_back(ArgTy); } FunctionArgList Args; for (auto &P : Params) Args.push_back(&P); + QualType ReturnTy = Ctx.VoidTy; + QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); + // The helper function has linkonce_odr linkage to enable the linker to merge // identical functions. To ensure the merging always happens, 'noinline' is // attached to the function when compiling with -Oz. 
const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create( FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); // Attach 'noinline' at -Oz. @@ -962,9 +1115,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( IdentifierInfo *II = &Ctx.Idents.get(Name); FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + FuncionTy, nullptr, SC_PrivateExtern, false, false); - StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); + StartFunction(FD, ReturnTy, Fn, FI, Args); // Create a scope with an artificial location for the body of this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); @@ -1024,7 +1177,12 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { llvm::Value *ArgVal; - if (const Expr *TheExpr = Item.getExpr()) { + if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { + uint64_t Val = 0; + for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I) + Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; + ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); + } else if (const Expr *TheExpr = Item.getExpr()) { ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); // Check if this is a retainable type. @@ -1077,7 +1235,7 @@ static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo) { return BuiltinID == Builtin::BI__builtin_mul_overflow && - Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width && + std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width && Op1Info.Signed != Op2Info.Signed; } @@ -1098,11 +1256,20 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); + unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width; + unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width; + + // One of the operands may be smaller than the other. If so, [s|z]ext it. + if (SignedOpWidth < UnsignedOpWidth) + Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext"); + if (UnsignedOpWidth < SignedOpWidth) + Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext"); llvm::Type *OpTy = Signed->getType(); llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); llvm::Type *ResTy = ResultPtr.getElementType(); + unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); // Take the absolute value of the signed operand. llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); @@ -1120,8 +1287,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, if (ResultInfo.Signed) { // Signed overflow occurs if the result is greater than INT_MAX or lesser // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). 
- auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width) - .zextOrSelf(Op1Info.Width); + auto IntMax = + llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth); llvm::Value *MaxResult = CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), CGF.Builder.CreateZExt(IsNegative, OpTy)); @@ -1139,9 +1306,9 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, llvm::Value *Underflow = CGF.Builder.CreateAnd( IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); - if (ResultInfo.Width < Op1Info.Width) { + if (ResultInfo.Width < OpWidth) { auto IntMax = - llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width); + llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth); llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); @@ -1252,9 +1419,61 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, return Res; } -RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, - unsigned BuiltinID, const CallExpr *E, +static bool +TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, + llvm::SmallPtrSetImpl<const Decl *> &Seen) { + if (const auto *Arr = Ctx.getAsArrayType(Ty)) + Ty = Ctx.getBaseElementType(Arr); + + const auto *Record = Ty->getAsCXXRecordDecl(); + if (!Record) + return false; + + // We've already checked this type, or are in the process of checking it. + if (!Seen.insert(Record).second) + return false; + + assert(Record->hasDefinition() && + "Incomplete types should already be diagnosed"); + + if (Record->isDynamicClass()) + return true; + + for (FieldDecl *F : Record->fields()) { + if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen)) + return true; + } + return false; +} + +/// Determine if the specified type requires laundering by checking if it is a +/// dynamic class type or contains a subobject which is a dynamic class type. +static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) { + if (!CGM.getCodeGenOpts().StrictVTablePointers) + return false; + llvm::SmallPtrSet<const Decl *, 16> Seen; + return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen); +} + +RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1)); + + // The builtin's shift arg may have a different type than the source arg and + // result, but the LLVM intrinsic uses the same type for all values. + llvm::Type *Ty = Src->getType(); + ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false); + + // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. + unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; + Value *F = CGM.getIntrinsic(IID, Ty); + return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); +} + +RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, + const CallExpr *E, ReturnValueSlot ReturnValue) { + const FunctionDecl *FD = GD.getDecl()->getAsFunction(); // See if we can constant fold this builtin. If so, don't emit it at all. 
Expr::EvalResult Result; if (E->EvaluateAsRValue(Result, CGM.getContext()) && @@ -1537,6 +1756,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(ComplexVal.second); } + case Builtin::BI__builtin_clrsb: + case Builtin::BI__builtin_clrsbl: + case Builtin::BI__builtin_clrsbll: { + // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Zero = llvm::Constant::getNullValue(ArgType); + Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); + Value *Inverse = Builder.CreateNot(ArgValue, "not"); + Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); + Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()}); + Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1)); + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } case Builtin::BI__builtin_ctzs: case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: @@ -1609,6 +1848,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } + case Builtin::BI__lzcnt16: + case Builtin::BI__lzcnt: + case Builtin::BI__lzcnt64: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } case Builtin::BI__popcnt16: case Builtin::BI__popcnt: case Builtin::BI__popcnt64: @@ -1627,46 +1881,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } - case Builtin::BI_rotr8: - case Builtin::BI_rotr16: - case Builtin::BI_rotr: - case Builtin::BI_lrotr: - case Builtin::BI_rotr64: { - Value *Val = EmitScalarExpr(E->getArg(0)); - Value *Shift = EmitScalarExpr(E->getArg(1)); - - llvm::Type *ArgType = Val->getType(); - Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = ArgType->getIntegerBitWidth(); - Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - - Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask); - Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); - Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); - Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); - Value *Result = Builder.CreateOr(LeftShifted, RightShifted); - return RValue::get(Result); - } - case Builtin::BI_rotl8: - case Builtin::BI_rotl16: - case Builtin::BI_rotl: - case Builtin::BI_lrotl: - case Builtin::BI_rotl64: { - Value *Val = EmitScalarExpr(E->getArg(0)); - Value *Shift = EmitScalarExpr(E->getArg(1)); - - llvm::Type *ArgType = Val->getType(); - Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = ArgType->getIntegerBitWidth(); - Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - - Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask); - Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); - Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); - Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); - Value *Result = 
Builder.CreateOr(LeftShifted, RightShifted); - return RValue::get(Result); - } case Builtin::BI__builtin_unpredictable: { // Always return the argument of __builtin_unpredictable. LLVM does not // handle this builtin. Metadata for this builtin should be added directly @@ -1690,15 +1904,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Result); } case Builtin::BI__builtin_assume_aligned: { - Value *PtrValue = EmitScalarExpr(E->getArg(0)); + const Expr *Ptr = E->getArg(0); + Value *PtrValue = EmitScalarExpr(Ptr); Value *OffsetValue = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); - unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); + unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); - EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); + EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), + Alignment, OffsetValue); return RValue::get(PtrValue); } case Builtin::BI__assume: @@ -1721,6 +1937,48 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_bitreverse64: { return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); } + case Builtin::BI__builtin_rotateleft8: + case Builtin::BI__builtin_rotateleft16: + case Builtin::BI__builtin_rotateleft32: + case Builtin::BI__builtin_rotateleft64: + case Builtin::BI_rotl8: // Microsoft variants of rotate left + case Builtin::BI_rotl16: + case Builtin::BI_rotl: + case Builtin::BI_lrotl: + case Builtin::BI_rotl64: + return emitRotate(E, false); + + case Builtin::BI__builtin_rotateright8: + case Builtin::BI__builtin_rotateright16: + case Builtin::BI__builtin_rotateright32: + case Builtin::BI__builtin_rotateright64: + case Builtin::BI_rotr8: // Microsoft variants of rotate right + case Builtin::BI_rotr16: + case Builtin::BI_rotr: + case Builtin::BI_lrotr: + case Builtin::BI_rotr64: + return emitRotate(E, true); + + case Builtin::BI__builtin_constant_p: { + llvm::Type *ResultType = ConvertType(E->getType()); + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + // At -O0, we don't perform inlining, so we don't need to delay the + // processing. + return RValue::get(ConstantInt::get(ResultType, 0)); + + const Expr *Arg = E->getArg(0); + QualType ArgType = Arg->getType(); + if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) + // We can only reason about scalar types. + return RValue::get(ConstantInt::get(ResultType, 0)); + + Value *ArgValue = EmitScalarExpr(Arg); + Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + Value *Result = Builder.CreateCall(F, ArgValue); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); + return RValue::get(Result); + } case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -1985,10 +2243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin___memcpy_chk: { // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
- llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2009,10 +2269,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin___memmove_chk: { // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. - llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2047,10 +2309,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. - llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2258,6 +2522,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } + case Builtin::BI__builtin_launder: { + const Expr *Arg = E->getArg(0); + QualType ArgTy = Arg->getType()->getPointeeType(); + Value *Ptr = EmitScalarExpr(Arg); + if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) + Ptr = Builder.CreateLaunderInvariantGroup(Ptr); + + return RValue::get(Ptr); + } case Builtin::BI__sync_fetch_and_add: case Builtin::BI__sync_fetch_and_sub: case Builtin::BI__sync_fetch_and_or: @@ -2952,7 +3225,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedExchangePointer: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); - case Builtin::BI_InterlockedCompareExchangePointer: { + case Builtin::BI_InterlockedCompareExchangePointer: + case Builtin::BI_InterlockedCompareExchangePointer_nf: { llvm::Type *RTy; llvm::IntegerType *IntType = IntegerType::get(getLLVMContext(), @@ -2969,10 +3243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); - auto Result = - Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); + auto Ordering = + BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? 
+ AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; + + auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + Ordering, Ordering); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, @@ -2982,16 +3258,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedCompareExchange8: case Builtin::BI_InterlockedCompareExchange16: case Builtin::BI_InterlockedCompareExchange: - case Builtin::BI_InterlockedCompareExchange64: { - AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( - EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(1)), - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); - CXI->setVolatile(true); - return RValue::get(Builder.CreateExtractValue(CXI, 0)); - } + case Builtin::BI_InterlockedCompareExchange64: + return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); case Builtin::BI_InterlockedIncrement16: case Builtin::BI_InterlockedIncrement: return RValue::get( @@ -3337,24 +3605,31 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Create a temporary array to hold the sizes of local pointer arguments // for the block. \p First is the position of the first size argument. - auto CreateArrayForSizeVar = [=](unsigned First) { - auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); - auto *Arr = Builder.CreateAlloca(AT); - llvm::Value *Ptr; + auto CreateArrayForSizeVar = [=](unsigned First) + -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { + llvm::APInt ArraySize(32, NumArgs - First); + QualType SizeArrayTy = getContext().getConstantArrayType( + getContext().getSizeType(), ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); + llvm::Value *TmpPtr = Tmp.getPointer(); + llvm::Value *TmpSize = EmitLifetimeStart( + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); + llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. auto *Zero = llvm::ConstantInt::get(IntTy, 0); for (unsigned I = First; I < NumArgs; ++I) { auto *Index = llvm::ConstantInt::get(IntTy, I - First); - auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); + auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index}); if (I == First) - Ptr = GEP; + ElemPtr = GEP; auto *V = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); } - return Ptr; + return std::tie(ElemPtr, TmpSize, TmpPtr); }; // Could have events and/or varargs. @@ -3366,24 +3641,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); - auto *PtrToSizeArray = CreateArrayForSizeVar(4); + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; + std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. 
std::vector<llvm::Value *> Args = { Queue, Flags, Range, Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), - PtrToSizeArray}; + ElemPtr}; std::vector<llvm::Type *> ArgTys = { - QueueTy, IntTy, RangeTy, - GenericVoidPtrTy, GenericVoidPtrTy, IntTy, - PtrToSizeArray->getType()}; + QueueTy, IntTy, RangeTy, GenericVoidPtrTy, + GenericVoidPtrTy, IntTy, ElemPtr->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + auto Call = + RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + if (TmpSize) + EmitLifetimeEnd(TmpSize, TmpPtr); + return Call; } // Any calls now have event arguments passed. if (NumArgs >= 7) { @@ -3400,7 +3678,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); + ClkEvent = ClkEvent->getType()->isIntegerTy() + ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) + : Builder.CreatePointerCast(ClkEvent, EventPtrTy); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3430,15 +3710,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_varargs"; - auto *PtrToSizeArray = CreateArrayForSizeVar(7); - Args.push_back(PtrToSizeArray); - ArgTys.push_back(PtrToSizeArray->getType()); + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; + std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); + Args.push_back(ElemPtr); + ArgTys.push_back(ElemPtr->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + auto Call = + RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + if (TmpSize) + EmitLifetimeEnd(TmpSize, TmpPtr); + return Call; } LLVM_FALLTHROUGH; } @@ -3530,13 +3814,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_os_log_format: return emitBuiltinOSLogFormat(*E); - case Builtin::BI__builtin_os_log_format_buffer_size: { - analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); - return RValue::get(ConstantInt::get(ConvertType(E->getType()), - Layout.size().getQuantity())); - } - case Builtin::BI__xray_customevent: { if (!ShouldXRayInstrumentFunction()) return RValue::getIgnored(); @@ -3703,6 +3980,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // we need to do a bit cast. llvm::Type *PTy = FTy->getParamType(i); if (PTy != ArgValue->getType()) { + // XXX - vector of pointers? 
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) { + if (PtrTy->getAddressSpace() != + ArgValue->getType()->getPointerAddressSpace()) { + ArgValue = Builder.CreateAddrSpaceCast( + ArgValue, + ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace())); + } + } + assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && "Must be able to losslessly bit cast to param"); ArgValue = Builder.CreateBitCast(ArgValue, PTy); @@ -3719,6 +4006,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, RetTy = ConvertType(BuiltinRetType); if (RetTy != V->getType()) { + // XXX - vector of pointers? + if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) { + if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) { + V = Builder.CreateAddrSpaceCast( + V, V->getType()->getPointerTo(PtrTy->getAddressSpace())); + } + } + assert(V->getType()->canLosslesslyBitCastTo(RetTy) && "Must be able to losslessly bit cast result type"); V = Builder.CreateBitCast(V, RetTy); @@ -4286,6 +4581,14 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vextq_v), NEONMAP0(vfma_v), NEONMAP0(vfmaq_v), + NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0), NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), @@ -5259,6 +5562,34 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); } + case NEON::BI__builtin_neon_vfmlal_low_v: + case NEON::BI__builtin_neon_vfmlalq_low_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); + } + case NEON::BI__builtin_neon_vfmlsl_low_v: + case NEON::BI__builtin_neon_vfmlslq_low_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); + } + case NEON::BI__builtin_neon_vfmlal_high_v: + case NEON::BI__builtin_neon_vfmlalq_high_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); + } + case NEON::BI__builtin_neon_vfmlsl_high_v: + case NEON::BI__builtin_neon_vfmlslq_high_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); + } } assert(Int && "Expected valid intrinsic number"); @@ -5506,10 +5837,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic=*/false); - APSInt Value; - if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); + llvm::APSInt Value = Result.Val.getInt(); uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 
16 : 32).getZExtValue(); llvm::InlineAsm *Emit = @@ -5991,6 +6323,120 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case ARM::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case ARM::BI_InterlockedExchangeAdd8_acq: + case ARM::BI_InterlockedExchangeAdd16_acq: + case ARM::BI_InterlockedExchangeAdd_acq: + case ARM::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case ARM::BI_InterlockedExchangeAdd8_rel: + case ARM::BI_InterlockedExchangeAdd16_rel: + case ARM::BI_InterlockedExchangeAdd_rel: + case ARM::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case ARM::BI_InterlockedExchangeAdd8_nf: + case ARM::BI_InterlockedExchangeAdd16_nf: + case ARM::BI_InterlockedExchangeAdd_nf: + case ARM::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case ARM::BI_InterlockedExchange8_acq: + case ARM::BI_InterlockedExchange16_acq: + case ARM::BI_InterlockedExchange_acq: + case ARM::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case ARM::BI_InterlockedExchange8_rel: + case ARM::BI_InterlockedExchange16_rel: + case ARM::BI_InterlockedExchange_rel: + case ARM::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case ARM::BI_InterlockedExchange8_nf: + case ARM::BI_InterlockedExchange16_nf: + case ARM::BI_InterlockedExchange_nf: + case ARM::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case ARM::BI_InterlockedCompareExchange8_acq: + case ARM::BI_InterlockedCompareExchange16_acq: + case ARM::BI_InterlockedCompareExchange_acq: + case ARM::BI_InterlockedCompareExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case ARM::BI_InterlockedCompareExchange8_rel: + case ARM::BI_InterlockedCompareExchange16_rel: + case ARM::BI_InterlockedCompareExchange_rel: + case ARM::BI_InterlockedCompareExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case ARM::BI_InterlockedCompareExchange8_nf: + case ARM::BI_InterlockedCompareExchange16_nf: + case ARM::BI_InterlockedCompareExchange_nf: + case ARM::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case ARM::BI_InterlockedOr8_acq: + case ARM::BI_InterlockedOr16_acq: + case ARM::BI_InterlockedOr_acq: + case ARM::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case ARM::BI_InterlockedOr8_rel: + case ARM::BI_InterlockedOr16_rel: + case ARM::BI_InterlockedOr_rel: + case ARM::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case ARM::BI_InterlockedOr8_nf: + case ARM::BI_InterlockedOr16_nf: + case ARM::BI_InterlockedOr_nf: + case ARM::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case ARM::BI_InterlockedXor8_acq: + case ARM::BI_InterlockedXor16_acq: + case ARM::BI_InterlockedXor_acq: + case ARM::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case ARM::BI_InterlockedXor8_rel: + case ARM::BI_InterlockedXor16_rel: + case ARM::BI_InterlockedXor_rel: + case ARM::BI_InterlockedXor64_rel: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case ARM::BI_InterlockedXor8_nf: + case ARM::BI_InterlockedXor16_nf: + case ARM::BI_InterlockedXor_nf: + case ARM::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case ARM::BI_InterlockedAnd8_acq: + case ARM::BI_InterlockedAnd16_acq: + case ARM::BI_InterlockedAnd_acq: + case ARM::BI_InterlockedAnd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case ARM::BI_InterlockedAnd8_rel: + case ARM::BI_InterlockedAnd16_rel: + case ARM::BI_InterlockedAnd_rel: + case ARM::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case ARM::BI_InterlockedAnd8_nf: + case ARM::BI_InterlockedAnd16_nf: + case ARM::BI_InterlockedAnd_nf: + case ARM::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case ARM::BI_InterlockedIncrement16_acq: + case ARM::BI_InterlockedIncrement_acq: + case ARM::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case ARM::BI_InterlockedIncrement16_rel: + case ARM::BI_InterlockedIncrement_rel: + case ARM::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case ARM::BI_InterlockedIncrement16_nf: + case ARM::BI_InterlockedIncrement_nf: + case ARM::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case ARM::BI_InterlockedDecrement16_acq: + case ARM::BI_InterlockedDecrement_acq: + case ARM::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case ARM::BI_InterlockedDecrement16_rel: + case ARM::BI_InterlockedDecrement_rel: + case ARM::BI_InterlockedDecrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case ARM::BI_InterlockedDecrement16_nf: + case ARM::BI_InterlockedDecrement_nf: + case ARM::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); } // Get the last argument, which specifies the vector type. @@ -6497,11 +6943,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); } + if (BuiltinID == AArch64::BI__getReg) { + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) + llvm_unreachable("Sema will ensure that the parameter is constant"); + + llvm::APSInt Value = Result.Val.getInt(); + LLVMContext &Context = CGM.getLLVMContext(); + std::string Reg = Value == 31 ? 
"sp" : "x" + Value.toString(10); + + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Value *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + return Builder.CreateCall(F, Metadata); + } + if (BuiltinID == AArch64::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI_ReadWriteBarrier) + return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + llvm::SyncScope::SingleThread); + // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -6564,6 +7032,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); } + if (BuiltinID == AArch64::BI_ReadStatusReg || + BuiltinID == AArch64::BI_WriteStatusReg) { + LLVMContext &Context = CGM.getLLVMContext(); + + unsigned SysReg = + E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); + + std::string SysRegStr; + llvm::raw_string_ostream(SysRegStr) << + ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << + ((SysReg >> 11) & 7) << ":" << + ((SysReg >> 7) & 15) << ":" << + ((SysReg >> 3) & 15) << ":" << + ( SysReg & 7); + + llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Type *RegisterType = Int64Ty; + llvm::Type *ValueType = Int32Ty; + llvm::Type *Types[] = { RegisterType }; + + if (BuiltinID == AArch64::BI_ReadStatusReg) { + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Value *Call = Builder.CreateCall(F, Metadata); + + return Builder.CreateTrunc(Call, ValueType); + } + + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); + ArgValue = Builder.CreateZExt(ArgValue, RegisterType); + + return Builder.CreateCall(F, { Metadata, ArgValue }); + } + + if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { + llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + // Find out if any arguments are required to be integer constant // expressions. 
unsigned ICEArguments = 0; @@ -6659,7 +7169,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvth_f16_u32: case NEON::BI__builtin_neon_vcvth_f16_u64: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_f16_s16: case NEON::BI__builtin_neon_vcvth_f16_s32: case NEON::BI__builtin_neon_vcvth_f16_s64: { @@ -6679,7 +7189,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u16_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s16_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -6689,7 +7199,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u32_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s32_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -6699,7 +7209,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u64_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s64_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -8414,6 +8924,129 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case AArch64::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case AArch64::BI_InterlockedExchangeAdd8_acq: + case AArch64::BI_InterlockedExchangeAdd16_acq: + case AArch64::BI_InterlockedExchangeAdd_acq: + case AArch64::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case AArch64::BI_InterlockedExchangeAdd8_rel: + case AArch64::BI_InterlockedExchangeAdd16_rel: + case AArch64::BI_InterlockedExchangeAdd_rel: + case AArch64::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case AArch64::BI_InterlockedExchangeAdd8_nf: + case AArch64::BI_InterlockedExchangeAdd16_nf: + case AArch64::BI_InterlockedExchangeAdd_nf: + case AArch64::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case AArch64::BI_InterlockedExchange8_acq: + case AArch64::BI_InterlockedExchange16_acq: + case AArch64::BI_InterlockedExchange_acq: + case AArch64::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case AArch64::BI_InterlockedExchange8_rel: + case AArch64::BI_InterlockedExchange16_rel: + case AArch64::BI_InterlockedExchange_rel: + case AArch64::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case AArch64::BI_InterlockedExchange8_nf: + case AArch64::BI_InterlockedExchange16_nf: + case AArch64::BI_InterlockedExchange_nf: + case AArch64::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case AArch64::BI_InterlockedCompareExchange8_acq: + case AArch64::BI_InterlockedCompareExchange16_acq: + case AArch64::BI_InterlockedCompareExchange_acq: + case AArch64::BI_InterlockedCompareExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case AArch64::BI_InterlockedCompareExchange8_rel: + case 
AArch64::BI_InterlockedCompareExchange16_rel: + case AArch64::BI_InterlockedCompareExchange_rel: + case AArch64::BI_InterlockedCompareExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case AArch64::BI_InterlockedCompareExchange8_nf: + case AArch64::BI_InterlockedCompareExchange16_nf: + case AArch64::BI_InterlockedCompareExchange_nf: + case AArch64::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case AArch64::BI_InterlockedOr8_acq: + case AArch64::BI_InterlockedOr16_acq: + case AArch64::BI_InterlockedOr_acq: + case AArch64::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case AArch64::BI_InterlockedOr8_rel: + case AArch64::BI_InterlockedOr16_rel: + case AArch64::BI_InterlockedOr_rel: + case AArch64::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case AArch64::BI_InterlockedOr8_nf: + case AArch64::BI_InterlockedOr16_nf: + case AArch64::BI_InterlockedOr_nf: + case AArch64::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case AArch64::BI_InterlockedXor8_acq: + case AArch64::BI_InterlockedXor16_acq: + case AArch64::BI_InterlockedXor_acq: + case AArch64::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case AArch64::BI_InterlockedXor8_rel: + case AArch64::BI_InterlockedXor16_rel: + case AArch64::BI_InterlockedXor_rel: + case AArch64::BI_InterlockedXor64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case AArch64::BI_InterlockedXor8_nf: + case AArch64::BI_InterlockedXor16_nf: + case AArch64::BI_InterlockedXor_nf: + case AArch64::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case AArch64::BI_InterlockedAnd8_acq: + case AArch64::BI_InterlockedAnd16_acq: + case AArch64::BI_InterlockedAnd_acq: + case AArch64::BI_InterlockedAnd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case AArch64::BI_InterlockedAnd8_rel: + case AArch64::BI_InterlockedAnd16_rel: + case AArch64::BI_InterlockedAnd_rel: + case AArch64::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case AArch64::BI_InterlockedAnd8_nf: + case AArch64::BI_InterlockedAnd16_nf: + case AArch64::BI_InterlockedAnd_nf: + case AArch64::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case AArch64::BI_InterlockedIncrement16_acq: + case AArch64::BI_InterlockedIncrement_acq: + case AArch64::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case AArch64::BI_InterlockedIncrement16_rel: + case AArch64::BI_InterlockedIncrement_rel: + case AArch64::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case AArch64::BI_InterlockedIncrement16_nf: + case AArch64::BI_InterlockedIncrement_nf: + case AArch64::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case AArch64::BI_InterlockedDecrement16_acq: + case AArch64::BI_InterlockedDecrement_acq: + case AArch64::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case AArch64::BI_InterlockedDecrement16_rel: + case AArch64::BI_InterlockedDecrement_rel: + case AArch64::BI_InterlockedDecrement64_rel: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case AArch64::BI_InterlockedDecrement16_nf: + case AArch64::BI_InterlockedDecrement_nf: + case AArch64::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); + + case AArch64::BI_InterlockedAdd: { + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + Value *Arg1 = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, Arg0, Arg1, + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateAdd(RMWI, Arg1); + } } } @@ -8524,8 +9157,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF, } static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, - unsigned NumElts, ArrayRef<Value *> Ops, + ArrayRef<Value *> Ops, bool InvertLHS = false) { + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); @@ -8533,7 +9167,25 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, LHS = CGF.Builder.CreateNot(LHS); return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), - CGF.Builder.getIntNTy(std::max(NumElts, 8U))); + Ops[0]->getType()); +} + +static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, + Value *Amt, bool IsRight) { + llvm::Type *Ty = Op0->getType(); + + // Amount may be scalar immediate, in which case create a splat vector. + // Funnel shifts amounts are treated as modulo and types are all power-of-2 so + // we only care about the lowest log2 bits anyway. + if (Amt->getType() != Ty) { + unsigned NumElts = Ty->getVectorNumElements(); + Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); + Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); + } + + unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl; + Value *F = CGF.CGM.getIntrinsic(IID, Ty); + return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } static Value *EmitX86Select(CodeGenFunction &CGF, @@ -8855,6 +9507,17 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } +// Emit addition or subtraction with signed/unsigned saturation. +static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, bool IsSigned, + bool IsAddition) { + Intrinsic::ID IID = + IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) + : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); + llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); + return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); +} + Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); @@ -8876,6 +9539,7 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { // Grab the global __cpu_model. llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); // Calculate the index needed to access the correct field based on the // range. Also adjust the expected value. @@ -8911,17 +9575,17 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { return EmitX86CpuSupports(FeatureStr); } -uint32_t +uint64_t CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { // Processor features and mapping to processor feature value. 
- uint32_t FeaturesMask = 0;
+ uint64_t FeaturesMask = 0;
 for (const StringRef &FeatureStr : FeatureStrs) {
 unsigned Feature = StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
#include "llvm/Support/X86TargetParser.def"
 ;
- FeaturesMask |= (1U << Feature);
+ FeaturesMask |= (1ULL << Feature);
 }
 return FeaturesMask;
 }
@@ -8930,37 +9594,66 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
 return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
 }
-llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
+llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
+ uint32_t Features1 = Lo_32(FeaturesMask);
+ uint32_t Features2 = Hi_32(FeaturesMask);
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ Value *Result = Builder.getTrue();
+
+ if (Features1 != 0) {
+ // Matching the struct layout from the compiler-rt/libgcc structure that is
+ // filled in:
+ // unsigned int __cpu_vendor;
+ // unsigned int __cpu_type;
+ // unsigned int __cpu_subtype;
+ // unsigned int __cpu_features[1];
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
+
+ // Grab the global __cpu_model.
+ llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
+
+ // Grab the first (0th) element from the field __cpu_features off of the
+ // global in the struct STy.
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
+ Builder.getInt32(0)};
+ Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Mask = Builder.getInt32(Features1);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
- // Grab the first (0th) element from the field __cpu_features off of the
- // global in the struct STy.
- Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
- ConstantInt::get(Int32Ty, 0)};
- Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
- Value *Features =
- Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
-
- // Check the value of the bit corresponding to the feature requested.
- Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
- return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
+ if (Features2 != 0) {
+ llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
+ "__cpu_features2");
+ cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
+
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Mask = Builder.getInt32(Features2);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
+
+ return Result;
 }
 Value *CodeGenFunction::EmitX86CpuInit() {
 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false);
 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
+ cast<llvm::GlobalValue>(Func)->setDSOLocal(true);
+ cast<llvm::GlobalValue>(Func)->setDLLStorageClass(
+ llvm::GlobalValue::DefaultStorageClass);
 return Builder.CreateCall(Func);
 }
@@ -9051,6 +9744,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 case X86::BI__rdtsc: {
 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
 }
+ case X86::BI__builtin_ia32_rdtscp: {
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[0]);
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_lzcnt_u16:
+ case X86::BI__builtin_ia32_lzcnt_u32:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
+ case X86::BI__builtin_ia32_tzcnt_u16:
+ case X86::BI__builtin_ia32_tzcnt_u32:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
 case X86::BI__builtin_ia32_undef128:
 case X86::BI__builtin_ia32_undef256:
 case X86::BI__builtin_ia32_undef512:
@@ -9822,6 +10533,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 "psrldq");
 return Builder.CreateBitCast(SV, ResultType, "cast");
 }
+ case X86::BI__builtin_ia32_kshiftliqi:
+ case X86::BI__builtin_ia32_kshiftlihi:
+ case X86::BI__builtin_ia32_kshiftlisi:
+ case X86::BI__builtin_ia32_kshiftlidi: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+ if (ShiftVal >= NumElts)
+ return llvm::Constant::getNullValue(Ops[0]->getType());
+
+ Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = NumElts + i - ShiftVal;
+
+ Value *Zero = llvm::Constant::getNullValue(In->getType());
+ Value *SV = Builder.CreateShuffleVector(Zero, In,
+ makeArrayRef(Indices, NumElts),
+ "kshiftl");
+ return Builder.CreateBitCast(SV, Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kshiftriqi:
+ case X86::BI__builtin_ia32_kshiftrihi:
+ case X86::BI__builtin_ia32_kshiftrisi:
+ case X86::BI__builtin_ia32_kshiftridi: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+ if (ShiftVal >= NumElts)
+ return llvm::Constant::getNullValue(Ops[0]->getType());
+
+ Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ Value *Zero = llvm::Constant::getNullValue(In->getType());
+ Value *SV = Builder.CreateShuffleVector(In, Zero,
+ makeArrayRef(Indices, NumElts),
+ "kshiftr");
+ return Builder.CreateBitCast(SV, Ops[0]->getType());
+ }
 case X86::BI__builtin_ia32_movnti:
 case X86::BI__builtin_ia32_movnti64:
 case X86::BI__builtin_ia32_movntsd:
@@ -9847,7 +10602,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 SI->setAlignment(1);
 return SI;
 }
-
+ // Rotate is a special case of funnel shift - 1st 2 args are the same.
+ case X86::BI__builtin_ia32_vprotb:
+ case X86::BI__builtin_ia32_vprotw:
+ case X86::BI__builtin_ia32_vprotd:
+ case X86::BI__builtin_ia32_vprotq:
+ case X86::BI__builtin_ia32_vprotbi:
+ case X86::BI__builtin_ia32_vprotwi:
+ case X86::BI__builtin_ia32_vprotdi:
+ case X86::BI__builtin_ia32_vprotqi:
+ case X86::BI__builtin_ia32_prold128:
+ case X86::BI__builtin_ia32_prold256:
+ case X86::BI__builtin_ia32_prold512:
+ case X86::BI__builtin_ia32_prolq128:
+ case X86::BI__builtin_ia32_prolq256:
+ case X86::BI__builtin_ia32_prolq512:
+ case X86::BI__builtin_ia32_prolvd128:
+ case X86::BI__builtin_ia32_prolvd256:
+ case X86::BI__builtin_ia32_prolvd512:
+ case X86::BI__builtin_ia32_prolvq128:
+ case X86::BI__builtin_ia32_prolvq256:
+ case X86::BI__builtin_ia32_prolvq512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
+ case X86::BI__builtin_ia32_prord128:
+ case X86::BI__builtin_ia32_prord256:
+ case X86::BI__builtin_ia32_prord512:
+ case X86::BI__builtin_ia32_prorq128:
+ case X86::BI__builtin_ia32_prorq256:
+ case X86::BI__builtin_ia32_prorq512:
+ case X86::BI__builtin_ia32_prorvd128:
+ case X86::BI__builtin_ia32_prorvd256:
+ case X86::BI__builtin_ia32_prorvd512:
+ case X86::BI__builtin_ia32_prorvq128:
+ case X86::BI__builtin_ia32_prorvq256:
+ case X86::BI__builtin_ia32_prorvq512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
 case X86::BI__builtin_ia32_selectb_128:
 case X86::BI__builtin_ia32_selectb_256:
 case X86::BI__builtin_ia32_selectb_512:
@@ -9905,38 +10694,147 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 return EmitX86MaskedCompare(*this, CC, false, Ops);
 }
+ case X86::BI__builtin_ia32_kortestcqi:
 case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestzhi: {
- Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
- Value *C;
- if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
- C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
- else
- C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
 Value *Cmp = Builder.CreateICmpEQ(Or, C);
 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
 }
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
+
+ case X86::BI__builtin_ia32_ktestcqi:
+ case X86::BI__builtin_ia32_ktestzqi:
+ case X86::BI__builtin_ia32_ktestchi:
+ case X86::BI__builtin_ia32_ktestzhi:
+ case X86::BI__builtin_ia32_ktestcsi:
+ case X86::BI__builtin_ia32_ktestzsi:
+ case X86::BI__builtin_ia32_ktestcdi:
+ case X86::BI__builtin_ia32_ktestzdi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_ktestcqi:
+ IID = Intrinsic::x86_avx512_ktestc_b;
+ break;
+ case X86::BI__builtin_ia32_ktestzqi:
+ IID = Intrinsic::x86_avx512_ktestz_b;
+ break;
+ case X86::BI__builtin_ia32_ktestchi:
+ IID = Intrinsic::x86_avx512_ktestc_w;
+ break;
+ case X86::BI__builtin_ia32_ktestzhi:
+ IID = Intrinsic::x86_avx512_ktestz_w;
+ break;
+ case X86::BI__builtin_ia32_ktestcsi:
+ IID = Intrinsic::x86_avx512_ktestc_d;
+ break;
+ case X86::BI__builtin_ia32_ktestzsi:
+ IID = Intrinsic::x86_avx512_ktestz_d;
+ break;
+ case X86::BI__builtin_ia32_ktestcdi:
+ IID = Intrinsic::x86_avx512_ktestc_q;
+ break;
+ case X86::BI__builtin_ia32_ktestzdi:
+ IID = Intrinsic::x86_avx512_ktestz_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, {LHS, RHS});
+ }
+
+ case X86::BI__builtin_ia32_kaddqi:
+ case X86::BI__builtin_ia32_kaddhi:
+ case X86::BI__builtin_ia32_kaddsi:
+ case X86::BI__builtin_ia32_kadddi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_kaddqi:
+ IID = Intrinsic::x86_avx512_kadd_b;
+ break;
+ case X86::BI__builtin_ia32_kaddhi:
+ IID = Intrinsic::x86_avx512_kadd_w;
+ break;
+ case X86::BI__builtin_ia32_kaddsi:
+ IID = Intrinsic::x86_avx512_kadd_d;
+ break;
+ case X86::BI__builtin_ia32_kadddi:
+ IID = Intrinsic::x86_avx512_kadd_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
+ }
+
+ case X86::BI__builtin_ia32_kandqi:
 case X86::BI__builtin_ia32_kandhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops);
+ case X86::BI__builtin_ia32_kandnqi:
 case X86::BI__builtin_ia32_kandnhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
+ case X86::BI__builtin_ia32_korqi:
 case X86::BI__builtin_ia32_korhi:
- return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ return EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ case X86::BI__builtin_ia32_kxnorqi:
 case X86::BI__builtin_ia32_kxnorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
+ case X86::BI__builtin_ia32_kxorqi:
 case X86::BI__builtin_ia32_kxorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
- case X86::BI__builtin_ia32_knothi: {
- Ops[0] = getMaskVecValue(*this, Ops[0], 16);
- return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
- Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi: {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Builder.CreateNot(Res),
+ Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kmovb:
+ case X86::BI__builtin_ia32_kmovw:
+ case X86::BI__builtin_ia32_kmovd:
+ case X86::BI__builtin_ia32_kmovq: {
+ // Bitcast to vXi1 type and then back to integer. This gets the mask
+ // register type into the IR, but might be optimized out depending on
+ // what's around it.
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
 }
 case X86::BI__builtin_ia32_kunpckdi:
 case X86::BI__builtin_ia32_kunpcksi:
 case X86::BI__builtin_ia32_kunpckhi: {
- unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
 uint32_t Indices[64];
@@ -10103,6 +11001,52 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 case X86::BI__builtin_ia32_pternlogq256_maskz:
 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
+ case X86::BI__builtin_ia32_vpshldvd128:
+ case X86::BI__builtin_ia32_vpshldvd256:
+ case X86::BI__builtin_ia32_vpshldvd512:
+ case X86::BI__builtin_ia32_vpshldvq128:
+ case X86::BI__builtin_ia32_vpshldvq256:
+ case X86::BI__builtin_ia32_vpshldvq512:
+ case X86::BI__builtin_ia32_vpshldvw128:
+ case X86::BI__builtin_ia32_vpshldvw256:
+ case X86::BI__builtin_ia32_vpshldvw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdvd128:
+ case X86::BI__builtin_ia32_vpshrdvd256:
+ case X86::BI__builtin_ia32_vpshrdvd512:
+ case X86::BI__builtin_ia32_vpshrdvq128:
+ case X86::BI__builtin_ia32_vpshrdvq256:
+ case X86::BI__builtin_ia32_vpshrdvq512:
+ case X86::BI__builtin_ia32_vpshrdvw128:
+ case X86::BI__builtin_ia32_vpshrdvw256:
+ case X86::BI__builtin_ia32_vpshrdvw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
 // 3DNow!
 case X86::BI__builtin_ia32_pswapdsf:
 case X86::BI__builtin_ia32_pswapdsi: {
@@ -10145,6 +11089,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 Ops[0]);
 return Builder.CreateExtractValue(Call, 1);
 }
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ case X86::BI__builtin_ia32_subborrow_u32:
+ case X86::BI__builtin_ia32_subborrow_u64: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ IID = Intrinsic::x86_addcarry_32;
+ break;
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ IID = Intrinsic::x86_addcarry_64;
+ break;
+ case X86::BI__builtin_ia32_subborrow_u32:
+ IID = Intrinsic::x86_subborrow_32;
+ break;
+ case X86::BI__builtin_ia32_subborrow_u64:
+ IID = Intrinsic::x86_subborrow_64;
+ break;
+ }
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
+ { Ops[0], Ops[1], Ops[2] });
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[3]);
+ return Builder.CreateExtractValue(Call, 0);
+ }
 case X86::BI__builtin_ia32_fpclassps128_mask:
 case X86::BI__builtin_ia32_fpclassps256_mask:
@@ -10183,6 +11154,51 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
 }
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ case X86::BI__builtin_ia32_vpmultishiftqb512: {
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_128;
+ break;
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_256;
+ break;
+ case X86::BI__builtin_ia32_vpmultishiftqb512:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_512;
+ break;
+ }
+
+ return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ }
+
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *MaskIn = Ops[2];
+ Ops.erase(&Ops[2]);
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
+ break;
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
+ break;
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
+ break;
+ }
+
+ Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
+ }
+
 // packed comparison intrinsics
 case X86::BI__builtin_ia32_cmpeqps:
 case X86::BI__builtin_ia32_cmpeqpd:
@@ -10361,6 +11377,27 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
 llvm::SyncScope::System);
 }
+ case X86::BI__shiftleft128:
+ case X86::BI__shiftright128: {
+ // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
+ // llvm::Function *F = CGM.getIntrinsic(
+ // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
+ // Int64Ty);
+ // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+ // return Builder.CreateCall(F, Ops);
+ llvm::Type *Int128Ty = Builder.getInt128Ty();
+ Value *Val = Builder.CreateOr(
+ Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64),
+ Builder.CreateZExt(Ops[0], Int128Ty));
+ Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
+ llvm::ConstantInt::get(Int128Ty, 0x3f));
+ Value *Res;
+ if (BuiltinID == X86::BI__shiftleft128)
+ Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
+ else
+ Res = Builder.CreateLShr(Val, Amt);
+ return Builder.CreateTrunc(Res, Int64Ty);
+ }
 case X86::BI_ReadWriteBarrier:
 case X86::BI_ReadBarrier:
 case X86::BI_WriteBarrier: {
@@ -10401,14 +11438,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
 Value *Destination =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
- Value *ExchangeHigh128 =
- Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
- Value *ExchangeLow128 =
- Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
- Address ComparandResult(
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
- getContext().toCharUnitsFromBits(128));
+ Builder.CreateBitCast(Ops[0], Int128PtrTy);
+ Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
+ Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
+ Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
+ getContext().toCharUnitsFromBits(128));
 Value *Exchange = Builder.CreateOr(
 Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
@@ -10459,8 +11493,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 case X86::BI__readfsdword:
 case X86::BI__readfsqword: {
 llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 257));
+ Value *Ptr =
+ Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
 LoadInst *Load = Builder.CreateAlignedLoad(
 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
 Load->setVolatile(true);
@@ -10471,17 +11505,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 case X86::BI__readgsdword:
 case X86::BI__readgsqword: {
 llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 256));
+ Value *Ptr =
+ Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
 LoadInst *Load = Builder.CreateAlignedLoad(
 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
 Load->setVolatile(true);
 return Load;
 }
+ case X86::BI__builtin_ia32_paddsb512:
+ case X86::BI__builtin_ia32_paddsw512:
+ case X86::BI__builtin_ia32_paddsb256:
+ case X86::BI__builtin_ia32_paddsw256:
+ case X86::BI__builtin_ia32_paddsb128:
+ case X86::BI__builtin_ia32_paddsw128:
+ return EmitX86AddSubSatExpr(*this, Ops, true, true);
+ case X86::BI__builtin_ia32_paddusb512:
+ case X86::BI__builtin_ia32_paddusw512:
+ case X86::BI__builtin_ia32_paddusb256:
+ case X86::BI__builtin_ia32_paddusw256:
+ case X86::BI__builtin_ia32_paddusb128:
+ case X86::BI__builtin_ia32_paddusw128:
+ return EmitX86AddSubSatExpr(*this, Ops, false, true);
+ case X86::BI__builtin_ia32_psubsb512:
+ case X86::BI__builtin_ia32_psubsw512:
+ case X86::BI__builtin_ia32_psubsb256:
+ case X86::BI__builtin_ia32_psubsw256:
+ case X86::BI__builtin_ia32_psubsb128:
+ case X86::BI__builtin_ia32_psubsw128:
+ return EmitX86AddSubSatExpr(*this, Ops, true, false);
+ case X86::BI__builtin_ia32_psubusb512:
+ case X86::BI__builtin_ia32_psubusw512:
+ case X86::BI__builtin_ia32_psubusb256:
+ case X86::BI__builtin_ia32_psubusw256:
+ case X86::BI__builtin_ia32_psubusb128:
+ case X86::BI__builtin_ia32_psubusw128:
+ return EmitX86AddSubSatExpr(*this, Ops, false, false);
 }
 }
-
 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
 const CallExpr *E) {
 SmallVector<Value*, 4> Ops;
@@ -10901,6 +11962,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
 auto RetTy = ConvertType(BIRetType);
 return Builder.CreateBitCast(ShuffleCall, RetTy);
 }
+
+ case PPC::BI__builtin_pack_vector_int128: {
+ bool isLittleEndian = getTarget().isLittleEndian();
+ Value *UndefValue =
+ llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
+ Value *Res = Builder.CreateInsertElement(
+ UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
+ Res = Builder.CreateInsertElement(Res, Ops[1],
+ (uint64_t)(isLittleEndian ? 0 : 1));
+ return Builder.CreateBitCast(Res, ConvertType(E->getType()));
+ }
+
+ case PPC::BI__builtin_unpack_vector_int128: {
+ ConstantInt *Index = cast<ConstantInt>(Ops[1]);
+ Value *Unpacked = Builder.CreateBitCast(
+ Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
+
+ if (getTarget().isLittleEndian())
+ Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
+
+ return Builder.CreateExtractElement(Unpacked, Index);
+ }
 }
 }
@@ -10948,12 +12031,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
- case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+ case AMDGPU::BI__builtin_amdgcn_update_dpp: {
+ llvm::SmallVector<llvm::Value *, 6> Args;
+ for (unsigned I = 0; I != E->getNumArgs(); ++I)
 Args.push_back(EmitScalarExpr(E->getArg(I)));
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
- Args[0]->getType());
+ assert(Args.size() == 5 || Args.size() == 6);
+ if (Args.size() == 5)
+ Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
+ Value *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
 }
 case AMDGPU::BI__builtin_amdgcn_div_fixup:
@@ -11039,50 +12126,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 CI->setConvergent();
 return CI;
 }
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
- const llvm::Type *PtrTy = Args[0]->getType();
- // check pointer parameter
- if (!PtrTy->isPointerTy() ||
- E->getArg(0)
- ->getType()
- ->getPointeeType()
- .getQualifiers()
- .getAddressSpace() != LangAS::opencl_local ||
- !PtrTy->getPointerElementType()->isFloatTy()) {
- CGM.Error(E->getArg(0)->getLocStart(),
- "parameter should have type \"local float*\"");
- return nullptr;
- }
- // check float parameter
- if (!Args[1]->getType()->isFloatTy()) {
- CGM.Error(E->getArg(1)->getLocStart(),
- "parameter should have type \"float\"");
- return nullptr;
- }
-
- Intrinsic::ID ID;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- ID = Intrinsic::amdgcn_ds_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- ID = Intrinsic::amdgcn_ds_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
- ID = Intrinsic::amdgcn_ds_fmax;
- break;
- default:
- llvm_unreachable("Unknown BuiltinID");
- }
- Value *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Args);
- }
-
 // amdgcn workitem
 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
@@ -11363,7 +12406,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
 return Builder.CreateCall(F, {X, Y, M4Value});
 }
- // Vector intrisincs that output the post-instruction CC value.
+ // Vector intrinsics that output the post-instruction CC value.
#define INTRINSIC_WITH_CC(NAME) \
 case SystemZ::BI__builtin_##NAME: \
@@ -11823,7 +12866,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
 bool isColMajor = isColMajorArg.getSExtValue();
 unsigned IID;
 unsigned NumResults = 8;
- // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet
+ // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
 // for some reason nvcc builtins use _c_.
 switch (BuiltinID) {
 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
@@ -12046,31 +13089,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
 return Builder.CreateCall(Callee, Args);
 }
- case WebAssembly::BI__builtin_wasm_mem_size: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *I = EmitScalarExpr(E->getArg(0));
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
- return Builder.CreateCall(Callee, I);
- }
- case WebAssembly::BI__builtin_wasm_mem_grow: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Args[] = {
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1))
- };
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
- return Builder.CreateCall(Callee, Args);
- }
- case WebAssembly::BI__builtin_wasm_current_memory: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_grow_memory: {
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
- return Builder.CreateCall(Callee, X);
- }
 case WebAssembly::BI__builtin_wasm_throw: {
 Value *Tag = EmitScalarExpr(E->getArg(0));
 Value *Obj = EmitScalarExpr(E->getArg(1));
@@ -12081,6 +13099,211 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
 return Builder.CreateCall(Callee);
 }
+ case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Expected = EmitScalarExpr(E->getArg(1));
+ Value *Timeout = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
+ return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
+ }
+ case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Expected = EmitScalarExpr(E->getArg(1));
+ Value *Timeout = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
+ return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
+ }
+ case WebAssembly::BI__builtin_wasm_atomic_notify: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Count = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
+ return Builder.CreateCall(Callee, {Addr, Count});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_min_f32:
+ case WebAssembly::BI__builtin_wasm_min_f64:
+ case WebAssembly::BI__builtin_wasm_min_f32x4:
+ case WebAssembly::BI__builtin_wasm_min_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::minimum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_max_f32:
+ case WebAssembly::BI__builtin_wasm_max_f64:
+ case WebAssembly::BI__builtin_wasm_max_f32x4:
+ case WebAssembly::BI__builtin_wasm_max_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::maximum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Extract = Builder.CreateExtractElement(Vec, Lane);
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ return Builder.CreateSExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ return Builder.CreateZExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
+ return Extract;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
+ llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
+ Value *Trunc = Builder.CreateTrunc(Val, ElemType);
+ return Builder.CreateInsertElement(Vec, Trunc, Lane);
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
+ return Builder.CreateInsertElement(Vec, Val, Lane);
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ IntNo = Intrinsic::sadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ IntNo = Intrinsic::uadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_bitselect: {
+ Value *V1 = EmitScalarExpr(E->getArg(0));
+ Value *V2 = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {V1, V2, C});
+ }
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ IntNo = Intrinsic::wasm_anytrue;
+ break;
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2:
+ IntNo = Intrinsic::wasm_alltrue;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_abs_f32x4:
+ case WebAssembly::BI__builtin_wasm_abs_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
+ case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
 default:
 return nullptr;