Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp | 1639
1 file changed, 1198 insertions, 441 deletions
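Among other things, the hunks below add fallback-argument handling for the type-generic bit builtins: __builtin_clzg and __builtin_ctzg accept an optional second argument that is returned when the first argument is zero, lowered to llvm.ctlz/llvm.cttz followed by a compare against zero and a select. As a usage sketch only (the helper names are illustrative, and this assumes a compiler that already contains this patch):

int leading_zeros(unsigned x) {
  /* Two-argument form: zero input is well defined and yields the fallback (32 here). */
  return __builtin_clzg(x, 32);
}

int trailing_zeros(unsigned long long x) {
  /* Single-argument form keeps the historical behavior: zero input is undefined. */
  return __builtin_ctzg(x);
}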
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp index a4f26a6f0eb1..86d47054615e 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp @@ -13,6 +13,7 @@ #include "ABIInfo.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" +#include "CGHLSLRuntime.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" @@ -44,6 +45,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" +#include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsPowerPC.h" @@ -55,6 +57,7 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" +#include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -511,8 +514,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { @@ -528,8 +531,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); @@ -549,8 +552,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { @@ -578,38 +581,19 @@ static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, Args); } -// Emit a simple mangled intrinsic that has 1 argument and a return type -// matching the argument type. -static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, - unsigned IntrinsicID, - llvm::StringRef Name = "") { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0, Name); -} - -// Emit an intrinsic that has 2 operands of the same type as its result. 
-static Value *emitBinaryBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, { Src0, Src1 }); -} - -// Emit an intrinsic that has 3 operands of the same type as its result. -static Value *emitTernaryBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); +// Emit a simple intrinsic that has N scalar arguments and a return type +// matching the argument type. It is assumed that only the first argument is +// overloaded. +template <unsigned N> +Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, + unsigned IntrinsicID, + llvm::StringRef Name = "") { + static_assert(N, "expect non-empty argument"); + SmallVector<Value *, N> Args; + for (unsigned I = 0; I < N; ++I) + Args.push_back(CGF.EmitScalarExpr(E->getArg(I))); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType()); + return CGF.Builder.CreateCall(F, Args, Name); } // Emit an intrinsic that has 1 float or double operand, and 1 integer. @@ -702,8 +686,36 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); - return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); + RValue Call = + CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); + + // Check the supported intrinsic. + if (unsigned BuiltinID = FD->getBuiltinID()) { + auto IsErrnoIntrinsic = [&]() -> unsigned { + switch (BuiltinID) { + case Builtin::BIexpf: + case Builtin::BI__builtin_expf: + case Builtin::BI__builtin_expf128: + return true; + } + // TODO: support more FP math libcalls + return false; + }(); + + // Restrict to target with errno, for example, MacOS doesn't set errno. + if (IsErrnoIntrinsic && CGF.CGM.getLangOpts().MathErrno && + !CGF.Builder.getIsFPConstrained()) { + ASTContext &Context = CGF.getContext(); + // Emit "int" TBAA metadata on FP math libcalls. 
+ clang::QualType IntTy = Context.IntTy; + TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy); + Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal()); + CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); + } + } + return Call; } /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* @@ -730,17 +742,14 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } -static Value *emitRangedBuiltin(CodeGenFunction &CGF, - unsigned IntrinsicID, +static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high) { - llvm::MDBuilder MDHelper(CGF.getLLVMContext()); - llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); - llvm::Instruction *Call = CGF.Builder.CreateCall(F); - Call->setMetadata(llvm::LLVMContext::MD_range, RNode); - Call->setMetadata(llvm::LLVMContext::MD_noundef, - llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); - return Call; + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + llvm::CallInst *Call = CGF.Builder.CreateCall(F); + llvm::ConstantRange CR(APInt(32, low), APInt(32, high)); + Call->addRangeRetAttr(CR); + Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef); + return Call; } namespace { @@ -791,7 +800,8 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; - return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); + return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}), + ArgValue); } /// Checks if using the result of __builtin_object_size(p, @p From) in place of @@ -819,33 +829,37 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } -const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField( - ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) { +const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset( + ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, + uint64_t &Offset) { const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = getLangOpts().getStrictFlexArraysLevel(); - unsigned FieldNo = 0; - bool IsUnion = RD->isUnion(); + uint32_t FieldNo = 0; + + if (RD->isImplicit()) + return nullptr; - for (const Decl *D : RD->decls()) { - if (const auto *Field = dyn_cast<FieldDecl>(D); - Field && (Name.empty() || Field->getNameAsString() == Name) && + for (const FieldDecl *FD : RD->fields()) { + if ((!FAMDecl || FD == FAMDecl) && Decl::isFlexibleArrayMemberLike( - Ctx, Field, Field->getType(), StrictFlexArraysLevel, + Ctx, FD, FD->getType(), StrictFlexArraysLevel, /*IgnoreTemplateOrMacroSubstitution=*/true)) { const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); Offset += Layout.getFieldOffset(FieldNo); - return Field; + return FD; } - if (const auto *Record = dyn_cast<RecordDecl>(D)) - if (const FieldDecl *Field = - FindFlexibleArrayMemberField(Ctx, Record, Name, Offset)) { + QualType Ty = FD->getType(); + if (Ty->isRecordType()) { + if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset( + Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) { const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); Offset += Layout.getFieldOffset(FieldNo); return Field; } + } - if (!IsUnion && isa<FieldDecl>(D)) + if 
(!RD->isUnion()) ++FieldNo; } @@ -855,14 +869,13 @@ const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField( static unsigned CountCountedByAttrs(const RecordDecl *RD) { unsigned Num = 0; - for (const Decl *D : RD->decls()) { - if (const auto *FD = dyn_cast<FieldDecl>(D); - FD && FD->hasAttr<CountedByAttr>()) { + for (const FieldDecl *FD : RD->fields()) { + if (FD->getType()->isCountAttributedType()) return ++Num; - } - if (const auto *Rec = dyn_cast<RecordDecl>(D)) - Num += CountCountedByAttrs(Rec); + QualType Ty = FD->getType(); + if (Ty->isRecordType()) + Num += CountCountedByAttrs(Ty->getAsRecordDecl()); } return Num; @@ -925,12 +938,14 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, // Get the flexible array member Decl. const RecordDecl *OuterRD = nullptr; - std::string FAMName; + const FieldDecl *FAMDecl = nullptr; if (const auto *ME = dyn_cast<MemberExpr>(Base)) { // Check if \p Base is referencing the FAM itself. const ValueDecl *VD = ME->getMemberDecl(); OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext(); - FAMName = VD->getNameAsString(); + FAMDecl = dyn_cast<FieldDecl>(VD); + if (!FAMDecl) + return nullptr; } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) { // Check if we're pointing to the whole struct. QualType Ty = DRE->getDecl()->getType(); @@ -955,7 +970,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, // }; // }; // - // We don't konw which 'count' to use in this scenario: + // We don't know which 'count' to use in this scenario: // // size_t get_size(struct union_of_fams *p) { // return __builtin_dynamic_object_size(p, 1); @@ -969,12 +984,14 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, if (!OuterRD) return nullptr; + // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to + // get its offset. uint64_t Offset = 0; - const FieldDecl *FAMDecl = - FindFlexibleArrayMemberField(Ctx, OuterRD, FAMName, Offset); + FAMDecl = + FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset); Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity(); - if (!FAMDecl || !FAMDecl->hasAttr<CountedByAttr>()) + if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType()) // No flexible array member found or it doesn't have the "counted_by" // attribute. return nullptr; @@ -984,6 +1001,24 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, // Can't find the field referenced by the "counted_by" attribute. return nullptr; + if (isa<DeclRefExpr>(Base)) + // The whole struct is specificed in the __bdos. The calculation of the + // whole size of the structure can be done in two ways: + // + // 1) sizeof(struct S) + count * sizeof(typeof(fam)) + // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam)) + // + // The first will add additional padding after the end of the array, + // allocation while the second method is more precise, but not quite + // expected from programmers. See + // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a + // discussion of the topic. + // + // GCC isn't (currently) able to calculate __bdos on a pointer to the whole + // structure. Therefore, because of the above issue, we'll choose to match + // what GCC does for consistency's sake. + return nullptr; + // Build a load of the counted_by field. 
bool IsSigned = CountedByFD->getType()->isSignedIntegerType(); Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD); @@ -1014,32 +1049,9 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType()); llvm::Constant *ElemSize = llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned); - Value *FAMSize = + Value *Res = Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned); - FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned); - Value *Res = FAMSize; - - if (isa<DeclRefExpr>(Base)) { - // The whole struct is specificed in the __bdos. - const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD); - - // Get the offset of the FAM. - llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned); - Value *OffsetAndFAMSize = - Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned); - - // Get the full size of the struct. - llvm::Constant *SizeofStruct = - ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned); - - // max(sizeof(struct s), - // offsetof(struct s, array) + p->count * sizeof(*p->array)) - Res = IsSigned - ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax, - OffsetAndFAMSize, SizeofStruct) - : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax, - OffsetAndFAMSize, SizeofStruct); - } + Res = Builder.CreateIntCast(Res, ResType, IsSigned); // A negative \p IdxInst or \p CountedByInst means that the index lands // outside of the flexible array member. If that's the case, we want to @@ -1129,6 +1141,7 @@ struct BitTest { static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; + } // namespace BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { @@ -2115,9 +2128,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( auto AL = ApplyDebugLocation::CreateArtificial(*this); CharUnits Offset; - Address BufAddr = - Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty, - BufferAlignment); + Address BufAddr = makeNaturalAddressForPointer( + Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy, + BufferAlignment); Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), @@ -2160,7 +2173,7 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { // Ignore argument 1, the format string. It is not currently used. 
CallArgList Args; - Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); + Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy); for (const auto &Item : Layout.Items) { int Size = Item.getSizeByte(); @@ -2200,8 +2213,8 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { if (!isa<Constant>(ArgVal)) { CleanupKind Cleanup = getARCCleanupKind(); QualType Ty = TheExpr->getType(); - Address Alloca = Address::invalid(); - Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); + RawAddress Alloca = RawAddress::invalid(); + RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); ArgVal = EmitARCRetain(Ty, ArgVal); Builder.CreateStore(ArgVal, Addr); pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty, @@ -2234,7 +2247,7 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( Layout, BufAddr.getAlignment()); EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); - return RValue::get(BufAddr.getPointer()); + return RValue::get(BufAddr, *this); } static bool isSpecialUnsignedMultiplySignedResult( @@ -2566,7 +2579,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (OP.hasMathErrnoOverride()) ErrnoOverriden = OP.getMathErrnoOverride(); } - // True if 'atttibute__((optnone)) is used. This attibute overrides + // True if 'attribute__((optnone))' is used. This attribute overrides // fast-math which implies math-errno. bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>(); @@ -2596,6 +2609,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fma: case Builtin::BI__builtin_fmaf: case Builtin::BI__builtin_fmal: + case Builtin::BI__builtin_fmaf16: case Builtin::BIfma: case Builtin::BIfmaf: case Builtin::BIfmal: { @@ -2648,6 +2662,39 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } if (GenerateIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { + case Builtin::BIacos: + case Builtin::BIacosf: + case Builtin::BIacosl: + case Builtin::BI__builtin_acos: + case Builtin::BI__builtin_acosf: + case Builtin::BI__builtin_acosf16: + case Builtin::BI__builtin_acosl: + case Builtin::BI__builtin_acosf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos)); + + case Builtin::BIasin: + case Builtin::BIasinf: + case Builtin::BIasinl: + case Builtin::BI__builtin_asin: + case Builtin::BI__builtin_asinf: + case Builtin::BI__builtin_asinf16: + case Builtin::BI__builtin_asinl: + case Builtin::BI__builtin_asinf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin)); + + case Builtin::BIatan: + case Builtin::BIatanf: + case Builtin::BIatanl: + case Builtin::BI__builtin_atan: + case Builtin::BI__builtin_atanf: + case Builtin::BI__builtin_atanf16: + case Builtin::BI__builtin_atanl: + case Builtin::BI__builtin_atanf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan)); + case Builtin::BIceil: case Builtin::BIceilf: case Builtin::BIceill: @@ -2668,7 +2715,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_copysignf16: case Builtin::BI__builtin_copysignl: case Builtin::BI__builtin_copysignf128: - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 
+ return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign)); case Builtin::BIcos: case Builtin::BIcosf: @@ -2682,6 +2730,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Intrinsic::cos, Intrinsic::experimental_constrained_cos)); + case Builtin::BIcosh: + case Builtin::BIcoshf: + case Builtin::BIcoshl: + case Builtin::BI__builtin_cosh: + case Builtin::BI__builtin_coshf: + case Builtin::BI__builtin_coshf16: + case Builtin::BI__builtin_coshl: + case Builtin::BI__builtin_coshf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh)); + case Builtin::BIexp: case Builtin::BIexpf: case Builtin::BIexpl: @@ -2713,7 +2772,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // TODO: strictfp support if (Builder.getIsFPConstrained()) break; - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10)); } case Builtin::BIfabs: case Builtin::BIfabsf: @@ -2723,7 +2783,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fabsf16: case Builtin::BI__builtin_fabsl: case Builtin::BI__builtin_fabsf128: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs)); case Builtin::BIfloor: case Builtin::BIfloorf: @@ -2896,6 +2957,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Intrinsic::sin, Intrinsic::experimental_constrained_sin)); + case Builtin::BIsinh: + case Builtin::BIsinhf: + case Builtin::BIsinhl: + case Builtin::BI__builtin_sinh: + case Builtin::BI__builtin_sinhf: + case Builtin::BI__builtin_sinhf16: + case Builtin::BI__builtin_sinhl: + case Builtin::BI__builtin_sinhf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh)); + case Builtin::BIsqrt: case Builtin::BIsqrtf: case Builtin::BIsqrtl: @@ -2910,6 +2982,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, SetSqrtFPAccuracy(Call); return RValue::get(Call); } + + case Builtin::BItan: + case Builtin::BItanf: + case Builtin::BItanl: + case Builtin::BI__builtin_tan: + case Builtin::BI__builtin_tanf: + case Builtin::BI__builtin_tanf16: + case Builtin::BI__builtin_tanl: + case Builtin::BI__builtin_tanf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); + + case Builtin::BItanh: + case Builtin::BItanhf: + case Builtin::BItanhl: + case Builtin::BI__builtin_tanh: + case Builtin::BI__builtin_tanhf: + case Builtin::BI__builtin_tanhf16: + case Builtin::BI__builtin_tanhl: + case Builtin::BI__builtin_tanhf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh)); + case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: @@ -2982,7 +3077,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Check NonnullAttribute/NullabilityArg and Alignment. 
auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg, unsigned ParmNum) { - Value *Val = A.getPointer(); + Value *Val = A.emitRawPointer(*this); EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD, ParmNum); @@ -3011,13 +3106,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_va_end: EmitVAStartEnd(BuiltinID == Builtin::BI__va_start ? EmitScalarExpr(E->getArg(0)) - : EmitVAListRef(E->getArg(0)).getPointer(), + : EmitVAListRef(E->getArg(0)).emitRawPointer(*this), BuiltinID != Builtin::BI__builtin_va_end); return RValue::get(nullptr); case Builtin::BI__builtin_va_copy: { - Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); - Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); - Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr}); + Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this); + Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this); + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}), + {DstPtr, SrcPtr}); return RValue::get(nullptr); } case Builtin::BIabs: @@ -3127,36 +3223,66 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ctzs: case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: - case Builtin::BI__builtin_ctzll: { - Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); + case Builtin::BI__builtin_ctzll: + case Builtin::BI__builtin_ctzg: { + bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg && + E->getNumArgs() > 1; + + Value *ArgValue = + HasFallback ? EmitScalarExpr(E->getArg(0)) + : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); - Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); + Value *ZeroUndef = + Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef()); Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); if (Result->getType() != ResultType) - Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, - "cast"); - return RValue::get(Result); + Result = + Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast"); + if (!HasFallback) + return RValue::get(Result); + + Value *Zero = Constant::getNullValue(ArgType); + Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); + Value *FallbackValue = EmitScalarExpr(E->getArg(1)); + Value *ResultOrFallback = + Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg"); + return RValue::get(ResultOrFallback); } case Builtin::BI__builtin_clzs: case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: - case Builtin::BI__builtin_clzll: { - Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); + case Builtin::BI__builtin_clzll: + case Builtin::BI__builtin_clzg: { + bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg && + E->getNumArgs() > 1; + + Value *ArgValue = + HasFallback ? 
EmitScalarExpr(E->getArg(0)) + : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); - Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); + Value *ZeroUndef = + Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef()); Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); if (Result->getType() != ResultType) - Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, - "cast"); - return RValue::get(Result); + Result = + Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast"); + if (!HasFallback) + return RValue::get(Result); + + Value *Zero = Constant::getNullValue(ArgType); + Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); + Value *FallbackValue = EmitScalarExpr(E->getArg(1)); + Value *ResultOrFallback = + Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg"); + return RValue::get(ResultOrFallback); } case Builtin::BI__builtin_ffs: case Builtin::BI__builtin_ffsl: @@ -3216,7 +3342,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__popcnt64: case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: - case Builtin::BI__builtin_popcountll: { + case Builtin::BI__builtin_popcountll: + case Builtin::BI__builtin_popcountg: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); @@ -3225,8 +3352,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) - Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, - "cast"); + Result = + Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast"); return RValue::get(Result); } case Builtin::BI__builtin_unpredictable: { @@ -3317,6 +3444,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD}); return RValue::get(nullptr); } + case Builtin::BI__builtin_allow_runtime_check: { + StringRef Kind = + cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString(); + LLVMContext &Ctx = CGM.getLLVMContext(); + llvm::Value *Allow = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check), + llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind))); + return RValue::get(Allow); + } case Builtin::BI__arithmetic_fence: { // Create the builtin call if FastMath is selected, and the target // supports the builtin, otherwise just return the argument. 
@@ -3353,13 +3489,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_byteswap_ushort: case Builtin::BI_byteswap_ulong: case Builtin::BI_byteswap_uint64: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap)); } case Builtin::BI__builtin_bitreverse8: case Builtin::BI__builtin_bitreverse16: case Builtin::BI__builtin_bitreverse32: case Builtin::BI__builtin_bitreverse64: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse)); } case Builtin::BI__builtin_rotateleft8: case Builtin::BI__builtin_rotateleft16: @@ -3443,6 +3581,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } + case Builtin::BI__builtin_readsteadycounter: { + Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter); + return RValue::get(Builder.CreateCall(F)); + } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); @@ -3452,6 +3594,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_trap: EmitTrapCall(Intrinsic::trap); return RValue::get(nullptr); + case Builtin::BI__builtin_verbose_trap: { + llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation(); + if (getDebugInfo()) { + TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor( + TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()), + *E->getArg(1)->tryEvaluateString(getContext())); + } + ApplyDebugLocation ApplyTrapDI(*this, TrapLocation); + // Currently no attempt is made to prevent traps from being merged. + EmitTrapCall(Intrinsic::trap); + return RValue::get(nullptr); + } case Builtin::BI__debugbreak: EmitTrapCall(Intrinsic::debugtrap); return RValue::get(nullptr); @@ -3489,7 +3643,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // frexpl instead of legalizing this type in the BE. 
if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case Builtin::BI__builtin_frexp: case Builtin::BI__builtin_frexpf: @@ -3632,67 +3786,90 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), Builder.getFalse(), nullptr, "elt.abs"); else - Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs"); + Result = emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::fabs, "elt.abs"); return RValue::get(Result); } - + case Builtin::BI__builtin_elementwise_acos: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::acos, "elt.acos")); + case Builtin::BI__builtin_elementwise_asin: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::asin, "elt.asin")); + case Builtin::BI__builtin_elementwise_atan: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::atan, "elt.atan")); case Builtin::BI__builtin_elementwise_ceil: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::ceil, "elt.ceil")); case Builtin::BI__builtin_elementwise_exp: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::exp, "elt.exp")); case Builtin::BI__builtin_elementwise_exp2: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::exp2, "elt.exp2")); case Builtin::BI__builtin_elementwise_log: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::log, "elt.log")); case Builtin::BI__builtin_elementwise_log2: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::log2, "elt.log2")); case Builtin::BI__builtin_elementwise_log10: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::log10, "elt.log10")); case Builtin::BI__builtin_elementwise_pow: { - return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow)); + return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow)); } case Builtin::BI__builtin_elementwise_bitreverse: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse, - "elt.bitreverse")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse")); case Builtin::BI__builtin_elementwise_cos: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::cos, "elt.cos")); + case Builtin::BI__builtin_elementwise_cosh: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::cosh, "elt.cosh")); case Builtin::BI__builtin_elementwise_floor: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, 
E, llvm::Intrinsic::floor, "elt.floor")); case Builtin::BI__builtin_elementwise_roundeven: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven, - "elt.roundeven")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::roundeven, "elt.roundeven")); case Builtin::BI__builtin_elementwise_round: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round, - "elt.round")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::round, "elt.round")); case Builtin::BI__builtin_elementwise_rint: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint, - "elt.rint")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::rint, "elt.rint")); case Builtin::BI__builtin_elementwise_nearbyint: - return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint, - "elt.nearbyint")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint")); case Builtin::BI__builtin_elementwise_sin: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin")); - + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::sin, "elt.sin")); + case Builtin::BI__builtin_elementwise_sinh: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::sinh, "elt.sinh")); + case Builtin::BI__builtin_elementwise_tan: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::tan, "elt.tan")); + case Builtin::BI__builtin_elementwise_tanh: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::tanh, "elt.tanh")); case Builtin::BI__builtin_elementwise_trunc: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::trunc, "elt.trunc")); case Builtin::BI__builtin_elementwise_canonicalize: - return RValue::get( - emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); case Builtin::BI__builtin_elementwise_copysign: - return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign)); + return RValue::get(emitBuiltinWithOneOverloadedType<2>( + *this, E, llvm::Intrinsic::copysign)); case Builtin::BI__builtin_elementwise_fma: - return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma)); + return RValue::get( + emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { Value *Op0 = EmitScalarExpr(E->getArg(0)); @@ -3746,9 +3923,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_reduce_max: { - auto GetIntrinsicID = [](QualType QT) { + auto GetIntrinsicID = [this](QualType QT) { if (auto *VecTy = QT->getAs<VectorType>()) QT = VecTy->getElementType(); + else if (QT->isSizelessVectorType()) + QT = QT->getSizelessVectorEltType(CGM.getContext()); + if (QT->isSignedIntegerType()) return llvm::Intrinsic::vector_reduce_smax; if (QT->isUnsignedIntegerType()) @@ -3756,14 +3936,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, assert(QT->isFloatingType() && "must have a float here"); return 
llvm::Intrinsic::vector_reduce_fmax; }; - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_min: { - auto GetIntrinsicID = [](QualType QT) { + auto GetIntrinsicID = [this](QualType QT) { if (auto *VecTy = QT->getAs<VectorType>()) QT = VecTy->getElementType(); + else if (QT->isSizelessVectorType()) + QT = QT->getSizelessVectorEltType(CGM.getContext()); + if (QT->isSignedIntegerType()) return llvm::Intrinsic::vector_reduce_smin; if (QT->isUnsignedIntegerType()) @@ -3772,24 +3955,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return llvm::Intrinsic::vector_reduce_fmin; }; - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_add: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); case Builtin::BI__builtin_reduce_mul: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul")); case Builtin::BI__builtin_reduce_xor: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); case Builtin::BI__builtin_reduce_or: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or")); case Builtin::BI__builtin_reduce_and: - return RValue::get(emitUnaryBuiltin( + return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and")); case Builtin::BI__builtin_matrix_transpose: { @@ -3811,13 +3994,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); Address Src = EmitPointerWithAlignment(E->getArg(0)); - EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), - E->getArg(0)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)), + E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, + 0); Value *Result = MB.CreateColumnMajorLoad( - Src.getElementType(), Src.getPointer(), + Src.getElementType(), Src.emitRawPointer(*this), Align(Src.getAlignment().getQuantity()), Stride, IsVolatile, - ResultTy->getNumRows(), ResultTy->getNumColumns(), - "matrix"); + ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix"); return RValue::get(Result); } @@ -3832,11 +4015,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, assert(PtrTy && "arg1 must be of pointer type"); bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); - EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(), - E->getArg(1)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)), + E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, + 0); Value *Result = MB.CreateColumnMajorStore( - Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()), - Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns()); + Matrix, Dst.emitRawPointer(*this), + Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile, + MatrixTy->getNumRows(), 
MatrixTy->getNumColumns()); return RValue::get(Result); } @@ -3995,7 +4180,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_bzero: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *SizeVal = EmitScalarExpr(E->getArg(1)); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), + EmitNonNullArgCheck(Dest, E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); return RValue::get(nullptr); @@ -4006,12 +4191,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Src = EmitPointerWithAlignment(E->getArg(0)); Address Dest = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), - E->getArg(0)->getExprLoc(), FD, 0); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(), - E->getArg(1)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)), + E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, + 0); + EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)), + E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, + 0); Builder.CreateMemMove(Dest, Src, SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(nullptr); } case Builtin::BImemcpy: @@ -4026,10 +4213,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateMemCpy(Dest, Src, SizeVal, false); if (BuiltinID == Builtin::BImempcpy || BuiltinID == Builtin::BI__builtin_mempcpy) - return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(), - Dest.getPointer(), SizeVal)); + return RValue::get(Builder.CreateInBoundsGEP( + Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal)); else - return RValue::get(Dest.getPointer()); + return RValue::get(Dest, *this); } case Builtin::BI__builtin_memcpy_inline: { @@ -4061,7 +4248,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); Builder.CreateMemCpy(Dest, Src, SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(Dest, *this); } case Builtin::BI__builtin_objc_memmove_collectable: { @@ -4070,7 +4257,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *SizeVal = EmitScalarExpr(E->getArg(2)); CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestAddr, SrcAddr, SizeVal); - return RValue::get(DestAddr.getPointer()); + return RValue::get(DestAddr, *this); } case Builtin::BI__builtin___memmove_chk: { @@ -4087,7 +4274,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); Builder.CreateMemMove(Dest, Src, SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(Dest, *this); } case Builtin::BImemmove: @@ -4098,7 +4285,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemMove(Dest, Src, SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(Dest, *this); } case Builtin::BImemset: case 
Builtin::BI__builtin_memset: { @@ -4106,10 +4293,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), + EmitNonNullArgCheck(Dest, E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(Dest, *this); } case Builtin::BI__builtin_memset_inline: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -4117,8 +4304,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); uint64_t Size = E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); - EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), - E->getArg(0)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)), + E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, + 0); Builder.CreateMemSetInline(Dest, ByteVal, Size); return RValue::get(nullptr); } @@ -4137,7 +4325,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.getInt8Ty()); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(Dest, *this); } case Builtin::BI__builtin_wmemchr: { // The MSVC runtime library does not provide a definition of wmemchr, so we @@ -4359,14 +4547,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateStackSave(); - assert(Buf.getPointer()->getType() == StackAddr->getType()); + assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType()); Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); - return RValue::get(Builder.CreateCall(F, Buf.getPointer())); + return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this))); } case Builtin::BI__builtin_longjmp: { Value *Buf = EmitScalarExpr(E->getArg(0)); @@ -5202,6 +5390,76 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__iso_volatile_store64: return RValue::get(EmitISOVolatileStore(*this, E)); + case Builtin::BI__builtin_ptrauth_sign_constant: + return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); + + case Builtin::BI__builtin_ptrauth_auth: + case Builtin::BI__builtin_ptrauth_auth_and_resign: + case Builtin::BI__builtin_ptrauth_blend_discriminator: + case Builtin::BI__builtin_ptrauth_sign_generic_data: + case Builtin::BI__builtin_ptrauth_sign_unauthenticated: + case Builtin::BI__builtin_ptrauth_strip: { + // Emit the arguments. + SmallVector<llvm::Value *, 5> Args; + for (auto argExpr : E->arguments()) + Args.push_back(EmitScalarExpr(argExpr)); + + // Cast the value to intptr_t, saving its original type. 
+ llvm::Type *OrigValueType = Args[0]->getType(); + if (OrigValueType->isPointerTy()) + Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy); + + switch (BuiltinID) { + case Builtin::BI__builtin_ptrauth_auth_and_resign: + if (Args[4]->getType()->isPointerTy()) + Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy); + [[fallthrough]]; + + case Builtin::BI__builtin_ptrauth_auth: + case Builtin::BI__builtin_ptrauth_sign_unauthenticated: + if (Args[2]->getType()->isPointerTy()) + Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy); + break; + + case Builtin::BI__builtin_ptrauth_sign_generic_data: + if (Args[1]->getType()->isPointerTy()) + Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy); + break; + + case Builtin::BI__builtin_ptrauth_blend_discriminator: + case Builtin::BI__builtin_ptrauth_strip: + break; + } + + // Call the intrinsic. + auto IntrinsicID = [&]() -> unsigned { + switch (BuiltinID) { + case Builtin::BI__builtin_ptrauth_auth: + return llvm::Intrinsic::ptrauth_auth; + case Builtin::BI__builtin_ptrauth_auth_and_resign: + return llvm::Intrinsic::ptrauth_resign; + case Builtin::BI__builtin_ptrauth_blend_discriminator: + return llvm::Intrinsic::ptrauth_blend; + case Builtin::BI__builtin_ptrauth_sign_generic_data: + return llvm::Intrinsic::ptrauth_sign_generic; + case Builtin::BI__builtin_ptrauth_sign_unauthenticated: + return llvm::Intrinsic::ptrauth_sign; + case Builtin::BI__builtin_ptrauth_strip: + return llvm::Intrinsic::ptrauth_strip; + } + llvm_unreachable("bad ptrauth intrinsic"); + }(); + auto Intrinsic = CGM.getIntrinsic(IntrinsicID); + llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args); + + if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data && + BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator && + OrigValueType->isPointerTy()) { + Result = Builder.CreateIntToPtr(Result, OrigValueType); + } + return RValue::get(Result); + } + case Builtin::BI__exception_code: case Builtin::BI_exception_code: return RValue::get(EmitSEHExceptionCode()); @@ -5454,7 +5712,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // OpenCL v2.0, s6.13.17 - Enqueue kernel function. - // It contains four different overload formats specified in Table 6.13.17.1. + // Table 6.13.17.1 specifies four overload forms of enqueue_kernel. 
+ // The code below expands the builtin call to a call to one of the following + // functions that an OpenCL runtime library will have to provide: + // __enqueue_kernel_basic + // __enqueue_kernel_varargs + // __enqueue_kernel_basic_events + // __enqueue_kernel_events_varargs case Builtin::BIenqueue_kernel: { StringRef Name; // Generated function call name unsigned NumArgs = E->getNumArgs(); @@ -5466,8 +5730,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); - llvm::Value *Range = NDRangeL.getAddress(*this).getPointer(); - llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType(); + llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this); + llvm::Type *RangeTy = NDRangeL.getAddress().getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: @@ -5486,7 +5750,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B(Builder.getContext()); - B.addByValAttr(NDRangeL.getAddress(*this).getElementType()); + B.addByValAttr(NDRangeL.getAddress().getElementType()); llvm::AttributeList ByValAttrSet = llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); @@ -5575,9 +5839,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, getContext(), Expr::NPC_ValueDependentIsNotNull)) { EventWaitList = llvm::ConstantPointerNull::get(PtrTy); } else { - EventWaitList = E->getArg(4)->getType()->isArrayType() - ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() - : EmitScalarExpr(E->getArg(4)); + EventWaitList = + E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this) + : EmitScalarExpr(E->getArg(4)); // Convert to generic address space. EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy); } @@ -5634,7 +5899,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitLifetimeEnd(TmpSize, TmpPtr); return Call; } - [[fallthrough]]; + llvm_unreachable("Unexpected enqueue_kernel signature"); } // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. 
@@ -5673,7 +5938,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); - llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer(); + llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); Value *Kernel = @@ -5691,7 +5956,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Name), {NDRange, Kernel, Block})); } - case Builtin::BI__builtin_store_half: case Builtin::BI__builtin_store_halff: { Value *Val = EmitScalarExpr(E->getArg(0)); @@ -5710,14 +5974,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *HalfVal = Builder.CreateLoad(Address); return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } + case Builtin::BI__builtin_printf: case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX() || - getTarget().getTriple().isAMDGCN()) { + getTarget().getTriple().isAMDGCN() || + (getTarget().getTriple().isSPIRV() && + getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) { if (getLangOpts().OpenMPIsTargetDevice) return EmitOpenMPDevicePrintfCallExpr(E); if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E); - if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP) + if ((getTarget().getTriple().isAMDGCN() || + getTarget().getTriple().isSPIRV()) && + getLangOpts().HIP) return EmitAMDGPUDevicePrintfCallExpr(E); } @@ -5726,7 +5995,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_canonicalizef: case Builtin::BI__builtin_canonicalizef16: case Builtin::BI__builtin_canonicalizel: - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); + return RValue::get( + emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize)); case Builtin::BI__builtin_thread_pointer: { if (!getContext().getTargetInfo().isTLSSupported()) @@ -5757,7 +6027,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto PTy0 = FTy->getParamType(0); if (PTy0 != Arg0Val->getType()) { if (Arg0Ty->isArrayType()) - Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); + Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this); else Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); } @@ -5795,7 +6065,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto PTy1 = FTy->getParamType(1); if (PTy1 != Arg1Val->getType()) { if (Arg1Ty->isArrayType()) - Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer(); + Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this); else Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1); } @@ -5809,7 +6079,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ms_va_start: case Builtin::BI__builtin_ms_va_end: return RValue::get( - EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), + EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this), BuiltinID == Builtin::BI__builtin_ms_va_start)); case Builtin::BI__builtin_ms_va_copy: { @@ -5833,11 +6103,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto Name = CGM.getCUDARuntime().getDeviceSideName( cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl()); auto Str = 
CGM.GetAddrOfConstantCString(Name, ""); - llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0), - llvm::ConstantInt::get(SizeTy, 0)}; - auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(), - Str.getPointer(), Zeros); - return RValue::get(Ptr); + return RValue::get(Str.getPointer()); } } @@ -5851,8 +6117,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // If this is a predefined lib function (e.g. malloc), emit the call // using exactly the normal call path. if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) - return emitLibraryCall(*this, FD, E, - cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); + return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD)); // Check that a call to a target specific builtin has the correct target // features. @@ -5871,6 +6136,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); if (!Prefix.empty()) { IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name); + if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" && + getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA) + IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name); // NOTE we don't need to perform a compatibility flag check here since the // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. @@ -5907,8 +6175,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } - assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && - "Must be able to losslessly bit cast to param"); // Cast vector type (e.g., v256i32) to x86_amx, this only happen // in amx intrinsics. if (PTy->isX86_AMXTy()) @@ -5938,8 +6204,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } - assert(V->getType()->canLosslesslyBitCastTo(RetTy) && - "Must be able to losslessly bit cast result type"); // Cast x86_amx to vector type (e.g., v256i32), this only happen // in amx intrinsics. 
if (V->getType()->isX86_AMXTy()) @@ -5973,7 +6237,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); return RValue::get(V); case TEK_Aggregate: - return RValue::getAggregate(ReturnValue.getValue(), + return RValue::getAggregate(ReturnValue.getAddress(), ReturnValue.isVolatile()); case TEK_Complex: llvm_unreachable("No current target builtin returns complex"); @@ -5981,6 +6245,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } + // EmitHLSLBuiltinExpr will check getLangOpts().HLSL + if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E)) + return RValue::get(V); + if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) return EmitHipStdParUnsupportedBuiltin(this, FD); @@ -6041,6 +6309,10 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); + case llvm::Triple::spirv64: + if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA) + return nullptr; + return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -7080,8 +7352,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, - { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, }, - { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, }, { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, @@ -7100,8 +7370,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, - { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, }, - { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, }, { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, @@ -7204,12 +7472,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, - { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, }, - { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, }, - { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, }, - { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, }, - { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, }, - { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, }, // The mangling rules cause us to have one ID for each type for vldap1(q)_lane // and vstl1(q)_lane, but codegen is equivalent for all of them. 
Choose an // arbitrary one to be handled as tha canonical variation. @@ -8739,7 +9001,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Get the alignment for the argument in addition to the value; // we'll use it later. PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(PtrOp0.getPointer()); + Ops.push_back(PtrOp0.emitRawPointer(*this)); continue; } } @@ -8766,7 +9028,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Get the alignment for the argument in addition to the value; // we'll use it later. PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(PtrOp1.getPointer()); + Ops.push_back(PtrOp1.emitRawPointer(*this)); continue; } } @@ -9187,7 +9449,7 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, if (ReturnValue.isNull()) return MvecOut; else - return Builder.CreateStore(MvecOut, ReturnValue.getValue()); + return Builder.CreateStore(MvecOut, ReturnValue.getAddress()); } case CustomCodeGen::VST24: { @@ -10077,11 +10339,15 @@ Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *Ty, ArrayRef<Value *> Ops) { assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) && - "Expects TypleFlag isTupleSet or TypeFlags.isTupleSet()"); + "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()"); unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue(); auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>( - TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty); + TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty); + + if (!SingleVecTy) + return nullptr; + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * SingleVecTy->getMinNumElements()); @@ -10096,6 +10362,10 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate"); auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType()); + + if (!SrcTy) + return nullptr; + unsigned MinElts = SrcTy->getMinNumElements(); Value *Call = llvm::PoisonValue::get(Ty); for (unsigned I = 0; I < Ops.size(); I++) { @@ -10637,6 +10907,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, BuiltinID <= clang::AArch64::LastSMEBuiltin) return EmitAArch64SMEBuiltinExpr(BuiltinID, E); + if (BuiltinID == Builtin::BI__builtin_cpu_supports) + return EmitAArch64CpuSupports(E); + unsigned HintID = static_cast<unsigned>(-1); switch (BuiltinID) { default: break; @@ -10670,16 +10943,20 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); } + if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) { + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty)); + } + if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) { // Create call to __arm_sme_state and store the results to the two pointers. 
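The __builtin_arm_trap lowering added just above zero-extends its argument to i32 and calls llvm.aarch64.break, which the backend should select to a BRK carrying that immediate. A minimal sketch of a call site (the immediate is arbitrary, and the builtin is only available when targeting AArch64):

    void fatal(void) {
      __builtin_arm_trap(42);      // expands to: call void @llvm.aarch64.break(i32 42)
      __builtin_unreachable();
    }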
CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {}, false), "__arm_sme_state")); - auto Attrs = - AttributeList() - .addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible") - .addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved"); + auto Attrs = AttributeList().addFnAttribute(getLLVMContext(), + "aarch64_pstate_sm_compatible"); CI->setAttributes(Attrs); CI->setCallingConv( llvm::CallingConv:: @@ -11318,6 +11595,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Address, RW, Locality, Data}); } + if (BuiltinID == AArch64::BI__hlt) { + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt); + Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); + + // Return 0 for convenience, even though MSVC returns some other undefined + // value. + return ConstantInt::get(Builder.getInt32Ty(), 0); + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (std::optional<MSVCIntrin> MsvcIntId = @@ -11360,7 +11646,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // Get the alignment for the argument in addition to the value; // we'll use it later. PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(PtrOp0.getPointer()); + Ops.push_back(PtrOp0.emitRawPointer(*this)); continue; } } @@ -12044,7 +12330,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, "vgetq_lane"); } - case clang::AArch64::BI_InterlockedAdd: { + case clang::AArch64::BI_InterlockedAdd: + case clang::AArch64::BI_InterlockedAdd64: { Address DestAddr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); AtomicRMWInst *RMWI = @@ -13225,15 +13512,15 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using __builtin_preserve_field_info() without -g"); - return IsBitField ? EmitLValue(Arg).getBitFieldPointer() - : EmitLValue(Arg).getPointer(*this); + return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this) + : EmitLValue(Arg).emitRawPointer(*this); } // Enable underlying preserve_*_access_index() generation. bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; IsInPreservedAIRegion = true; - Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() - : EmitLValue(Arg).getPointer(*this); + Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this) + : EmitLValue(Arg).emitRawPointer(*this); IsInPreservedAIRegion = OldIsInPreservedAIRegion; ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); @@ -13931,7 +14218,7 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { // Grab the appropriate field from __cpu_model. 
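The GEPs into the libgcc-compatible __cpu_model and __cpu_features2 globals are only changed from plain to inbounds form here; the field index is a small constant inside the object, so the stronger form is always valid. The added validateCpuSupports check additionally folds unknown feature strings to a constant false instead of emitting a pointless load. A small usage sketch (the CPU and feature names are just examples):

    int pick_impl(void) {
      __builtin_cpu_init();
      if (__builtin_cpu_is("znver4"))
        return 2;                       // CPU-specific path
      if (__builtin_cpu_supports("avx2"))
        return 1;                       // feature-specific path
      return 0;                         // generic fallback
    }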
llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, Index)}; - llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); + llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue, CharUnits::fromQuantity(4)); @@ -13943,6 +14230,8 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); + if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr)) + return Builder.getFalse(); return EmitX86CpuSupports(FeatureStr); } @@ -13971,7 +14260,7 @@ CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) { // global in the struct STy. Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), Builder.getInt32(0)}; - Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); + Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures, CharUnits::fromQuantity(4)); @@ -13992,7 +14281,7 @@ CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) { continue; Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)}; Value *Features = Builder.CreateAlignedLoad( - Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs), + Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs), CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. Value *Mask = Builder.getInt32(M); @@ -14025,6 +14314,21 @@ Value *CodeGenFunction::EmitX86CpuInit() { return Builder.CreateCall(Func); } +Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) { + const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString(); + llvm::SmallVector<StringRef, 8> Features; + ArgStr.split(Features, "+"); + for (auto &Feature : Features) { + Feature = Feature.trim(); + if (!llvm::AArch64::parseFMVExtension(Feature)) + return Builder.getFalse(); + if (Feature != "default") + Features.push_back(Feature); + } + return EmitAArch64CpuSupports(Features); +} + llvm::Value * CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) { uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs); @@ -14053,11 +14357,11 @@ CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) { Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_cpu_is) + if (BuiltinID == Builtin::BI__builtin_cpu_is) return EmitX86CpuIs(E); - if (BuiltinID == X86::BI__builtin_cpu_supports) + if (BuiltinID == Builtin::BI__builtin_cpu_supports) return EmitX86CpuSupports(E); - if (BuiltinID == X86::BI__builtin_cpu_init) + if (BuiltinID == Builtin::BI__builtin_cpu_init) return EmitX86CpuInit(); // Handle MSVC intrinsics before argument evaluation to prevent double @@ -14208,14 +14512,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { - Address Tmp = CreateMemTemp(E->getArg(0)->getType()); + RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType()); Builder.CreateStore(Ops[0], Tmp); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), Tmp.getPointer()); } case X86::BI_mm_getcsr: case X86::BI__builtin_ia32_stmxcsr: { - Address Tmp = CreateMemTemp(E->getType()); + 
RawAddress Tmp = CreateMemTemp(E->getType()); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), Tmp.getPointer()); return Builder.CreateLoad(Tmp, "stmxcsr"); @@ -15753,14 +16057,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0]}); } - // 3DNow! - case X86::BI__builtin_ia32_pswapdsf: - case X86::BI__builtin_ia32_pswapdsi: { - llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); - Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); - llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); - return Builder.CreateCall(F, Ops, "pswapd"); - } case X86::BI__builtin_ia32_rdrand16_step: case X86::BI__builtin_ia32_rdrand32_step: case X86::BI__builtin_ia32_rdrand64_step: @@ -16542,9 +16838,144 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID = Intrinsic::not_intrinsic; +#include "llvm/TargetParser/PPCTargetParser.def" + auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, + unsigned Mask, CmpInst::Predicate CompOp, + unsigned OpValue) -> Value * { + if (SupportMethod == BUILTIN_PPC_FALSE) + return llvm::ConstantInt::getFalse(ConvertType(E->getType())); + + if (SupportMethod == BUILTIN_PPC_TRUE) + return llvm::ConstantInt::getTrue(ConvertType(E->getType())); + + assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod."); + + llvm::Value *FieldValue = nullptr; + if (SupportMethod == USE_SYS_CONF) { + llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE); + llvm::Constant *SysConf = + CGM.CreateRuntimeVariable(STy, "_system_configuration"); + + // Grab the appropriate field from _system_configuration. + llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, FieldIdx)}; + + FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs); + FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue, + CharUnits::fromQuantity(4)); + } else if (SupportMethod == SYS_CALL) { + llvm::FunctionType *FTy = + llvm::FunctionType::get(Int64Ty, Int32Ty, false); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "getsystemcfg"); + + FieldValue = + Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)}); + } + assert(FieldValue && + "SupportMethod value is not defined in PPCTargetParser.def."); + + if (Mask) + FieldValue = Builder.CreateAnd(FieldValue, Mask); + + llvm::Type *ValueType = FieldValue->getType(); + bool IsValueType64Bit = ValueType->isIntegerTy(64); + assert( + (IsValueType64Bit || ValueType->isIntegerTy(32)) && + "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr()."); + + return Builder.CreateICmp( + CompOp, FieldValue, + ConstantInt::get(IsValueType64Bit ? 
Int64Ty : Int32Ty, OpValue)); + }; + switch (BuiltinID) { default: return nullptr; + case Builtin::BI__builtin_cpu_is: { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + llvm::Triple Triple = getTarget().getTriple(); + + unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue; + typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo; + + std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) = + static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr) +#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \ + AIXID) \ + .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({BUILTIN_PPC_UNSUPPORTED, 0, + BUILTIN_PPC_UNSUPPORTED, 0})); + + if (Triple.isOSAIX()) { + assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) && + "Invalid CPU name. Missed by SemaChecking?"); + return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0, + ICmpInst::ICMP_EQ, AIXIDValue); + } + + assert(Triple.isOSLinux() && + "__builtin_cpu_is() is only supported for AIX and Linux."); + + assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) && + "Invalid CPU name. Missed by SemaChecking?"); + + if (LinuxSupportMethod == BUILTIN_PPC_FALSE) + return llvm::ConstantInt::getFalse(ConvertType(E->getType())); + + Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); + return Builder.CreateICmpEQ(TheCall, + llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); + } + case Builtin::BI__builtin_cpu_supports: { + llvm::Triple Triple = getTarget().getTriple(); + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + if (Triple.isOSAIX()) { + unsigned SupportMethod, FieldIdx, Mask, Value; + CmpInst::Predicate CompOp; + typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, + unsigned> + CPUSupportType; + std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) = + static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr) +#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ + VALUE) \ + .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({BUILTIN_PPC_FALSE, 0, 0, + CmpInst::Predicate(), 0})); + return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, + Value); + } + + assert(Triple.isOSLinux() && + "__builtin_cpu_supports() is only supported for AIX and Linux."); + unsigned FeatureWord; + unsigned BitMask; + std::tie(FeatureWord, BitMask) = + StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) +#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ + .Case(Name, {FA_WORD, Bitmask}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({0, 0}); + if (!BitMask) + return Builder.getFalse(); + Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); + Value *Mask = + Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); + return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); +#undef PPC_FAWORD_HWCAP +#undef PPC_FAWORD_HWCAP2 +#undef PPC_FAWORD_CPUID + } + // __builtin_ppc_get_timebase is 
GCC 4.8+'s PowerPC-specific name for what we // call __builtin_readcyclecounter. case PPC::BI__builtin_ppc_get_timebase: @@ -16980,37 +17411,34 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); } - // Rotate and insert under mask operation. - // __rldimi(rs, is, shift, mask) - // (rotl64(rs, shift) & mask) | (is & ~mask) - // __rlwimi(rs, is, shift, mask) - // (rotl(rs, shift) & mask) | (is & ~mask) case PPC::BI__builtin_ppc_rldimi: case PPC::BI__builtin_ppc_rlwimi: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); - llvm::Type *Ty = Op0->getType(); - Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); - if (BuiltinID == PPC::BI__builtin_ppc_rldimi) + // rldimi is 64-bit instruction, expand the intrinsic before isel to + // leverage peephole and avoid legalization efforts. + if (BuiltinID == PPC::BI__builtin_ppc_rldimi && + !getTarget().getTriple().isPPC64()) { + Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType()); Op2 = Builder.CreateZExt(Op2, Int64Ty); - Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); - Value *X = Builder.CreateAnd(Shift, Op3); - Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3)); - return Builder.CreateOr(X, Y); - } - // Rotate and insert under mask operation. - // __rlwnm(rs, shift, mask) - // rotl(rs, shift) & mask + Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); + return Builder.CreateOr(Builder.CreateAnd(Shift, Op3), + Builder.CreateAnd(Op1, Builder.CreateNot(Op3))); + } + return Builder.CreateCall( + CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi + ? Intrinsic::ppc_rldimi + : Intrinsic::ppc_rlwimi), + {Op0, Op1, Op2, Op3}); + } case PPC::BI__builtin_ppc_rlwnm: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); - llvm::Type *Ty = Op0->getType(); - Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); - Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1}); - return Builder.CreateAnd(Shift, Op2); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm), + {Op0, Op1, Op2}); } case PPC::BI__builtin_ppc_poppar4: case PPC::BI__builtin_ppc_poppar8: { @@ -17418,7 +17846,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, SmallVector<Value *, 4> Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) if (E->getArg(i)->getType()->isArrayType()) - Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); + Ops.push_back( + EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this)); else Ops.push_back(EmitScalarExpr(E->getArg(i))); // The first argument of these two builtins is a pointer used to store their @@ -17721,9 +18150,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. /// Emit code based on Code Object ABI version. /// COV_4 : Emit code to use dispatch ptr -/// COV_5 : Emit code to use implicitarg ptr +/// COV_5+ : Emit code to use implicitarg ptr /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version" -/// and use its value for COV_4 or COV_5 approach. It is used for +/// and use its value for COV_4 or COV_5+ approach. It is used for /// compiling device libraries in an ABI-agnostic way. 
/// /// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by @@ -17766,7 +18195,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2))); } else { Value *GEP = nullptr; - if (Cov == CodeObjectVersionKind::COV_5) { + if (Cov >= CodeObjectVersionKind::COV_5) { // Indexing the implicit kernarg segment. GEP = CGF.Builder.CreateConstGEP1_32( CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); @@ -17837,9 +18266,35 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, break; } + // Some of the atomic builtins take the scope as a string name. StringRef scp; - llvm::getConstantStringInfo(Scope, scp); - SSID = getLLVMContext().getOrInsertSyncScopeID(scp); + if (llvm::getConstantStringInfo(Scope, scp)) { + SSID = getLLVMContext().getOrInsertSyncScopeID(scp); + return; + } + + // Older builtins had an enum argument for the memory scope. + int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue(); + switch (scope) { + case 0: // __MEMORY_SCOPE_SYSTEM + SSID = llvm::SyncScope::System; + break; + case 1: // __MEMORY_SCOPE_DEVICE + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + break; + case 2: // __MEMORY_SCOPE_WRKGRP + SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); + break; + case 3: // __MEMORY_SCOPE_WVFRNT + SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); + break; + case 4: // __MEMORY_SCOPE_SINGLE + SSID = llvm::SyncScope::SingleThread; + break; + default: + SSID = llvm::SyncScope::System; + break; + } } llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, @@ -17859,6 +18314,209 @@ llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, return Arg; } +Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) { + if (QT->hasFloatingRepresentation()) { + switch (elementCount) { + case 2: + return Intrinsic::dx_dot2; + case 3: + return Intrinsic::dx_dot3; + case 4: + return Intrinsic::dx_dot4; + } + } + if (QT->hasSignedIntegerRepresentation()) + return Intrinsic::dx_sdot; + + assert(QT->hasUnsignedIntegerRepresentation()); + return Intrinsic::dx_udot; +} + +Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + if (!getLangOpts().HLSL) + return nullptr; + + switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_elementwise_all: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + return Builder.CreateIntrinsic( + /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), + CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, + "hlsl.all"); + } + case Builtin::BI__builtin_hlsl_elementwise_any: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + return Builder.CreateIntrinsic( + /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), + CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, + "hlsl.any"); + } + case Builtin::BI__builtin_hlsl_elementwise_clamp: { + Value *OpX = EmitScalarExpr(E->getArg(0)); + Value *OpMin = EmitScalarExpr(E->getArg(1)); + Value *OpMax = EmitScalarExpr(E->getArg(2)); + + QualType Ty = E->getArg(0)->getType(); + bool IsUnsigned = false; + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + IsUnsigned = Ty->isUnsignedIntegerType(); + return Builder.CreateIntrinsic( + /*ReturnType=*/OpX->getType(), + IsUnsigned ? 
Intrinsic::dx_uclamp : Intrinsic::dx_clamp, + ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp"); + } + case Builtin::BI__builtin_hlsl_dot: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *T0 = Op0->getType(); + llvm::Type *T1 = Op1->getType(); + if (!T0->isVectorTy() && !T1->isVectorTy()) { + if (T0->isFloatingPointTy()) + return Builder.CreateFMul(Op0, Op1, "dx.dot"); + + if (T0->isIntegerTy()) + return Builder.CreateMul(Op0, Op1, "dx.dot"); + + // Bools should have been promoted + llvm_unreachable( + "Scalar dot product is only supported on ints and floats."); + } + // A VectorSplat should have happened + assert(T0->isVectorTy() && T1->isVectorTy() && + "Dot product of vector and scalar is not supported."); + + // A vector sext or sitofp should have happened + assert(T0->getScalarType() == T1->getScalarType() && + "Dot product of vectors need the same element types."); + + auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>(); + [[maybe_unused]] auto *VecTy1 = + E->getArg(1)->getType()->getAs<VectorType>(); + // A HLSLVectorTruncation should have happend + assert(VecTy0->getNumElements() == VecTy1->getNumElements() && + "Dot product requires vectors to be of the same size."); + + return Builder.CreateIntrinsic( + /*ReturnType=*/T0->getScalarType(), + getDotProductIntrinsic(E->getArg(0)->getType(), + VecTy0->getNumElements()), + ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot"); + } break; + case Builtin::BI__builtin_hlsl_lerp: { + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *S = EmitScalarExpr(E->getArg(2)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("lerp operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), + ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp"); + } + case Builtin::BI__builtin_hlsl_elementwise_frac: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("frac operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac, + ArrayRef<Value *>{Op0}, nullptr, "dx.frac"); + } + case Builtin::BI__builtin_hlsl_elementwise_isinf: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *Xty = Op0->getType(); + llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext()); + if (Xty->isVectorTy()) { + auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>(); + retType = llvm::VectorType::get( + retType, ElementCount::getFixed(XVecTy->getNumElements())); + } + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("isinf operand must have a float representation"); + return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf, + ArrayRef<Value *>{Op0}, nullptr, "dx.isinf"); + } + case Builtin::BI__builtin_hlsl_mad: { + Value *M = EmitScalarExpr(E->getArg(0)); + Value *A = EmitScalarExpr(E->getArg(1)); + Value *B = EmitScalarExpr(E->getArg(2)); + if (E->getArg(0)->getType()->hasFloatingRepresentation()) + return Builder.CreateIntrinsic( + /*ReturnType*/ M->getType(), Intrinsic::fmuladd, + ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad"); + + if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { + if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) + return Builder.CreateIntrinsic( + /*ReturnType*/ M->getType(), Intrinsic::dx_imad, + 
ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad"); + + Value *Mul = Builder.CreateNSWMul(M, A); + return Builder.CreateNSWAdd(Mul, B); + } + assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); + if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) + return Builder.CreateIntrinsic( + /*ReturnType=*/M->getType(), Intrinsic::dx_umad, + ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad"); + + Value *Mul = Builder.CreateNUWMul(M, A); + return Builder.CreateNUWAdd(Mul, B); + } + case Builtin::BI__builtin_hlsl_elementwise_rcp: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("rcp operand must have a float representation"); + llvm::Type *Ty = Op0->getType(); + llvm::Type *EltTy = Ty->getScalarType(); + Constant *One = Ty->isVectorTy() + ? ConstantVector::getSplat( + ElementCount::getFixed( + cast<FixedVectorType>(Ty)->getNumElements()), + ConstantFP::get(EltTy, 1.0)) + : ConstantFP::get(EltTy, 1.0); + return Builder.CreateFDiv(One, Op0, "hlsl.rcp"); + } + case Builtin::BI__builtin_hlsl_elementwise_rsqrt: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("rsqrt operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), + ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt"); + } + case Builtin::BI__builtin_hlsl_wave_get_lane_index: { + return EmitRuntimeCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", + {}, false, true)); + } + } + return nullptr; +} + +void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, + const CallExpr *E) { + constexpr const char *Tag = "amdgpu-as"; + + LLVMContext &Ctx = Inst->getContext(); + SmallVector<MMRAMetadata::TagT, 3> MMRAs; + for (unsigned K = 2; K < E->getNumArgs(); ++K) { + llvm::Value *V = EmitScalarExpr(E->getArg(K)); + StringRef AS; + if (llvm::getConstantStringInfo(V, AS)) { + MMRAs.push_back({Tag, AS}); + // TODO: Delete the resulting unused constant? 
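AddAMDGPUFenceAddressSpaceMMRA consumes every argument after the order/scope pair, expects each to be a string literal naming an address space, and attaches the collected "amdgpu-as" tags as MMRA metadata on the fence. Assuming the extra arguments are spelled directly at the call site, the extended fence builtin is used roughly like this (the accepted address-space names are whatever the AMDGPU backend recognizes):

    void flush_lds_only(void) {
      // fence limited to the LDS ("local") address space at workgroup scope
      __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup", "local");
    }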
+ continue; + } + CGM.Error(E->getExprLoc(), + "expected an address space name as a string literal"); + } + + llvm::sort(MMRAs); + MMRAs.erase(llvm::unique(MMRAs), MMRAs.end()); + Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; @@ -17903,9 +18561,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_ds_swizzle: - return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_ds_swizzle); case AMDGPU::BI__builtin_amdgcn_mov_dpp8: - return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector<llvm::Value *, 6> Args; @@ -17925,42 +18585,63 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } + case AMDGPU::BI__builtin_amdgcn_permlane16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: + return emitBuiltinWithOneOverloadedType<6>( + *this, E, + BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 + ? Intrinsic::amdgcn_permlane16 + : Intrinsic::amdgcn_permlanex16); + case AMDGPU::BI__builtin_amdgcn_permlane64: + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_permlane64); + case AMDGPU::BI__builtin_amdgcn_readlane: + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_readlane); + case AMDGPU::BI__builtin_amdgcn_readfirstlane: + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_readfirstlane); case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_div_fixup); case AMDGPU::BI__builtin_amdgcn_trig_preop: case AMDGPU::BI__builtin_amdgcn_trig_preopf: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); case AMDGPU::BI__builtin_amdgcn_rcp: case AMDGPU::BI__builtin_amdgcn_rcpf: case AMDGPU::BI__builtin_amdgcn_rcph: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp); case AMDGPU::BI__builtin_amdgcn_sqrt: case AMDGPU::BI__builtin_amdgcn_sqrtf: case AMDGPU::BI__builtin_amdgcn_sqrth: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_sqrt); case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: case AMDGPU::BI__builtin_amdgcn_rsqh: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq); case AMDGPU::BI__builtin_amdgcn_rsq_clamp: case AMDGPU::BI__builtin_amdgcn_rsq_clampf: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_rsq_clamp); case AMDGPU::BI__builtin_amdgcn_sinf: case AMDGPU::BI__builtin_amdgcn_sinh: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); + return emitBuiltinWithOneOverloadedType<1>(*this, E, 
Intrinsic::amdgcn_sin); case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); case AMDGPU::BI__builtin_amdgcn_logf: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log); + return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log); case AMDGPU::BI__builtin_amdgcn_exp2f: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_exp2); case AMDGPU::BI__builtin_amdgcn_log_clampf: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: case AMDGPU::BI__builtin_amdgcn_ldexpf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); @@ -17981,7 +18662,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: case AMDGPU::BI__builtin_amdgcn_frexp_manth: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_frexp_mant); case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); @@ -17998,13 +18680,17 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fract: case AMDGPU::BI__builtin_amdgcn_fractf: case AMDGPU::BI__builtin_amdgcn_fracth: - return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_lerp); case AMDGPU::BI__builtin_amdgcn_ubfe: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_ubfe); case AMDGPU::BI__builtin_amdgcn_sbfe: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_ballot_w32: case AMDGPU::BI__builtin_amdgcn_ballot_w64: { llvm::Type *ResultType = ConvertType(E->getType()); @@ -18042,7 +18728,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + return emitBuiltinWithOneOverloadedType<3>(*this, E, + Intrinsic::amdgcn_fmed3); case AMDGPU::BI__builtin_amdgcn_ds_append: case AMDGPU::BI__builtin_amdgcn_ds_consume: { Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? 
@@ -18051,32 +18738,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } - case AMDGPU::BI__builtin_amdgcn_ds_faddf: - case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { - Intrinsic::ID Intrin; - switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_ds_faddf: - Intrin = Intrinsic::amdgcn_ds_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_ds_fminf: - Intrin = Intrinsic::amdgcn_ds_fmin; - break; - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: - Intrin = Intrinsic::amdgcn_ds_fmax; - break; - } - llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); - llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *Src4 = EmitScalarExpr(E->getArg(4)); - llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() }); - llvm::FunctionType *FTy = F->getFunctionType(); - llvm::Type *PTy = FTy->getParamType(0); - Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy); - return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 }); - } case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: @@ -18149,74 +18810,46 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); return Builder.CreateCall(F, {Addr, Val}); } - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: { + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: { + Intrinsic::ID IID; - llvm::Type *ArgTy; switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: - ArgTy = llvm::Type::getFloatTy(getLLVMContext()); - IID = Intrinsic::amdgcn_ds_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: - ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); - IID = Intrinsic::amdgcn_ds_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 2); - IID = Intrinsic::amdgcn_ds_fadd; + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: + IID = Intrinsic::amdgcn_global_load_tr_b64; + break; + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: + case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: + IID = Intrinsic::amdgcn_global_load_tr_b128; break; } + llvm::Type *LoadTy = ConvertType(E->getType()); llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); - llvm::Value *Val = EmitScalarExpr(E->getArg(1)); - 
llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue( - llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true)); - llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue( - llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0)); - llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy}); - return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); - } - case AMDGPU::BI__builtin_amdgcn_global_load_tr_i32: - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v2i32: - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4f16: - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4i16: - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8f16: - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8i16: { - - llvm::Type *ArgTy; - switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_global_load_tr_i32: - ArgTy = llvm::Type::getInt32Ty(getLLVMContext()); - break; - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v2i32: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getInt32Ty(getLLVMContext()), 2); - break; - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 4); - break; - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v4i16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getInt16Ty(getLLVMContext()), 4); - break; - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8f16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getHalfTy(getLLVMContext()), 8); - break; - case AMDGPU::BI__builtin_amdgcn_global_load_tr_v8i16: - ArgTy = llvm::FixedVectorType::get( - llvm::Type::getInt16Ty(getLLVMContext()), 8); - break; - } - - llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); - llvm::Function *F = - CGM.getIntrinsic(Intrinsic::amdgcn_global_load_tr, {ArgTy}); + llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy}); return Builder.CreateCall(F, {Addr}); } + case AMDGPU::BI__builtin_amdgcn_get_fpenv: { + Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv, + {llvm::Type::getInt64Ty(getLLVMContext())}); + return Builder.CreateCall(F); + } + case AMDGPU::BI__builtin_amdgcn_set_fpenv: { + Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv, + {llvm::Type::getInt64Ty(getLLVMContext())}); + llvm::Value *Env = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall(F, {Env}); + } case AMDGPU::BI__builtin_amdgcn_read_exec: return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: @@ -18357,7 +18990,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: AppendFalseForOpselArg = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB @@ -18366,7 +18999,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: AppendFalseForOpselArg = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB @@ -18519,7 +19152,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // r600 intrinsics case AMDGPU::BI__builtin_r600_recipsqrt_ieee: case 
AMDGPU::BI__builtin_r600_recipsqrt_ieeef: - return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); + return emitBuiltinWithOneOverloadedType<1>(*this, E, + Intrinsic::r600_recipsqrt_ieee); case AMDGPU::BI__builtin_r600_read_tidig_x: return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); case AMDGPU::BI__builtin_r600_read_tidig_y: @@ -18536,12 +19170,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fence: { ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), AO, SSID); - return Builder.CreateFence(AO, SSID); + FenceInst *Fence = Builder.CreateFence(AO, SSID); + if (E->getNumArgs() > 2) + AddAMDGPUFenceAddressSpaceMMRA(Fence, E); + return Fence; } case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: case AMDGPU::BI__builtin_amdgcn_atomic_dec32: - case AMDGPU::BI__builtin_amdgcn_atomic_dec64: { + case AMDGPU::BI__builtin_amdgcn_atomic_dec64: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + case AMDGPU::BI__builtin_amdgcn_ds_fminf: + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: @@ -18552,23 +19196,62 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_atomic_dec64: BinOp = llvm::AtomicRMWInst::UDecWrap; break; + case AMDGPU::BI__builtin_amdgcn_ds_faddf: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: + case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: + BinOp = llvm::AtomicRMWInst::FAdd; + break; + case AMDGPU::BI__builtin_amdgcn_ds_fminf: + BinOp = llvm::AtomicRMWInst::FMin; + break; + case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: + BinOp = llvm::AtomicRMWInst::FMax; + break; } Address Ptr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Type *OrigTy = Val->getType(); + QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); - ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(3)), AO, SSID); + bool Volatile; - QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); - bool Volatile = - PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); + if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf || + BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf || + BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) { + // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument + Volatile = + cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue(); + } else { + // Infer volatile from the passed type. + Volatile = + PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); + } + + if (E->getNumArgs() >= 4) { + // Some of the builtins have explicit ordering and scope arguments. + ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(3)), AO, SSID); + } else { + // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. + SSID = llvm::SyncScope::System; + AO = AtomicOrdering::SequentiallyConsistent; + + // The v2bf16 builtin uses i16 instead of a natural bfloat type. 
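With this hunk the LDS float atomics stop going through the retired amdgcn.ds.fadd/fmin/fmax intrinsics and are emitted as ordinary atomicrmw instructions; when the builtin carries no order/scope arguments the code above defaults them to seq_cst at system scope, and the v2bf16 variant is additionally bitcast to a real <2 x bfloat> just below. A rough sketch of the f32 case, assuming an LDS (address-space-3) pointer:

    float add_lds(__attribute__((address_space(3))) float *p, float v) {
      // now emitted as: atomicrmw fadd ptr addrspace(3) %p, float %v seq_cst
      return __builtin_amdgcn_ds_atomic_fadd_f32(p, v);
    }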
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { + llvm::Type *V2BF16Ty = FixedVectorType::get( + llvm::Type::getBFloatTy(Builder.getContext()), 2); + Val = Builder.CreateBitCast(Val, V2BF16Ty); + } + } llvm::AtomicRMWInst *RMW = Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); - return RMW; + return Builder.CreateBitCast(RMW, OrigTy); } case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { @@ -18579,6 +19262,50 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); return Builder.CreateCall(F, {Arg}); } + case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: + return emitBuiltinWithOneOverloadedType<4>( + *this, E, Intrinsic::amdgcn_make_buffer_rsrc); + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128: + return emitBuiltinWithOneOverloadedType<5>( + *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store); + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: { + llvm::Type *RetTy = nullptr; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: + RetTy = Int8Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: + RetTy = Int16Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: + RetTy = Int32Ty; + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: + RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: + RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3); + break; + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: + RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4); + break; + } + Function *F = + CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy); + return Builder.CreateCall( + F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))}); + } default: return nullptr; } @@ -19734,14 +20461,14 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, // Save returned values. 
     assert(II.NumResults);
     if (II.NumResults == 1) {
-      Builder.CreateAlignedStore(Result, Dst.getPointer(),
+      Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
                                  CharUnits::fromQuantity(4));
     } else {
       for (unsigned i = 0; i < II.NumResults; ++i) {
         Builder.CreateAlignedStore(
             Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
                                   Dst.getElementType()),
-            Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
+            Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
                               llvm::ConstantInt::get(IntTy, i)),
             CharUnits::fromQuantity(4));
       }
@@ -19781,7 +20508,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     for (unsigned i = 0; i < II.NumResults; ++i) {
       Value *V = Builder.CreateAlignedLoad(
           Src.getElementType(),
-          Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
+          Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
                             llvm::ConstantInt::get(IntTy, i)),
           CharUnits::fromQuantity(4));
       Values.push_back(Builder.CreateBitCast(V, ParamType));
@@ -19853,7 +20580,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     for (unsigned i = 0; i < MI.NumEltsA; ++i) {
       Value *V = Builder.CreateAlignedLoad(
           SrcA.getElementType(),
-          Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
+          Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
                             llvm::ConstantInt::get(IntTy, i)),
           CharUnits::fromQuantity(4));
       Values.push_back(Builder.CreateBitCast(V, AType));
@@ -19863,7 +20590,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     for (unsigned i = 0; i < MI.NumEltsB; ++i) {
       Value *V = Builder.CreateAlignedLoad(
           SrcB.getElementType(),
-          Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
+          Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
                             llvm::ConstantInt::get(IntTy, i)),
           CharUnits::fromQuantity(4));
       Values.push_back(Builder.CreateBitCast(V, BType));
@@ -19874,7 +20601,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     for (unsigned i = 0; i < MI.NumEltsC; ++i) {
       Value *V = Builder.CreateAlignedLoad(
           SrcC.getElementType(),
-          Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
+          Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
                             llvm::ConstantInt::get(IntTy, i)),
           CharUnits::fromQuantity(4));
       Values.push_back(Builder.CreateBitCast(V, CType));
@@ -19884,7 +20611,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     for (unsigned i = 0; i < MI.NumEltsD; ++i)
       Builder.CreateAlignedStore(
           Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
-          Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
+          Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
                             llvm::ConstantInt::get(IntTy, i)),
           CharUnits::fromQuantity(4));
     return Result;
@@ -20142,7 +20869,7 @@ struct BuiltinAlignArgs {
   BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
     QualType AstType = E->getArg(0)->getType();
     if (AstType->isArrayType())
-      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
     else
       Src = CGF.EmitScalarExpr(E->getArg(0));
     SrcType = Src->getType();
@@ -20318,6 +21045,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
   case WebAssembly::BI__builtin_wasm_min_f32:
   case WebAssembly::BI__builtin_wasm_min_f64:
+  case WebAssembly::BI__builtin_wasm_min_f16x8:
   case WebAssembly::BI__builtin_wasm_min_f32x4:
   case WebAssembly::BI__builtin_wasm_min_f64x2: {
     Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20328,6 +21056,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
   case WebAssembly::BI__builtin_wasm_max_f32:
   case WebAssembly::BI__builtin_wasm_max_f64:
+  case WebAssembly::BI__builtin_wasm_max_f16x8:
   case WebAssembly::BI__builtin_wasm_max_f32x4:
   case WebAssembly::BI__builtin_wasm_max_f64x2: {
     Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20336,6 +21065,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_pmin_f16x8:
   case WebAssembly::BI__builtin_wasm_pmin_f32x4:
   case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
     Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20344,6 +21074,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_pmax_f16x8:
   case WebAssembly::BI__builtin_wasm_pmax_f32x4:
   case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
     Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20642,6 +21373,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
     return Builder.CreateCall(Callee, Ops);
   }
+  case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
+  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
   case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
   case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
   case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
@@ -20651,10 +21384,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Value *C = EmitScalarExpr(E->getArg(2));
     unsigned IntNo;
     switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
     case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
     case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
       IntNo = Intrinsic::wasm_relaxed_madd;
       break;
+    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
     case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
     case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
       IntNo = Intrinsic::wasm_relaxed_nmadd;
@@ -20758,9 +21493,31 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
     return Builder.CreateCall(Callee, {LHS, RHS, Acc});
   }
+  case WebAssembly::BI__builtin_wasm_loadf16_f32: {
+    Value *Addr = EmitScalarExpr(E->getArg(0));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
+    return Builder.CreateCall(Callee, {Addr});
+  }
+  case WebAssembly::BI__builtin_wasm_storef16_f32: {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    Value *Addr = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
+    return Builder.CreateCall(Callee, {Val, Addr});
+  }
+  case WebAssembly::BI__builtin_wasm_splat_f16x8: {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
+    return Builder.CreateCall(Callee, {Val});
+  }
+  case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
+    Value *Vector = EmitScalarExpr(E->getArg(0));
+    Value *Index = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
+    return Builder.CreateCall(Callee, {Vector, Index});
+  }
   case WebAssembly::BI__builtin_wasm_table_get: {
     assert(E->getArg(0)->getType()->isArrayType());
-    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
     Value *Index = EmitScalarExpr(E->getArg(1));
     Function *Callee;
     if (E->getType().isWebAssemblyExternrefType())
@@ -20774,7 +21531,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
   case WebAssembly::BI__builtin_wasm_table_set: {
     assert(E->getArg(0)->getType()->isArrayType());
-    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
     Value *Index = EmitScalarExpr(E->getArg(1));
     Value *Val = EmitScalarExpr(E->getArg(2));
     Function *Callee;
@@ -20789,13 +21546,13 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
   case WebAssembly::BI__builtin_wasm_table_size: {
     assert(E->getArg(0)->getType()->isArrayType());
-    Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+    Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
     return Builder.CreateCall(Callee, Value);
   }
   case WebAssembly::BI__builtin_wasm_table_grow: {
     assert(E->getArg(0)->getType()->isArrayType());
-    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
     Value *Val = EmitScalarExpr(E->getArg(1));
     Value *NElems = EmitScalarExpr(E->getArg(2));
 
@@ -20812,7 +21569,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
   case WebAssembly::BI__builtin_wasm_table_fill: {
     assert(E->getArg(0)->getType()->isArrayType());
-    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
     Value *Index = EmitScalarExpr(E->getArg(1));
     Value *Val = EmitScalarExpr(E->getArg(2));
     Value *NElems = EmitScalarExpr(E->getArg(3));
@@ -20830,8 +21587,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
   }
   case WebAssembly::BI__builtin_wasm_table_copy: {
     assert(E->getArg(0)->getType()->isArrayType());
-    Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
-    Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer();
+    Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
+    Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
     Value *DstIdx = EmitScalarExpr(E->getArg(2));
     Value *SrcIdx = EmitScalarExpr(E->getArg(3));
     Value *NElems = EmitScalarExpr(E->getArg(4));
@@ -20910,7 +21667,7 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
   auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
     // The base pointer is passed by address, so it needs to be loaded.
     Address A = EmitPointerWithAlignment(E->getArg(0));
-    Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment());
+    Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
     llvm::Value *Base = Builder.CreateLoad(BP);
     // The treatment of both loads and stores is the same: the arguments for
     // the builtin are the same as the arguments for the intrinsic.
@@ -20951,8 +21708,8 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
     // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
    // per call.
     Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
-    DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment());
-    llvm::Value *DestAddress = DestAddr.getPointer();
+    DestAddr = DestAddr.withElementType(Int8Ty);
+    llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
 
     // Operands are Base, Dest, Modifier.
     // The intrinsic format in LLVM IR is defined as
@@ -21003,8 +21760,8 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
         {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
 
     llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
-    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
-                               PredAddr.getAlignment());
+    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
+                               PredAddr.getAlignment());
     return Builder.CreateExtractValue(Result, 0);
   }
   // These are identical to the builtins above, except they don't consume
@@ -21022,8 +21779,8 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
         {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
 
     llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
-    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
-                               PredAddr.getAlignment());
+    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
+                               PredAddr.getAlignment());
     return Builder.CreateExtractValue(Result, 0);
   }
 
@@ -21120,7 +21877,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     // Handle aggregate argument, namely RVV tuple types in segment load/store
     if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
       LValue L = EmitAggExprToLValue(E->getArg(i));
-      llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
+      llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
       Ops.push_back(AggValue);
       continue;
     }
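For reference, the new WebAssembly half-precision scalar builtins wired up above can be exercised from C roughly as sketched below. This is a minimal usage sketch, not part of the change: the exact builtin prototypes (an __fp16* address plus a float value, with the value preceding the address for the store, mirroring the operand order in the codegen above) and the helper name scale_f16 are assumptions for illustration only.

// Hypothetical usage sketch, assuming __builtin_wasm_loadf16_f32(__fp16 *)
// returns a float and __builtin_wasm_storef16_f32(float, __fp16 *) narrows
// the value back to f16, matching the argument order emitted above.
float scale_f16(__fp16 *p, float s) {
  float x = __builtin_wasm_loadf16_f32(p); // load one f16, widened to f32
  __builtin_wasm_storef16_f32(x * s, p);   // scale, narrow to f16, store back
  return x * s;
}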