Diffstat (limited to 'lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp  1521
1 file changed, 1168 insertions(+), 353 deletions(-)
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index f3527b0f39d1..3ecd1c6697d7 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -16,6 +16,7 @@
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "ConstantEmitter.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
@@ -29,6 +30,9 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/TargetParser.h"
#include <sstream>
using namespace clang;
@@ -641,6 +645,287 @@ struct CallObjCArcUse final : EHScopeStack::Cleanup {
};
}
+Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
+ BuiltinCheckKind Kind) {
+ assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
+ && "Unsupported builtin check kind");
+
+ Value *ArgValue = EmitScalarExpr(E);
+ if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
+ return ArgValue;
+
+ SanitizerScope SanScope(this);
+ Value *Cond = Builder.CreateICmpNE(
+ ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
+ EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
+ SanitizerHandler::InvalidBuiltin,
+ {EmitCheckSourceLocation(E->getExprLoc()),
+ llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
+ None);
+ return ArgValue;
+}
+
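For illustration only (not part of the patch): the condition this new check diagnoses, assuming a target where __builtin_clz(0)/__builtin_ctz(0) are undefined and the build uses -fsanitize=builtin.

    // clang -fsanitize=builtin demo.c
    unsigned leading_zeros(unsigned x) {
      return __builtin_clz(x);   // runtime error reported here when x == 0
    }
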
+/// Get the argument type for arguments to os_log_helper.
+static CanQualType getOSLogArgType(ASTContext &C, int Size) {
+ QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
+ return C.getCanonicalType(UnsignedTy);
+}
+
+llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
+ const analyze_os_log::OSLogBufferLayout &Layout,
+ CharUnits BufferAlignment) {
+ ASTContext &Ctx = getContext();
+
+ llvm::SmallString<64> Name;
+ {
+ raw_svector_ostream OS(Name);
+ OS << "__os_log_helper";
+ OS << "_" << BufferAlignment.getQuantity();
+ OS << "_" << int(Layout.getSummaryByte());
+ OS << "_" << int(Layout.getNumArgsByte());
+ for (const auto &Item : Layout.Items)
+ OS << "_" << int(Item.getSizeByte()) << "_"
+ << int(Item.getDescriptorByte());
+ }
+
+ if (llvm::Function *F = CGM.getModule().getFunction(Name))
+ return F;
+
+ llvm::SmallVector<ImplicitParamDecl, 4> Params;
+ Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
+ Ctx.VoidPtrTy, ImplicitParamDecl::Other);
+
+ for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
+ char Size = Layout.Items[I].getSizeByte();
+ if (!Size)
+ continue;
+
+ Params.emplace_back(
+ Ctx, nullptr, SourceLocation(),
+ &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
+ getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
+ }
+
+ FunctionArgList Args;
+ for (auto &P : Params)
+ Args.push_back(&P);
+
+ // The helper function has linkonce_odr linkage to enable the linker to merge
+ // identical functions. To ensure the merging always happens, 'noinline' is
+ // attached to the function when compiling with -Oz.
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+ llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
+ llvm::Function *Fn = llvm::Function::Create(
+ FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
+ Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
+
+ // Attach 'noinline' at -Oz.
+ if (CGM.getCodeGenOpts().OptimizeSize == 2)
+ Fn->addFnAttr(llvm::Attribute::NoInline);
+
+ auto NL = ApplyDebugLocation::CreateEmpty(*this);
+ IdentifierInfo *II = &Ctx.Idents.get(Name);
+ FunctionDecl *FD = FunctionDecl::Create(
+ Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+
+ StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
+
+ // Create a scope with an artificial location for the body of this function.
+ auto AL = ApplyDebugLocation::CreateArtificial(*this);
+
+ CharUnits Offset;
+ Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
+ BufferAlignment);
+ Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
+ Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
+ Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
+ Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
+
+ unsigned I = 1;
+ for (const auto &Item : Layout.Items) {
+ Builder.CreateStore(
+ Builder.getInt8(Item.getDescriptorByte()),
+ Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
+ Builder.CreateStore(
+ Builder.getInt8(Item.getSizeByte()),
+ Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
+
+ CharUnits Size = Item.size();
+ if (!Size.getQuantity())
+ continue;
+
+ Address Arg = GetAddrOfLocalVar(&Params[I]);
+ Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
+ Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
+ "argDataCast");
+ Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
+ Offset += Size;
+ ++I;
+ }
+
+ FinishFunction();
+
+ return Fn;
+}
+
+RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
+ assert(E.getNumArgs() >= 2 &&
+ "__builtin_os_log_format takes at least 2 arguments");
+ ASTContext &Ctx = getContext();
+ analyze_os_log::OSLogBufferLayout Layout;
+ analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
+ Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
+ llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
+
+ // Ignore argument 1, the format string. It is not currently used.
+ CallArgList Args;
+ Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
+
+ for (const auto &Item : Layout.Items) {
+ int Size = Item.getSizeByte();
+ if (!Size)
+ continue;
+
+ llvm::Value *ArgVal;
+
+ if (const Expr *TheExpr = Item.getExpr()) {
+ ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
+
+ // Check if this is a retainable type.
+ if (TheExpr->getType()->isObjCRetainableType()) {
+ assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
+ "Only scalar can be a ObjC retainable type");
+ // Check if the object is constant, if not, save it in
+ // RetainableOperands.
+ if (!isa<Constant>(ArgVal))
+ RetainableOperands.push_back(ArgVal);
+ }
+ } else {
+ ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
+ }
+
+ unsigned ArgValSize =
+ CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
+ llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
+ ArgValSize);
+ ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
+ CanQualType ArgTy = getOSLogArgType(Ctx, Size);
+ // If ArgVal has type x86_fp80, zero-extend ArgVal.
+ ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
+ Args.add(RValue::get(ArgVal), ArgTy);
+ }
+
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
+ llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
+ Layout, BufAddr.getAlignment());
+ EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
+
+ // Push a clang.arc.use cleanup for each object in RetainableOperands. The
+ // cleanup will cause the use to appear after the final log call, keeping
+  // the object valid while it's held in the log buffer. Note that if there's
+ // a release cleanup on the object, it will already be active; since
+ // cleanups are emitted in reverse order, the use will occur before the
+ // object is released.
+ if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
+ CGM.getCodeGenOpts().OptimizationLevel != 0)
+ for (llvm::Value *Object : RetainableOperands)
+ pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
+
+ return RValue::get(BufAddr.getPointer());
+}
+
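Usage sketch of the builtins this routine now lowers through the shared helper (illustrative; the format string and arguments are arbitrary):

    char buf[__builtin_os_log_format_buffer_size("%d: %s", 42, "msg")];
    __builtin_os_log_format(buf, "%d: %s", 42, "msg");
    // buf now holds [summary][numArgs] followed, per item, by
    // [descriptor][size][data], filled in by the generated __os_log_helper_*.
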
+/// Determine if a binop is a checked mixed-sign multiply we can specialize.
+static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
+ WidthAndSignedness Op1Info,
+ WidthAndSignedness Op2Info,
+ WidthAndSignedness ResultInfo) {
+ return BuiltinID == Builtin::BI__builtin_mul_overflow &&
+ Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
+ Op1Info.Signed != Op2Info.Signed;
+}
+
+/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
+/// the generic checked-binop irgen.
+static RValue
+EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
+ WidthAndSignedness Op1Info, const clang::Expr *Op2,
+ WidthAndSignedness Op2Info,
+ const clang::Expr *ResultArg, QualType ResultQTy,
+ WidthAndSignedness ResultInfo) {
+ assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
+ Op2Info, ResultInfo) &&
+ "Not a mixed-sign multipliction we can specialize");
+
+ // Emit the signed and unsigned operands.
+ const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
+ const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
+ llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
+ llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
+
+ llvm::Type *OpTy = Signed->getType();
+ llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
+ Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
+ llvm::Type *ResTy = ResultPtr.getElementType();
+
+ // Take the absolute value of the signed operand.
+ llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
+ llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
+ llvm::Value *AbsSigned =
+ CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
+
+ // Perform a checked unsigned multiplication.
+ llvm::Value *UnsignedOverflow;
+ llvm::Value *UnsignedResult =
+ EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
+ Unsigned, UnsignedOverflow);
+
+ llvm::Value *Overflow, *Result;
+ if (ResultInfo.Signed) {
+    // Signed overflow occurs if the result is greater than INT_MAX or less
+    // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
+ auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
+ .zextOrSelf(Op1Info.Width);
+ llvm::Value *MaxResult =
+ CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
+ CGF.Builder.CreateZExt(IsNegative, OpTy));
+ llvm::Value *SignedOverflow =
+ CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
+ Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
+
+ // Prepare the signed result (possibly by negating it).
+ llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
+ llvm::Value *SignedResult =
+ CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
+ Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
+ } else {
+ // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
+ llvm::Value *Underflow = CGF.Builder.CreateAnd(
+ IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
+ Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
+ if (ResultInfo.Width < Op1Info.Width) {
+ auto IntMax =
+ llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
+ llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
+ UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
+ Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
+ }
+
+ Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
+ }
+ assert(Overflow && Result && "Missing overflow or result");
+
+ bool isVolatile =
+ ResultArg->getType()->getPointeeType().isVolatileQualified();
+ CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
+ isVolatile);
+ return RValue::get(Overflow);
+}
+
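A call shape that reaches this specialization (equal-width operands of mixed signedness), shown as a sketch:

    int res;
    bool overflowed = __builtin_mul_overflow(-3, 7u, &res);
    // Lowered as: take |signed operand|, run llvm.umul.with.overflow, then
    // check |result| > INT_MAX + (signed operand < 0) and negate if needed.
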
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -656,11 +941,196 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Result.Val.getFloat()));
}
+  // There are LLVM math intrinsics/instructions corresponding to math library
+  // functions, except that the LLVM op will never set errno, while the math
+  // library might. Also, math builtins have the same semantics as their math
+  // library twins. Thus, we can transform math library and builtin calls to
+  // their LLVM counterparts if the call is marked 'const' (it never sets errno).
+ if (FD->hasAttr<ConstAttr>()) {
+ switch (BuiltinID) {
+ case Builtin::BIceil:
+ case Builtin::BIceilf:
+ case Builtin::BIceill:
+ case Builtin::BI__builtin_ceil:
+ case Builtin::BI__builtin_ceilf:
+ case Builtin::BI__builtin_ceill:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
+
+ case Builtin::BIcopysign:
+ case Builtin::BIcopysignf:
+ case Builtin::BIcopysignl:
+ case Builtin::BI__builtin_copysign:
+ case Builtin::BI__builtin_copysignf:
+ case Builtin::BI__builtin_copysignl:
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
+
+ case Builtin::BIcos:
+ case Builtin::BIcosf:
+ case Builtin::BIcosl:
+ case Builtin::BI__builtin_cos:
+ case Builtin::BI__builtin_cosf:
+ case Builtin::BI__builtin_cosl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
+
+ case Builtin::BIexp:
+ case Builtin::BIexpf:
+ case Builtin::BIexpl:
+ case Builtin::BI__builtin_exp:
+ case Builtin::BI__builtin_expf:
+ case Builtin::BI__builtin_expl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
+
+ case Builtin::BIexp2:
+ case Builtin::BIexp2f:
+ case Builtin::BIexp2l:
+ case Builtin::BI__builtin_exp2:
+ case Builtin::BI__builtin_exp2f:
+ case Builtin::BI__builtin_exp2l:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
+
+ case Builtin::BIfabs:
+ case Builtin::BIfabsf:
+ case Builtin::BIfabsl:
+ case Builtin::BI__builtin_fabs:
+ case Builtin::BI__builtin_fabsf:
+ case Builtin::BI__builtin_fabsl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
+
+ case Builtin::BIfloor:
+ case Builtin::BIfloorf:
+ case Builtin::BIfloorl:
+ case Builtin::BI__builtin_floor:
+ case Builtin::BI__builtin_floorf:
+ case Builtin::BI__builtin_floorl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
+
+ case Builtin::BIfma:
+ case Builtin::BIfmaf:
+ case Builtin::BIfmal:
+ case Builtin::BI__builtin_fma:
+ case Builtin::BI__builtin_fmaf:
+ case Builtin::BI__builtin_fmal:
+ return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
+
+ case Builtin::BIfmax:
+ case Builtin::BIfmaxf:
+ case Builtin::BIfmaxl:
+ case Builtin::BI__builtin_fmax:
+ case Builtin::BI__builtin_fmaxf:
+ case Builtin::BI__builtin_fmaxl:
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
+
+ case Builtin::BIfmin:
+ case Builtin::BIfminf:
+ case Builtin::BIfminl:
+ case Builtin::BI__builtin_fmin:
+ case Builtin::BI__builtin_fminf:
+ case Builtin::BI__builtin_fminl:
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
+
+  // fmod() is a special case. It maps to the frem instruction rather than an
+ // LLVM intrinsic.
+ case Builtin::BIfmod:
+ case Builtin::BIfmodf:
+ case Builtin::BIfmodl:
+ case Builtin::BI__builtin_fmod:
+ case Builtin::BI__builtin_fmodf:
+ case Builtin::BI__builtin_fmodl: {
+ Value *Arg1 = EmitScalarExpr(E->getArg(0));
+ Value *Arg2 = EmitScalarExpr(E->getArg(1));
+ return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
+ }
+
+ case Builtin::BIlog:
+ case Builtin::BIlogf:
+ case Builtin::BIlogl:
+ case Builtin::BI__builtin_log:
+ case Builtin::BI__builtin_logf:
+ case Builtin::BI__builtin_logl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
+
+ case Builtin::BIlog10:
+ case Builtin::BIlog10f:
+ case Builtin::BIlog10l:
+ case Builtin::BI__builtin_log10:
+ case Builtin::BI__builtin_log10f:
+ case Builtin::BI__builtin_log10l:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
+
+ case Builtin::BIlog2:
+ case Builtin::BIlog2f:
+ case Builtin::BIlog2l:
+ case Builtin::BI__builtin_log2:
+ case Builtin::BI__builtin_log2f:
+ case Builtin::BI__builtin_log2l:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
+
+ case Builtin::BInearbyint:
+ case Builtin::BInearbyintf:
+ case Builtin::BInearbyintl:
+ case Builtin::BI__builtin_nearbyint:
+ case Builtin::BI__builtin_nearbyintf:
+ case Builtin::BI__builtin_nearbyintl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
+
+ case Builtin::BIpow:
+ case Builtin::BIpowf:
+ case Builtin::BIpowl:
+ case Builtin::BI__builtin_pow:
+ case Builtin::BI__builtin_powf:
+ case Builtin::BI__builtin_powl:
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
+
+ case Builtin::BIrint:
+ case Builtin::BIrintf:
+ case Builtin::BIrintl:
+ case Builtin::BI__builtin_rint:
+ case Builtin::BI__builtin_rintf:
+ case Builtin::BI__builtin_rintl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
+
+ case Builtin::BIround:
+ case Builtin::BIroundf:
+ case Builtin::BIroundl:
+ case Builtin::BI__builtin_round:
+ case Builtin::BI__builtin_roundf:
+ case Builtin::BI__builtin_roundl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
+
+ case Builtin::BIsin:
+ case Builtin::BIsinf:
+ case Builtin::BIsinl:
+ case Builtin::BI__builtin_sin:
+ case Builtin::BI__builtin_sinf:
+ case Builtin::BI__builtin_sinl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
+
+ case Builtin::BIsqrt:
+ case Builtin::BIsqrtf:
+ case Builtin::BIsqrtl:
+ case Builtin::BI__builtin_sqrt:
+ case Builtin::BI__builtin_sqrtf:
+ case Builtin::BI__builtin_sqrtl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
+
+ case Builtin::BItrunc:
+ case Builtin::BItruncf:
+ case Builtin::BItruncl:
+ case Builtin::BI__builtin_trunc:
+ case Builtin::BI__builtin_truncf:
+ case Builtin::BI__builtin_truncl:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
+
+ default:
+ break;
+ }
+ }
+
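Illustrative effect of the block above, assuming -fno-math-errno (which lets the implicit libm declarations carry the 'const' attribute):

    #include <math.h>
    double roof(double x) { return ceil(x); }  // emitted as @llvm.ceil.f64, no libm call
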
switch (BuiltinID) {
- default: break; // Handle intrinsics and libm functions below.
+ default: break;
case Builtin::BI__builtin___CFStringMakeConstantString:
case Builtin::BI__builtin___NSStringMakeConstantString:
- return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
+ return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
case Builtin::BI__builtin_stdarg_start:
case Builtin::BI__builtin_va_start:
case Builtin::BI__va_start:
@@ -696,64 +1166,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Result);
}
- case Builtin::BI__builtin_fabs:
- case Builtin::BI__builtin_fabsf:
- case Builtin::BI__builtin_fabsl: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
- }
- case Builtin::BI__builtin_fmod:
- case Builtin::BI__builtin_fmodf:
- case Builtin::BI__builtin_fmodl: {
- Value *Arg1 = EmitScalarExpr(E->getArg(0));
- Value *Arg2 = EmitScalarExpr(E->getArg(1));
- Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
- return RValue::get(Result);
- }
- case Builtin::BI__builtin_copysign:
- case Builtin::BI__builtin_copysignf:
- case Builtin::BI__builtin_copysignl: {
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
- }
- case Builtin::BI__builtin_ceil:
- case Builtin::BI__builtin_ceilf:
- case Builtin::BI__builtin_ceill: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
- }
- case Builtin::BI__builtin_floor:
- case Builtin::BI__builtin_floorf:
- case Builtin::BI__builtin_floorl: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
- }
- case Builtin::BI__builtin_trunc:
- case Builtin::BI__builtin_truncf:
- case Builtin::BI__builtin_truncl: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
- }
- case Builtin::BI__builtin_rint:
- case Builtin::BI__builtin_rintf:
- case Builtin::BI__builtin_rintl: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
- }
- case Builtin::BI__builtin_nearbyint:
- case Builtin::BI__builtin_nearbyintf:
- case Builtin::BI__builtin_nearbyintl: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
- }
- case Builtin::BI__builtin_round:
- case Builtin::BI__builtin_roundf:
- case Builtin::BI__builtin_roundl: {
- return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
- }
- case Builtin::BI__builtin_fmin:
- case Builtin::BI__builtin_fminf:
- case Builtin::BI__builtin_fminl: {
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
- }
- case Builtin::BI__builtin_fmax:
- case Builtin::BI__builtin_fmaxf:
- case Builtin::BI__builtin_fmaxl: {
- return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
- }
case Builtin::BI__builtin_conj:
case Builtin::BI__builtin_conjf:
case Builtin::BI__builtin_conjl: {
@@ -792,7 +1204,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_ctz:
case Builtin::BI__builtin_ctzl:
case Builtin::BI__builtin_ctzll: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
+ Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
llvm::Type *ArgType = ArgValue->getType();
Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
@@ -809,7 +1221,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_clz:
case Builtin::BI__builtin_clzl:
case Builtin::BI__builtin_clzll: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
+ Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
llvm::Type *ArgType = ArgValue->getType();
Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
@@ -1234,7 +1646,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
E->getArg(0)->getExprLoc(), FD, 0);
Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
- return RValue::get(Dest.getPointer());
+ return RValue::get(nullptr);
}
case Builtin::BImemcpy:
case Builtin::BI__builtin_memcpy: {
@@ -1346,8 +1758,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::ConstantInt::get(Int32Ty, Offset)));
}
case Builtin::BI__builtin_return_address: {
- Value *Depth =
- CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
+ Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
+ getContext().UnsignedIntTy);
Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
return RValue::get(Builder.CreateCall(F, Depth));
}
@@ -1356,8 +1768,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
}
case Builtin::BI__builtin_frame_address: {
- Value *Depth =
- CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
+ Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
+ getContext().UnsignedIntTy);
Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
return RValue::get(Builder.CreateCall(F, Depth));
}
@@ -1875,56 +2287,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(nullptr);
}
- // Library functions with special handling.
- case Builtin::BIsqrt:
- case Builtin::BIsqrtf:
- case Builtin::BIsqrtl: {
- // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
- // in finite- or unsafe-math mode (the intrinsic has different semantics
- // for handling negative numbers compared to the library function, so
- // -fmath-errno=0 is not enough).
- if (!FD->hasAttr<ConstAttr>())
- break;
- if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
- CGM.getCodeGenOpts().NoNaNsFPMath))
- break;
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = Arg0->getType();
- Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
- return RValue::get(Builder.CreateCall(F, Arg0));
- }
-
- case Builtin::BI__builtin_pow:
- case Builtin::BI__builtin_powf:
- case Builtin::BI__builtin_powl:
- case Builtin::BIpow:
- case Builtin::BIpowf:
- case Builtin::BIpowl: {
- // Transform a call to pow* into a @llvm.pow.* intrinsic call.
- if (!FD->hasAttr<ConstAttr>())
- break;
- Value *Base = EmitScalarExpr(E->getArg(0));
- Value *Exponent = EmitScalarExpr(E->getArg(1));
- llvm::Type *ArgType = Base->getType();
- Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
- return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
- }
-
- case Builtin::BIfma:
- case Builtin::BIfmaf:
- case Builtin::BIfmal:
- case Builtin::BI__builtin_fma:
- case Builtin::BI__builtin_fmaf:
- case Builtin::BI__builtin_fmal: {
- // Rewrite fma to intrinsic.
- Value *FirstArg = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = FirstArg->getType();
- Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
- return RValue::get(
- Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
- EmitScalarExpr(E->getArg(2))}));
- }
-
case Builtin::BI__builtin_signbit:
case Builtin::BI__builtin_signbitf:
case Builtin::BI__builtin_signbitl: {
@@ -1932,6 +2294,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
ConvertType(E->getType())));
}
+ case Builtin::BI__annotation: {
+ // Re-encode each wide string to UTF8 and make an MDString.
+ SmallVector<Metadata *, 1> Strings;
+ for (const Expr *Arg : E->arguments()) {
+ const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
+ assert(Str->getCharByteWidth() == 2);
+ StringRef WideBytes = Str->getBytes();
+ std::string StrUtf8;
+ if (!convertUTF16ToUTF8String(
+ makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
+ CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
+ continue;
+ }
+ Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
+ }
+
+    // Build an MDTuple of MDStrings and emit the intrinsic call.
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
+ MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
+ Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
+ return RValue::getIgnored();
+ }
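Source-level usage this case lowers (MSVC-compatible; each argument is a wide string literal, re-encoded to UTF-8 above):

    __annotation(L"category", L"annotation text");   // -> llvm.codeview.annotation
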
case Builtin::BI__builtin_annotation: {
llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
@@ -2026,6 +2410,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
WidthAndSignedness ResultInfo =
getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
+
+ // Handle mixed-sign multiplication as a special case, because adding
+ // runtime or backend support for our generic irgen would be too expensive.
+ if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
+ return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
+ RightInfo, ResultArg, ResultQTy,
+ ResultInfo);
+
WidthAndSignedness EncompassingInfo =
EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
@@ -2560,12 +2952,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// The most basic form of the call with parameters:
// queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
Name = "__enqueue_kernel_basic";
- llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
+ llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
+ GenericVoidPtrTy};
llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- llvm::Value *Block = Builder.CreatePointerCast(
- EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
+ llvm::Value *Kernel =
+ Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ llvm::Value *Block =
+ Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
AttrBuilder B;
B.addAttribute(Attribute::ByVal);
@@ -2574,33 +2971,58 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
auto RTCall =
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
- {Queue, Flags, Range, Block});
+ {Queue, Flags, Range, Kernel, Block});
RTCall->setAttributes(ByValAttrSet);
return RValue::get(RTCall);
}
assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
+ // Create a temporary array to hold the sizes of local pointer arguments
+ // for the block. \p First is the position of the first size argument.
+ auto CreateArrayForSizeVar = [=](unsigned First) {
+ auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
+ auto *Arr = Builder.CreateAlloca(AT);
+ llvm::Value *Ptr;
+ // Each of the following arguments specifies the size of the corresponding
+ // argument passed to the enqueued block.
+ auto *Zero = llvm::ConstantInt::get(IntTy, 0);
+ for (unsigned I = First; I < NumArgs; ++I) {
+ auto *Index = llvm::ConstantInt::get(IntTy, I - First);
+ auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+ if (I == First)
+ Ptr = GEP;
+ auto *V =
+ Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
+ Builder.CreateAlignedStore(
+ V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
+ }
+ return Ptr;
+ };
+
// Could have events and/or vaargs.
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
Name = "__enqueue_kernel_vaargs";
- llvm::Value *Block = Builder.CreatePointerCast(
- EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
+ llvm::Value *Kernel =
+ Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
+ auto *PtrToSizeArray = CreateArrayForSizeVar(4);
+
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
- std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
- ConstantInt::get(IntTy, NumArgs - 4)};
- std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
- GenericVoidPtrTy, IntTy};
-
- // Each of the following arguments specifies the size of the corresponding
- // argument passed to the enqueued block.
- for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
- Args.push_back(
- Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+ std::vector<llvm::Value *> Args = {
+ Queue, Flags, Range,
+ Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
+ PtrToSizeArray};
+ std::vector<llvm::Type *> ArgTys = {
+ QueueTy, IntTy, RangeTy,
+ GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
+ PtrToSizeArray->getType()};
llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
return RValue::get(
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
llvm::ArrayRef<llvm::Value *>(Args)));
@@ -2621,15 +3043,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
- llvm::Value *Block = Builder.CreatePointerCast(
- EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
+ llvm::Value *Kernel =
+ Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ llvm::Value *Block =
+ Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
std::vector<llvm::Type *> ArgTys = {
- QueueTy, Int32Ty, RangeTy, Int32Ty,
- EventPtrTy, EventPtrTy, GenericVoidPtrTy};
+ QueueTy, Int32Ty, RangeTy, Int32Ty,
+ EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
- std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
- EventList, ClkEvent, Block};
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
+ EventList, ClkEvent, Kernel, Block};
if (NumArgs == 7) {
// Has events but no variadics.
@@ -2646,14 +3072,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
ArgTys.push_back(Int32Ty);
Name = "__enqueue_kernel_events_vaargs";
- // Each of the following arguments specifies the size of the corresponding
- // argument passed to the enqueued block.
- for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
- Args.push_back(
- Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+ auto *PtrToSizeArray = CreateArrayForSizeVar(7);
+ Args.push_back(PtrToSizeArray);
+ ArgTys.push_back(PtrToSizeArray->getType());
llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
return RValue::get(
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
llvm::ArrayRef<llvm::Value *>(Args)));
@@ -2665,24 +3089,70 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BIget_kernel_work_group_size: {
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
- Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
+ Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+ llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
+ false),
"__get_kernel_work_group_size_impl"),
- Arg));
+ {Kernel, Arg}));
}
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
- Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
+ Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+ llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
+ false),
"__get_kernel_preferred_work_group_multiple_impl"),
- Arg));
+ {Kernel, Arg}));
+ }
+ case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
+ case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
+ llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
+ llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
+ Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
+ const char *Name =
+ BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
+ ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
+ : "__get_kernel_sub_group_count_for_ndrange_impl";
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(
+ IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
+ false),
+ Name),
+ {NDRange, Kernel, Block}));
+ }
+
+ case Builtin::BI__builtin_store_half:
+ case Builtin::BI__builtin_store_halff: {
+ Value *Val = EmitScalarExpr(E->getArg(0));
+ Address Address = EmitPointerWithAlignment(E->getArg(1));
+ Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
+ return RValue::get(Builder.CreateStore(HalfVal, Address));
+ }
+ case Builtin::BI__builtin_load_half: {
+ Address Address = EmitPointerWithAlignment(E->getArg(0));
+ Value *HalfVal = Builder.CreateLoad(Address);
+ return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
+ }
+ case Builtin::BI__builtin_load_halff: {
+ Address Address = EmitPointerWithAlignment(E->getArg(0));
+ Value *HalfVal = Builder.CreateLoad(Address);
+ return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
}
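Usage sketch for the new half-precision builtins; the pointer type is shown as __fp16* purely for illustration, the exact prototype comes from the builtin declarations:

    __fp16 storage;
    __builtin_store_halff(3.5f, &storage);      // fptrunc float -> half, store
    float  f = __builtin_load_halff(&storage);  // load, fpext half -> float
    double d = __builtin_load_half(&storage);   // load, fpext half -> double
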
case Builtin::BIprintf:
if (getTarget().getTriple().isNVPTX())
@@ -2699,69 +3169,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Fall through - it's already mapped to the intrinsic by GCCBuiltin.
break;
}
- case Builtin::BI__builtin_os_log_format: {
- assert(E->getNumArgs() >= 2 &&
- "__builtin_os_log_format takes at least 2 arguments");
- analyze_os_log::OSLogBufferLayout Layout;
- analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
- Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
- // Ignore argument 1, the format string. It is not currently used.
- CharUnits Offset;
- Builder.CreateStore(
- Builder.getInt8(Layout.getSummaryByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
- Builder.CreateStore(
- Builder.getInt8(Layout.getNumArgsByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
-
- llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
- for (const auto &Item : Layout.Items) {
- Builder.CreateStore(
- Builder.getInt8(Item.getDescriptorByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
- Builder.CreateStore(
- Builder.getInt8(Item.getSizeByte()),
- Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
- Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
- if (const Expr *TheExpr = Item.getExpr()) {
- Addr = Builder.CreateElementBitCast(
- Addr, ConvertTypeForMem(TheExpr->getType()));
- // Check if this is a retainable type.
- if (TheExpr->getType()->isObjCRetainableType()) {
- assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
- "Only scalar can be a ObjC retainable type");
- llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
- RValue RV = RValue::get(SV);
- LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
- EmitStoreThroughLValue(RV, LV);
- // Check if the object is constant, if not, save it in
- // RetainableOperands.
- if (!isa<Constant>(SV))
- RetainableOperands.push_back(SV);
- } else {
- EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
- }
- } else {
- Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
- Builder.CreateStore(
- Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
- }
- Offset += Item.size();
- }
-
- // Push a clang.arc.use cleanup for each object in RetainableOperands. The
- // cleanup will cause the use to appear after the final log call, keeping
- // the object valid while it's held in the log buffer. Note that if there's
- // a release cleanup on the object, it will already be active; since
- // cleanups are emitted in reverse order, the use will occur before the
- // object is released.
- if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
- CGM.getCodeGenOpts().OptimizationLevel != 0)
- for (llvm::Value *object : RetainableOperands)
- pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
-
- return RValue::get(BufAddr.getPointer());
- }
+ case Builtin::BI__builtin_os_log_format:
+ return emitBuiltinOSLogFormat(*E);
case Builtin::BI__builtin_os_log_format_buffer_size: {
analyze_os_log::OSLogBufferLayout Layout;
@@ -2773,10 +3182,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__xray_customevent: {
if (!ShouldXRayInstrumentFunction())
return RValue::getIgnored();
- if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
- if (XRayAttr->neverXRayInstrument())
+ if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
+ if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
return RValue::getIgnored();
- }
+
Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
auto FTy = F->getFunctionType();
auto Arg0 = E->getArg(0);
@@ -2954,6 +3363,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
+ case llvm::Triple::hexagon:
+ return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
default:
return nullptr;
}
@@ -4397,8 +4808,8 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
case NEON::BI__builtin_neon_vsha1cq_u32:
case NEON::BI__builtin_neon_vsha1pq_u32:
case NEON::BI__builtin_neon_vsha1mq_u32:
- case ARM::BI_MoveToCoprocessor:
- case ARM::BI_MoveToCoprocessor2:
+ case clang::ARM::BI_MoveToCoprocessor:
+ case clang::ARM::BI_MoveToCoprocessor2:
return false;
}
return true;
@@ -7153,6 +7564,19 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}
+static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
+ unsigned NumElts, SmallVectorImpl<Value *> &Ops,
+ bool InvertLHS = false) {
+ Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
+
+ if (InvertLHS)
+ LHS = CGF.Builder.CreateNot(LHS);
+
+ return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
+ CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
+}
+
static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
SmallVectorImpl<Value *> &Ops,
llvm::Type *DstTy,
@@ -7229,6 +7653,18 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
std::max(NumElts, 8U)));
}
+static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
+
+ llvm::Type *Ty = Ops[0]->getType();
+ Value *Zero = llvm::Constant::getNullValue(Ty);
+ Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
+ Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
+ Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
+ if (Ops.size() == 1)
+ return Res;
+ return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+}
+
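Per-element behavior of EmitX86Abs, as a scalar sketch:

    static int abs_elem(int x) {
      return x > 0 ? x : 0 - x;   // select(icmp sgt(x, 0), x, sub(0, x))
    }
    // For the *_mask forms (three operands), the result is then blended with
    // the pass-through operand under the mask via EmitX86Select.
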
static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
ArrayRef<Value *> Ops) {
Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
@@ -7248,8 +7684,118 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
+Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
+ const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
+ StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
+ return EmitX86CpuIs(CPUStr);
+}
+
+Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
+
+ llvm::Type *Int32Ty = Builder.getInt32Ty();
+
+ // Matching the struct layout from the compiler-rt/libgcc structure that is
+ // filled in:
+ // unsigned int __cpu_vendor;
+ // unsigned int __cpu_type;
+ // unsigned int __cpu_subtype;
+ // unsigned int __cpu_features[1];
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
+
+ // Grab the global __cpu_model.
+ llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+
+ // Calculate the index needed to access the correct field based on the
+ // range. Also adjust the expected value.
+ unsigned Index;
+ unsigned Value;
+ std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
+#define X86_VENDOR(ENUM, STRING) \
+ .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
+ .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \
+ .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
+#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \
+ .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
+#include "llvm/Support/X86TargetParser.def"
+ .Default({0, 0});
+ assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
+
+ // Grab the appropriate field from __cpu_model.
+ llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, Index)};
+ llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
+ CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
+
+ // Check the value of the field against the requested value.
+ return Builder.CreateICmpEQ(CpuValue,
+ llvm::ConstantInt::get(Int32Ty, Value));
+}
+
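For reference, the runtime structure this reads and a typical call site (struct spelled as in compiler-rt's cpu_model.c; shown only for illustration):

    struct __processor_model {
      unsigned int __cpu_vendor;
      unsigned int __cpu_type;
      unsigned int __cpu_subtype;
      unsigned int __cpu_features[1];
    };   // the runtime defines one instance named __cpu_model

    if (__builtin_cpu_is("haswell")) { /* type/subtype fields matched */ }
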
+Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
+ const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
+ StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
+ return EmitX86CpuSupports(FeatureStr);
+}
+
+Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
+ // Processor features and mapping to processor feature value.
+
+ uint32_t FeaturesMask = 0;
+
+ for (const StringRef &FeatureStr : FeatureStrs) {
+ unsigned Feature =
+ StringSwitch<unsigned>(FeatureStr)
+#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
+#include "llvm/Support/X86TargetParser.def"
+ ;
+ FeaturesMask |= (1U << Feature);
+ }
+
+ // Matching the struct layout from the compiler-rt/libgcc structure that is
+ // filled in:
+ // unsigned int __cpu_vendor;
+ // unsigned int __cpu_type;
+ // unsigned int __cpu_subtype;
+ // unsigned int __cpu_features[1];
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
+
+ // Grab the global __cpu_model.
+ llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+
+ // Grab the first (0th) element from the field __cpu_features off of the
+ // global in the struct STy.
+ Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
+ ConstantInt::get(Int32Ty, 0)};
+ Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Bitset = Builder.CreateAnd(
+ Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
+ return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
+}
+
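Typical guarded dispatch built on these helpers (run_avx2 and run_scalar are hypothetical functions, not from this patch):

    __builtin_cpu_init();                 // populates __cpu_model
    if (__builtin_cpu_supports("avx2"))
      run_avx2();                         // hypothetical fast path
    else
      run_scalar();                       // hypothetical fallback
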
+Value *CodeGenFunction::EmitX86CpuInit() {
+ llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
+ /*Variadic*/ false);
+ llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
+ return Builder.CreateCall(Func);
+}
+
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+ if (BuiltinID == X86::BI__builtin_cpu_is)
+ return EmitX86CpuIs(E);
+ if (BuiltinID == X86::BI__builtin_cpu_supports)
+ return EmitX86CpuSupports(E);
+ if (BuiltinID == X86::BI__builtin_cpu_init)
+ return EmitX86CpuInit();
+
SmallVector<Value*, 4> Ops;
// Find out if any arguments are required to be integer constant expressions.
@@ -7300,110 +7846,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
switch (BuiltinID) {
default: return nullptr;
- case X86::BI__builtin_cpu_supports: {
- const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
-
- // TODO: When/if this becomes more than x86 specific then use a TargetInfo
- // based mapping.
- // Processor features and mapping to processor feature value.
- enum X86Features {
- CMOV = 0,
- MMX,
- POPCNT,
- SSE,
- SSE2,
- SSE3,
- SSSE3,
- SSE4_1,
- SSE4_2,
- AVX,
- AVX2,
- SSE4_A,
- FMA4,
- XOP,
- FMA,
- AVX512F,
- BMI,
- BMI2,
- AES,
- PCLMUL,
- AVX512VL,
- AVX512BW,
- AVX512DQ,
- AVX512CD,
- AVX512ER,
- AVX512PF,
- AVX512VBMI,
- AVX512IFMA,
- AVX5124VNNIW, // TODO implement this fully
- AVX5124FMAPS, // TODO implement this fully
- AVX512VPOPCNTDQ,
- MAX
- };
-
- X86Features Feature =
- StringSwitch<X86Features>(FeatureStr)
- .Case("cmov", X86Features::CMOV)
- .Case("mmx", X86Features::MMX)
- .Case("popcnt", X86Features::POPCNT)
- .Case("sse", X86Features::SSE)
- .Case("sse2", X86Features::SSE2)
- .Case("sse3", X86Features::SSE3)
- .Case("ssse3", X86Features::SSSE3)
- .Case("sse4.1", X86Features::SSE4_1)
- .Case("sse4.2", X86Features::SSE4_2)
- .Case("avx", X86Features::AVX)
- .Case("avx2", X86Features::AVX2)
- .Case("sse4a", X86Features::SSE4_A)
- .Case("fma4", X86Features::FMA4)
- .Case("xop", X86Features::XOP)
- .Case("fma", X86Features::FMA)
- .Case("avx512f", X86Features::AVX512F)
- .Case("bmi", X86Features::BMI)
- .Case("bmi2", X86Features::BMI2)
- .Case("aes", X86Features::AES)
- .Case("pclmul", X86Features::PCLMUL)
- .Case("avx512vl", X86Features::AVX512VL)
- .Case("avx512bw", X86Features::AVX512BW)
- .Case("avx512dq", X86Features::AVX512DQ)
- .Case("avx512cd", X86Features::AVX512CD)
- .Case("avx512er", X86Features::AVX512ER)
- .Case("avx512pf", X86Features::AVX512PF)
- .Case("avx512vbmi", X86Features::AVX512VBMI)
- .Case("avx512ifma", X86Features::AVX512IFMA)
- .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
- .Default(X86Features::MAX);
- assert(Feature != X86Features::MAX && "Invalid feature!");
-
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
-
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
-
- // Grab the first (0th) element from the field __cpu_features off of the
- // global in the struct STy.
- Value *Idxs[] = {
- ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, 3),
- ConstantInt::get(Int32Ty, 0)
- };
- Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
- Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
- CharUnits::fromQuantity(4));
-
- // Check the value of the bit corresponding to the feature requested.
- Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
- return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
- }
case X86::BI_mm_prefetch: {
Value *Address = Ops[0];
Value *RW = ConstantInt::get(Int32Ty, 0);
@@ -7526,6 +7968,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
+ case X86::BI__builtin_ia32_vpopcntd_128:
+ case X86::BI__builtin_ia32_vpopcntq_128:
+ case X86::BI__builtin_ia32_vpopcntd_256:
+ case X86::BI__builtin_ia32_vpopcntq_256:
case X86::BI__builtin_ia32_vpopcntd_512:
case X86::BI__builtin_ia32_vpopcntq_512: {
llvm::Type *ResultType = ConvertType(E->getType());
@@ -7669,6 +8115,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86Select(*this, Ops[4], Align, Ops[3]);
}
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+
+ // This takes a very simple approach since there are two lanes and a
+ // shuffle can have 2 inputs. So we reserve the first input for the first
+ // lane and the second input for the second lane. This may result in
+ // duplicate sources, but this can be dealt with in the backend.
+
+ Value *OutOps[2];
+ uint32_t Indices[8];
+ for (unsigned l = 0; l != 2; ++l) {
+ // Determine the source for this lane.
+ if (Imm & (1 << ((l * 4) + 3)))
+ OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
+ else if (Imm & (1 << ((l * 4) + 1)))
+ OutOps[l] = Ops[1];
+ else
+ OutOps[l] = Ops[0];
+
+ for (unsigned i = 0; i != NumElts/2; ++i) {
+ // Start with ith element of the source for this lane.
+ unsigned Idx = (l * NumElts) + i;
+ // If bit 0 of the immediate half is set, switch to the high half of
+ // the source.
+ if (Imm & (1 << (l * 4)))
+ Idx += NumElts/2;
+ Indices[(l * (NumElts/2)) + i] = Idx;
+ }
+ }
+
+ return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
+ makeArrayRef(Indices, NumElts),
+ "vperm");
+ }
+
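Decoding of the 8-bit immediate implemented above, per 128-bit lane l in {0, 1}, written out as a sketch:

    // if (Imm >> (l*4 + 3)) & 1 : lane l of the result is all zeroes
    // else if (Imm >> (l*4 + 1)) & 1 : lane l is sourced from the second operand
    // else                           : lane l is sourced from the first operand
    // if (Imm >> (l*4 + 0)) & 1 : take the high half of the chosen source
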
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
@@ -7714,32 +8199,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectpd_256:
case X86::BI__builtin_ia32_selectpd_512:
return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
- case X86::BI__builtin_ia32_pcmpeqb128_mask:
- case X86::BI__builtin_ia32_pcmpeqb256_mask:
- case X86::BI__builtin_ia32_pcmpeqb512_mask:
- case X86::BI__builtin_ia32_pcmpeqw128_mask:
- case X86::BI__builtin_ia32_pcmpeqw256_mask:
- case X86::BI__builtin_ia32_pcmpeqw512_mask:
- case X86::BI__builtin_ia32_pcmpeqd128_mask:
- case X86::BI__builtin_ia32_pcmpeqd256_mask:
- case X86::BI__builtin_ia32_pcmpeqd512_mask:
- case X86::BI__builtin_ia32_pcmpeqq128_mask:
- case X86::BI__builtin_ia32_pcmpeqq256_mask:
- case X86::BI__builtin_ia32_pcmpeqq512_mask:
- return EmitX86MaskedCompare(*this, 0, false, Ops);
- case X86::BI__builtin_ia32_pcmpgtb128_mask:
- case X86::BI__builtin_ia32_pcmpgtb256_mask:
- case X86::BI__builtin_ia32_pcmpgtb512_mask:
- case X86::BI__builtin_ia32_pcmpgtw128_mask:
- case X86::BI__builtin_ia32_pcmpgtw256_mask:
- case X86::BI__builtin_ia32_pcmpgtw512_mask:
- case X86::BI__builtin_ia32_pcmpgtd128_mask:
- case X86::BI__builtin_ia32_pcmpgtd256_mask:
- case X86::BI__builtin_ia32_pcmpgtd512_mask:
- case X86::BI__builtin_ia32_pcmpgtq128_mask:
- case X86::BI__builtin_ia32_pcmpgtq256_mask:
- case X86::BI__builtin_ia32_pcmpgtq512_mask:
- return EmitX86MaskedCompare(*this, 6, true, Ops);
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
@@ -7771,6 +8230,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kandhi:
+ return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+ case X86::BI__builtin_ia32_kandnhi:
+ return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+ case X86::BI__builtin_ia32_korhi:
+ return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ case X86::BI__builtin_ia32_kxnorhi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+ case X86::BI__builtin_ia32_kxorhi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
+ case X86::BI__builtin_ia32_knothi: {
+ Ops[0] = getMaskVecValue(*this, Ops[0], 16);
+ return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
+ Builder.getInt16Ty());
+ }
+
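User-level intrinsics that reach these cases (via immintrin.h; shown as a sketch):

    #include <immintrin.h>
    __mmask16 combine(__mmask16 a, __mmask16 b) {
      __mmask16 x = _mm512_kand(a, b);   // -> __builtin_ia32_kandhi
      __mmask16 y = _mm512_knot(a);      // -> __builtin_ia32_knothi
      return _mm512_kor(x, y);           // -> __builtin_ia32_korhi
    }
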
case X86::BI__builtin_ia32_vplzcntd_128_mask:
case X86::BI__builtin_ia32_vplzcntd_256_mask:
case X86::BI__builtin_ia32_vplzcntd_512_mask:
@@ -7783,6 +8258,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[1]);
}
+ case X86::BI__builtin_ia32_pabsb128:
+ case X86::BI__builtin_ia32_pabsw128:
+ case X86::BI__builtin_ia32_pabsd128:
+ case X86::BI__builtin_ia32_pabsb256:
+ case X86::BI__builtin_ia32_pabsw256:
+ case X86::BI__builtin_ia32_pabsd256:
+ case X86::BI__builtin_ia32_pabsq128_mask:
+ case X86::BI__builtin_ia32_pabsq256_mask:
+ case X86::BI__builtin_ia32_pabsb512_mask:
+ case X86::BI__builtin_ia32_pabsw512_mask:
+ case X86::BI__builtin_ia32_pabsd512_mask:
+ case X86::BI__builtin_ia32_pabsq512_mask:
+ return EmitX86Abs(*this, Ops);
+
case X86::BI__builtin_ia32_pmaxsb128:
case X86::BI__builtin_ia32_pmaxsw128:
case X86::BI__builtin_ia32_pmaxsd128:
@@ -8071,6 +8560,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case X86::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case X86::BI_InterlockedCompareExchange128: {
+    // InterlockedCompareExchange128 doesn't directly refer to 128bit ints;
+    // instead it takes pointers to 64bit ints for Destination and
+    // ComparandResult, and the exchange value is passed as two 64bit ints (high & low).
+ // The previous value is written to ComparandResult, and success is
+ // returned.
+
+ llvm::Type *Int128Ty = Builder.getInt128Ty();
+ llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
+
+ Value *Destination =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
+ Value *ExchangeHigh128 =
+ Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
+ Value *ExchangeLow128 =
+ Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
+ Address ComparandResult(
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
+ getContext().toCharUnitsFromBits(128));
+
+ Value *Exchange = Builder.CreateOr(
+ Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
+ ExchangeLow128);
+
+ Value *Comparand = Builder.CreateLoad(ComparandResult);
+
+ AtomicCmpXchgInst *CXI =
+ Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ AtomicOrdering::SequentiallyConsistent,
+ AtomicOrdering::SequentiallyConsistent);
+ CXI->setVolatile(true);
+
+ // Write the result back to the inout pointer.
+ Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
+
+ // Get the success boolean and zero extend it to i8.
+ Value *Success = Builder.CreateExtractValue(CXI, 1);
+ return Builder.CreateZExt(Success, ConvertType(E->getType()));
+ }
case X86::BI_AddressOfReturnAddress: {
Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
@@ -8680,6 +9208,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CI->setConvergent();
return CI;
}
+ case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
+ case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
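+ // Read the low or high 32 bits of the EXEC mask via the corresponding
+ // named special register. The read is marked convergent so optimizations
+ // do not move it across control flow that could change the exec mask it
+ // observes.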
+ StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
+ "exec_lo" : "exec_hi";
+ CallInst *CI = cast<CallInst>(
+ EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
+ CI->setConvergent();
+ return CI;
+ }
// amdgcn workitem
case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
@@ -9129,6 +9666,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(FnALAF32, {Ptr, Val});
}
+ case NVPTX::BI__nvvm_atom_add_gen_d: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ // atomicrmw only deals with integer arguments, so we need to use
+ // LLVM's nvvm_atomic_load_add_f64 intrinsic.
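+ // Roughly, the call below lowers to something like
+ //   %r = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %p, double %v)
+ // with the intrinsic overloaded on the pointer argument's type.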
+ Value *FnALAF64 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
+ return Builder.CreateCall(FnALAF64, {Ptr, Val});
+ }
+
case NVPTX::BI__nvvm_atom_inc_gen_ui: {
Value *Ptr = EmitScalarExpr(E->getArg(0));
Value *Val = EmitScalarExpr(E->getArg(1));
@@ -9282,6 +9829,219 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
{Ptr->getType()->getPointerElementType(), Ptr->getType()}),
{Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
}
+ case NVPTX::BI__nvvm_match_all_sync_i32p:
+ case NVPTX::BI__nvvm_match_all_sync_i64p: {
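+ // The underlying intrinsic returns a {value, i1 pred} pair; store the
+ // predicate through the third argument and return the value.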
+ Value *Mask = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
+ Value *ResultPair = Builder.CreateCall(
+ CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
+ ? Intrinsic::nvvm_match_all_sync_i32p
+ : Intrinsic::nvvm_match_all_sync_i64p),
+ {Mask, Val});
+ Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
+ PredOutPtr.getElementType());
+ Builder.CreateStore(Pred, PredOutPtr);
+ return Builder.CreateExtractValue(ResultPair, 0);
+ }
+ case NVPTX::BI__hmma_m16n16k16_ld_a:
+ case NVPTX::BI__hmma_m16n16k16_ld_b:
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
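+ // Load a WMMA fragment from Src with leading dimension Ldm and store the
+ // NumResults returned elements into the array pointed to by Dst; the
+ // constant fourth argument selects row- vs column-major layout.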
+ Address Dst = EmitPointerWithAlignment(E->getArg(0));
+ Value *Src = EmitScalarExpr(E->getArg(1));
+ Value *Ldm = EmitScalarExpr(E->getArg(2));
+ llvm::APSInt isColMajorArg;
+ if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
+ return nullptr;
+ bool isColMajor = isColMajorArg.getSExtValue();
+ unsigned IID;
+ unsigned NumResults;
+ switch (BuiltinID) {
+ case NVPTX::BI__hmma_m16n16k16_ld_a:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_ld_b:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ NumResults = 8;
+ break;
+ default:
+ llvm_unreachable("Unexpected builtin ID.");
+ }
+ Value *Result =
+ Builder.CreateCall(CGM.getIntrinsic(IID),
+ {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm});
+
+ // Save returned values.
+ for (unsigned i = 0; i < NumResults; ++i) {
+ Builder.CreateAlignedStore(
+ Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
+ Dst.getElementType()),
+ Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ }
+ return Result;
+ }
+
+ case NVPTX::BI__hmma_m16n16k16_st_c_f16:
+ case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
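+ // Store the NumResults fragment elements held at Src out to memory at
+ // Dst with leading dimension Ldm; layout is selected by the constant
+ // fourth argument, as for the loads above.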
+ Value *Dst = EmitScalarExpr(E->getArg(0));
+ Address Src = EmitPointerWithAlignment(E->getArg(1));
+ Value *Ldm = EmitScalarExpr(E->getArg(2));
+ llvm::APSInt isColMajorArg;
+ if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
+ return nullptr;
+ bool isColMajor = isColMajorArg.getSExtValue();
+ unsigned IID;
+ unsigned NumResults = 8;
+ // PTX instructions (and LLVM intrinsics) are defined for slice _d_, yet
+ // for some reason nvcc builtins use _c_.
+ switch (BuiltinID) {
+ case NVPTX::BI__hmma_m16n16k16_st_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_st_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ break;
+ default:
+ llvm_unreachable("Unexpected builtin ID.");
+ }
+ Function *Intrinsic = CGM.getIntrinsic(IID);
+ llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
+ SmallVector<Value *, 10> Values;
+ Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy));
+ for (unsigned i = 0; i < NumResults; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, ParamType));
+ }
+ Values.push_back(Ldm);
+ Value *Result = Builder.CreateCall(Intrinsic, Values);
+ return Result;
+ }
+
+ // BI__hmma_m16n16k16_mma_<DType><CType>(d, a, b, c, layout, satf)
+ // --> Intrinsic::nvvm_wmma_mma_sync_<ALayout>_<BLayout>_<DType>_<CType>[_satfinite]
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
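+ // Computes D = A * B + C. The intrinsic takes all fragment elements of
+ // A, B and C as scalar operands and returns an aggregate whose NumEltsD
+ // elements are stored back through Dst below.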
+ Address Dst = EmitPointerWithAlignment(E->getArg(0));
+ Address SrcA = EmitPointerWithAlignment(E->getArg(1));
+ Address SrcB = EmitPointerWithAlignment(E->getArg(2));
+ Address SrcC = EmitPointerWithAlignment(E->getArg(3));
+ llvm::APSInt LayoutArg;
+ if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
+ return nullptr;
+ int Layout = LayoutArg.getSExtValue();
+ if (Layout < 0 || Layout > 3)
+ return nullptr;
+ llvm::APSInt SatfArg;
+ if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
+ return nullptr;
+ bool Satf = SatfArg.getSExtValue();
+
+ // clang-format off
+#define MMA_VARIANTS(type) {{ \
+ Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+ }}
+ // clang-format on
+
+ auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
+ unsigned Index = Layout * 2 + Satf;
+ assert(Index < 8);
+ return Variants[Index];
+ };
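+ // For example, Layout == 2 (col_row) with Satf == 1 yields Index == 5,
+ // i.e. Intrinsic::nvvm_wmma_mma_sync_col_row_<type>_satfinite.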
+ unsigned IID;
+ unsigned NumEltsC;
+ unsigned NumEltsD;
+ switch (BuiltinID) {
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
+ NumEltsC = 4;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
+ NumEltsC = 4;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
+ NumEltsC = 8;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
+ NumEltsC = 8;
+ NumEltsD = 8;
+ break;
+ default:
+ llvm_unreachable("Unexpected builtin ID.");
+ }
+#undef MMA_VARIANTS
+
+ SmallVector<Value *, 24> Values;
+ Function *Intrinsic = CGM.getIntrinsic(IID);
+ llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
+ // Load A
+ for (unsigned i = 0; i < 8; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(SrcA.getPointer(),
+ llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, ABType));
+ }
+ // Load B
+ for (unsigned i = 0; i < 8; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(SrcB.getPointer(),
+ llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, ABType));
+ }
+ // Load C
+ llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
+ for (unsigned i = 0; i < NumEltsC; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(SrcC.getPointer(),
+ llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, CType));
+ }
+ Value *Result = Builder.CreateCall(Intrinsic, Values);
+ llvm::Type *DType = Dst.getElementType();
+ for (unsigned i = 0; i < NumEltsD; ++i)
+ Builder.CreateAlignedStore(
+ Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
+ Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ return Result;
+ }
default:
return nullptr;
}
@@ -9315,3 +10075,58 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
return nullptr;
}
}
+
+Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ SmallVector<llvm::Value *, 4> Ops;
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+
+ switch (BuiltinID) {
+ case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
+ case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
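+ // The third argument points to the carry predicate: it is read as the
+ // carry-in, and the intrinsic's second result (the carry-out) is written
+ // back through it. The first result, the sum vector, is returned.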
+ Address Dest = EmitPointerWithAlignment(E->getArg(2));
+ unsigned Size;
+ if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
+ Size = 512;
+ ID = Intrinsic::hexagon_V6_vaddcarry;
+ } else {
+ Size = 1024;
+ ID = Intrinsic::hexagon_V6_vaddcarry_128B;
+ }
+ Dest = Builder.CreateBitCast(Dest,
+ llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
+ LoadInst *QLd = Builder.CreateLoad(Dest);
+ Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
+ llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
+ Vprd->getType()->getPointerTo(0));
+ Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+ return Builder.CreateExtractValue(Result, 0);
+ }
+ case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
+ case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
+ Address Dest = EmitPointerWithAlignment(E->getArg(2));
+ unsigned Size;
+ if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
+ Size = 512;
+ ID = Intrinsic::hexagon_V6_vsubcarry;
+ } else {
+ Size = 1024;
+ ID = Intrinsic::hexagon_V6_vsubcarry_128B;
+ }
+ Dest = Builder.CreateBitCast(Dest,
+ llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
+ LoadInst *QLd = Builder.CreateLoad(Dest);
+ Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
+ llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
+ llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
+ Vprd->getType()->getPointerTo(0));
+ Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
+ return Builder.CreateExtractValue(Result, 0);
+ }
+ } // switch
+
+ return nullptr;
+}