Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp | 823
1 file changed, 713 insertions(+), 110 deletions(-)
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp index cadce507412b..2d20f92fbb3d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp @@ -20,6 +20,7 @@  #include "PatternInit.h"  #include "TargetInfo.h"  #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h"  #include "clang/AST/Decl.h"  #include "clang/AST/OSLog.h"  #include "clang/Basic/TargetBuiltins.h" @@ -30,6 +31,17 @@  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/InlineAsm.h"  #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsBPF.h" +#include "llvm/IR/IntrinsicsHexagon.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsR600.h" +#include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/IR/IntrinsicsX86.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/Support/ConvertUTF.h"  #include "llvm/Support/ScopedPrinter.h" @@ -45,7 +57,8 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) {    return std::min(High, std::max(Low, Value));  } -static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, +                             Align AlignmentInBytes) {    ConstantInt *Byte;    switch (CGF.getLangOpts().getTrivialAutoVarInit()) {    case LangOptions::TrivialAutoVarInitKind::Uninitialized: @@ -347,6 +360,58 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {  }  // Emit a simple mangled intrinsic that has 1 argument and a return type +// matching the argument type. Depending on mode, this may be a constrained +// floating-point intrinsic. +static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, +                                const CallExpr *E, unsigned IntrinsicID, +                                unsigned ConstrainedIntrinsicID) { +  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + +  if (CGF.Builder.getIsFPConstrained()) { +    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); +    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); +  } else { +    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); +    return CGF.Builder.CreateCall(F, Src0); +  } +} + +// Emit an intrinsic that has 2 operands of the same type as its result. +// Depending on mode, this may be a constrained floating-point intrinsic. +static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, +                                const CallExpr *E, unsigned IntrinsicID, +                                unsigned ConstrainedIntrinsicID) { +  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); +  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + +  if (CGF.Builder.getIsFPConstrained()) { +    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); +    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); +  } else { +    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); +    return CGF.Builder.CreateCall(F, { Src0, Src1 }); +  } +} + +// Emit an intrinsic that has 3 operands of the same type as its result. +// Depending on mode, this may be a constrained floating-point intrinsic. 
+static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, +                                 const CallExpr *E, unsigned IntrinsicID, +                                 unsigned ConstrainedIntrinsicID) { +  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); +  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); +  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + +  if (CGF.Builder.getIsFPConstrained()) { +    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); +    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); +  } else { +    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); +    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); +  } +} + +// Emit a simple mangled intrinsic that has 1 argument and a return type  // matching the argument type.  static Value *emitUnaryBuiltin(CodeGenFunction &CGF,                                 const CallExpr *E, @@ -392,15 +457,22 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,  }  // Emit an intrinsic that has overloaded integer result and fp operand. -static Value *emitFPToIntRoundBuiltin(CodeGenFunction &CGF, -                                      const CallExpr *E, -                                      unsigned IntrinsicID) { -   llvm::Type *ResultType = CGF.ConvertType(E->getType()); -   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); +static Value * +emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, +                                        unsigned IntrinsicID, +                                        unsigned ConstrainedIntrinsicID) { +  llvm::Type *ResultType = CGF.ConvertType(E->getType()); +  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); -   Function *F = CGF.CGM.getIntrinsic(IntrinsicID, -                                      {ResultType, Src0->getType()}); -   return CGF.Builder.CreateCall(F, Src0); +  if (CGF.Builder.getIsFPConstrained()) { +    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, +                                       {ResultType, Src0->getType()}); +    return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); +  } else { +    Function *F = +        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); +    return CGF.Builder.CreateCall(F, Src0); +  }  }  /// EmitFAbs - Emit a call to @llvm.fabs(). @@ -749,8 +821,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,    // X86 has special BT, BTC, BTR, and BTS instructions that handle the array    // indexing operation internally. Use them if possible. -  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch(); -  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) +  if (CGF.getTarget().getTriple().isX86())      return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);    // Otherwise, use generic code to load one byte and test the bit. Use all but @@ -843,10 +914,12 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,      Name = SJKind == MSVCSetJmpKind::_setjmp ? 
"_setjmp" : "_setjmpex";      Arg1Ty = CGF.Int8PtrTy;      if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { -      Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry)); +      Arg1 = CGF.Builder.CreateCall( +          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));      } else -      Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), -                                    llvm::ConstantInt::get(CGF.Int32Ty, 0)); +      Arg1 = CGF.Builder.CreateCall( +          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy), +          llvm::ConstantInt::get(CGF.Int32Ty, 0));    }    // Mark the call site and declaration with ReturnsTwice. @@ -1394,9 +1467,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,  static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,                                 Value *&RecordPtr, CharUnits Align,                                 llvm::FunctionCallee Func, int Lvl) { -  const auto *RT = RType->getAs<RecordType>();    ASTContext &Context = CGF.getContext(); -  RecordDecl *RD = RT->getDecl()->getDefinition(); +  RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition();    std::string Pad = std::string(Lvl * 4, ' ');    Value *GString = @@ -1555,14 +1627,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BIceill:      case Builtin::BI__builtin_ceil:      case Builtin::BI__builtin_ceilf: +    case Builtin::BI__builtin_ceilf16:      case Builtin::BI__builtin_ceill: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::ceil, +                                   Intrinsic::experimental_constrained_ceil));      case Builtin::BIcopysign:      case Builtin::BIcopysignf:      case Builtin::BIcopysignl:      case Builtin::BI__builtin_copysign:      case Builtin::BI__builtin_copysignf: +    case Builtin::BI__builtin_copysignf16:      case Builtin::BI__builtin_copysignl:      case Builtin::BI__builtin_copysignf128:        return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); @@ -1572,30 +1648,40 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BIcosl:      case Builtin::BI__builtin_cos:      case Builtin::BI__builtin_cosf: +    case Builtin::BI__builtin_cosf16:      case Builtin::BI__builtin_cosl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::cos, +                                   Intrinsic::experimental_constrained_cos));      case Builtin::BIexp:      case Builtin::BIexpf:      case Builtin::BIexpl:      case Builtin::BI__builtin_exp:      case Builtin::BI__builtin_expf: +    case Builtin::BI__builtin_expf16:      case Builtin::BI__builtin_expl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::exp, +                                   Intrinsic::experimental_constrained_exp));      case Builtin::BIexp2:      case Builtin::BIexp2f:      case Builtin::BIexp2l:      case Builtin::BI__builtin_exp2:      case Builtin::BI__builtin_exp2f: +    case Builtin::BI__builtin_exp2f16:      case 
Builtin::BI__builtin_exp2l: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::exp2, +                                   Intrinsic::experimental_constrained_exp2));      case Builtin::BIfabs:      case Builtin::BIfabsf:      case Builtin::BIfabsl:      case Builtin::BI__builtin_fabs:      case Builtin::BI__builtin_fabsf: +    case Builtin::BI__builtin_fabsf16:      case Builtin::BI__builtin_fabsl:      case Builtin::BI__builtin_fabsf128:        return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); @@ -1605,32 +1691,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BIfloorl:      case Builtin::BI__builtin_floor:      case Builtin::BI__builtin_floorf: +    case Builtin::BI__builtin_floorf16:      case Builtin::BI__builtin_floorl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::floor, +                                   Intrinsic::experimental_constrained_floor));      case Builtin::BIfma:      case Builtin::BIfmaf:      case Builtin::BIfmal:      case Builtin::BI__builtin_fma:      case Builtin::BI__builtin_fmaf: +    case Builtin::BI__builtin_fmaf16:      case Builtin::BI__builtin_fmal: -      return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma)); +      return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::fma, +                                   Intrinsic::experimental_constrained_fma));      case Builtin::BIfmax:      case Builtin::BIfmaxf:      case Builtin::BIfmaxl:      case Builtin::BI__builtin_fmax:      case Builtin::BI__builtin_fmaxf: +    case Builtin::BI__builtin_fmaxf16:      case Builtin::BI__builtin_fmaxl: -      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); +      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::maxnum, +                                   Intrinsic::experimental_constrained_maxnum));      case Builtin::BIfmin:      case Builtin::BIfminf:      case Builtin::BIfminl:      case Builtin::BI__builtin_fmin:      case Builtin::BI__builtin_fminf: +    case Builtin::BI__builtin_fminf16:      case Builtin::BI__builtin_fminl: -      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); +      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::minnum, +                                   Intrinsic::experimental_constrained_minnum));      // fmod() is a special-case. It maps to the frem instruction rather than an      // LLVM intrinsic. 
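The "MaybeConstrained" helpers introduced above pick between a plain FP intrinsic and its llvm.experimental.constrained.* counterpart based on whether the IRBuilder is in FP-constrained mode. A minimal source-level sketch of code expected to exercise the constrained path, assuming a compilation mode where strict FP semantics are honored (e.g. -ffp-model=strict, or the pragma below where supported; the function name is illustrative):

    #pragma STDC FENV_ACCESS ON
    // Under strict FP semantics, __builtin_sqrt should lower to
    // @llvm.experimental.constrained.sqrt.* (carrying rounding-mode and
    // exception-behavior operands) instead of plain @llvm.sqrt.*.
    double strict_sqrt(double x) {
      return __builtin_sqrt(x);
    }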
@@ -1639,6 +1737,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BIfmodl:      case Builtin::BI__builtin_fmod:      case Builtin::BI__builtin_fmodf: +    case Builtin::BI__builtin_fmodf16:      case Builtin::BI__builtin_fmodl: {        Value *Arg1 = EmitScalarExpr(E->getArg(0));        Value *Arg2 = EmitScalarExpr(E->getArg(1)); @@ -1650,24 +1749,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BIlogl:      case Builtin::BI__builtin_log:      case Builtin::BI__builtin_logf: +    case Builtin::BI__builtin_logf16:      case Builtin::BI__builtin_logl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::log, +                                   Intrinsic::experimental_constrained_log));      case Builtin::BIlog10:      case Builtin::BIlog10f:      case Builtin::BIlog10l:      case Builtin::BI__builtin_log10:      case Builtin::BI__builtin_log10f: +    case Builtin::BI__builtin_log10f16:      case Builtin::BI__builtin_log10l: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::log10, +                                   Intrinsic::experimental_constrained_log10));      case Builtin::BIlog2:      case Builtin::BIlog2f:      case Builtin::BIlog2l:      case Builtin::BI__builtin_log2:      case Builtin::BI__builtin_log2f: +    case Builtin::BI__builtin_log2f16:      case Builtin::BI__builtin_log2l: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::log2, +                                   Intrinsic::experimental_constrained_log2));      case Builtin::BInearbyint:      case Builtin::BInearbyintf: @@ -1675,55 +1783,75 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BI__builtin_nearbyint:      case Builtin::BI__builtin_nearbyintf:      case Builtin::BI__builtin_nearbyintl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                Intrinsic::nearbyint, +                                Intrinsic::experimental_constrained_nearbyint));      case Builtin::BIpow:      case Builtin::BIpowf:      case Builtin::BIpowl:      case Builtin::BI__builtin_pow:      case Builtin::BI__builtin_powf: +    case Builtin::BI__builtin_powf16:      case Builtin::BI__builtin_powl: -      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow)); +      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::pow, +                                   Intrinsic::experimental_constrained_pow));      case Builtin::BIrint:      case Builtin::BIrintf:      case Builtin::BIrintl:      case Builtin::BI__builtin_rint:      case Builtin::BI__builtin_rintf: +    case Builtin::BI__builtin_rintf16:      case Builtin::BI__builtin_rintl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::rint, +                                   
Intrinsic::experimental_constrained_rint));      case Builtin::BIround:      case Builtin::BIroundf:      case Builtin::BIroundl:      case Builtin::BI__builtin_round:      case Builtin::BI__builtin_roundf: +    case Builtin::BI__builtin_roundf16:      case Builtin::BI__builtin_roundl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::round, +                                   Intrinsic::experimental_constrained_round));      case Builtin::BIsin:      case Builtin::BIsinf:      case Builtin::BIsinl:      case Builtin::BI__builtin_sin:      case Builtin::BI__builtin_sinf: +    case Builtin::BI__builtin_sinf16:      case Builtin::BI__builtin_sinl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::sin, +                                   Intrinsic::experimental_constrained_sin));      case Builtin::BIsqrt:      case Builtin::BIsqrtf:      case Builtin::BIsqrtl:      case Builtin::BI__builtin_sqrt:      case Builtin::BI__builtin_sqrtf: +    case Builtin::BI__builtin_sqrtf16:      case Builtin::BI__builtin_sqrtl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::sqrt, +                                   Intrinsic::experimental_constrained_sqrt));      case Builtin::BItrunc:      case Builtin::BItruncf:      case Builtin::BItruncl:      case Builtin::BI__builtin_trunc:      case Builtin::BI__builtin_truncf: +    case Builtin::BI__builtin_truncf16:      case Builtin::BI__builtin_truncl: -      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); +      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,  +                                   Intrinsic::trunc, +                                   Intrinsic::experimental_constrained_trunc));      case Builtin::BIlround:      case Builtin::BIlroundf: @@ -1731,7 +1859,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BI__builtin_lround:      case Builtin::BI__builtin_lroundf:      case Builtin::BI__builtin_lroundl: -      return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lround)); +      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( +          *this, E, Intrinsic::lround, +          Intrinsic::experimental_constrained_lround));      case Builtin::BIllround:      case Builtin::BIllroundf: @@ -1739,7 +1869,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BI__builtin_llround:      case Builtin::BI__builtin_llroundf:      case Builtin::BI__builtin_llroundl: -      return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround)); +      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( +          *this, E, Intrinsic::llround, +          Intrinsic::experimental_constrained_llround));      case Builtin::BIlrint:      case Builtin::BIlrintf: @@ -1747,7 +1879,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BI__builtin_lrint:      case Builtin::BI__builtin_lrintf:      case Builtin::BI__builtin_lrintl: -      return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint)); +      return 
RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( +          *this, E, Intrinsic::lrint, +          Intrinsic::experimental_constrained_lrint));      case Builtin::BIllrint:      case Builtin::BIllrintf: @@ -1755,7 +1889,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      case Builtin::BI__builtin_llrint:      case Builtin::BI__builtin_llrintf:      case Builtin::BI__builtin_llrintl: -      return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint)); +      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( +          *this, E, Intrinsic::llrint, +          Intrinsic::experimental_constrained_llrint));      default:        break; @@ -1801,16 +1937,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,    }    case Builtin::BI__builtin_conj:    case Builtin::BI__builtin_conjf: -  case Builtin::BI__builtin_conjl: { +  case Builtin::BI__builtin_conjl: +  case Builtin::BIconj: +  case Builtin::BIconjf: +  case Builtin::BIconjl: {      ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));      Value *Real = ComplexVal.first;      Value *Imag = ComplexVal.second; -    Value *Zero = -      Imag->getType()->isFPOrFPVectorTy() -        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) -        : llvm::Constant::getNullValue(Imag->getType()); - -    Imag = Builder.CreateFSub(Zero, Imag, "sub"); +    Imag = Builder.CreateFNeg(Imag, "neg");      return RValue::getComplex(std::make_pair(Real, Imag));    }    case Builtin::BI__builtin_creal: @@ -2026,11 +2160,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      Value *AlignmentValue = EmitScalarExpr(E->getArg(1));      ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); -    unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); +    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) +      AlignmentCI = ConstantInt::get(AlignmentCI->getType(), +                                     llvm::Value::MaximumAlignment);      EmitAlignmentAssumption(PtrValue, Ptr,                              /*The expr loc is sufficient.*/ SourceLocation(), -                            Alignment, OffsetValue); +                            AlignmentCI, OffsetValue);      return RValue::get(PtrValue);    }    case Builtin::BI__assume: @@ -2077,10 +2213,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,    case Builtin::BI__builtin_constant_p: {      llvm::Type *ResultType = ConvertType(E->getType()); -    if (CGM.getCodeGenOpts().OptimizationLevel == 0) -      // At -O0, we don't perform inlining, so we don't need to delay the -      // processing. -      return RValue::get(ConstantInt::get(ResultType, 0));      const Expr *Arg = E->getArg(0);      QualType ArgType = Arg->getType(); @@ -2131,7 +2263,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) :        llvm::ConstantInt::get(Int32Ty, 3);      Value *Data = llvm::ConstantInt::get(Int32Ty, 1); -    Function *F = CGM.getIntrinsic(Intrinsic::prefetch); +    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());      return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));    }    case Builtin::BI__builtin_readcyclecounter: { @@ -2159,13 +2291,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,    case Builtin::BI__builtin_powi:    case Builtin::BI__builtin_powif: -  case Builtin::BI__builtin_powil: { -    Value *Base = EmitScalarExpr(E->getArg(0)); -    Value *Exponent = EmitScalarExpr(E->getArg(1)); -    llvm::Type *ArgType = Base->getType(); -    Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); -    return RValue::get(Builder.CreateCall(F, {Base, Exponent})); -  } +  case Builtin::BI__builtin_powil: +    return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( +        *this, E, Intrinsic::powi, Intrinsic::experimental_constrained_powi));    case Builtin::BI__builtin_isgreater:    case Builtin::BI__builtin_isgreaterequal: @@ -2339,10 +2467,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      Value *Size = EmitScalarExpr(E->getArg(0));      const TargetInfo &TI = getContext().getTargetInfo();      // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. -    unsigned SuitableAlignmentInBytes = +    const Align SuitableAlignmentInBytes =          CGM.getContext()              .toCharUnitsFromBits(TI.getSuitableAlign()) -            .getQuantity(); +            .getAsAlign();      AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);      AI->setAlignment(SuitableAlignmentInBytes);      initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); @@ -2354,8 +2482,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));      auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);      unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); -    unsigned AlignmentInBytes = -        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); +    const Align AlignmentInBytes = +        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();      AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);      AI->setAlignment(AlignmentInBytes);      initializeAlloca(*this, AI, Size, AlignmentInBytes); @@ -2372,7 +2500,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      return RValue::get(nullptr);    }    case Builtin::BImemcpy: -  case Builtin::BI__builtin_memcpy: { +  case Builtin::BI__builtin_memcpy: +  case Builtin::BImempcpy: +  case Builtin::BI__builtin_mempcpy: {      Address Dest = EmitPointerWithAlignment(E->getArg(0));      Address Src = EmitPointerWithAlignment(E->getArg(1));      Value *SizeVal = EmitScalarExpr(E->getArg(2)); @@ -2381,7 +2511,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),                          E->getArg(1)->getExprLoc(), FD, 1);      Builder.CreateMemCpy(Dest, Src, SizeVal, false); -    return RValue::get(Dest.getPointer()); +    if (BuiltinID == Builtin::BImempcpy || +        BuiltinID == Builtin::BI__builtin_mempcpy) +      return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), 
SizeVal)); +    else +      return RValue::get(Dest.getPointer());    }    case Builtin::BI__builtin_char_memchr: @@ -2556,7 +2690,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,    case Builtin::BI__builtin_frame_address: {      Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),                                                     getContext().UnsignedIntTy); -    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); +    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);      return RValue::get(Builder.CreateCall(F, Depth));    }    case Builtin::BI__builtin_extract_return_addr: { @@ -2637,9 +2771,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      Address Buf = EmitPointerWithAlignment(E->getArg(0));      // Store the frame pointer to the setjmp buffer. -    Value *FrameAddr = -      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), -                         ConstantInt::get(Int32Ty, 0)); +    Value *FrameAddr = Builder.CreateCall( +        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), +        ConstantInt::get(Int32Ty, 0));      Builder.CreateStore(FrameAddr, Buf);      // Store the stack pointer to the setjmp buffer. @@ -3088,6 +3222,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,          Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),                             ConvertType(E->getType())));    } +  case Builtin::BI__warn_memset_zero_len: +    return RValue::getIgnored();    case Builtin::BI__annotation: {      // Re-encode each wide string to UTF8 and make an MDString.      SmallVector<Metadata *, 1> Strings; @@ -3348,7 +3484,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      return RValue::get(Carry);    }    case Builtin::BI__builtin_addressof: -    return RValue::get(EmitLValue(E->getArg(0)).getPointer()); +    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));    case Builtin::BI__builtin_operator_new:      return EmitBuiltinNewDeleteCall(          E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); @@ -3356,6 +3492,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      return EmitBuiltinNewDeleteCall(          E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); +  case Builtin::BI__builtin_is_aligned: +    return EmitBuiltinIsAligned(E); +  case Builtin::BI__builtin_align_up: +    return EmitBuiltinAlignTo(E, true); +  case Builtin::BI__builtin_align_down: +    return EmitBuiltinAlignTo(E, false); +    case Builtin::BI__noop:      // __noop always evaluates to an integer literal zero.      return RValue::get(ConstantInt::get(IntTy, 0)); @@ -3673,13 +3816,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,    case Builtin::BIget_pipe_num_packets:    case Builtin::BIget_pipe_max_packets: {      const char *BaseName; -    const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>(); +    const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();      if (BuiltinID == Builtin::BIget_pipe_num_packets)        BaseName = "__get_pipe_num_packets";      else        BaseName = "__get_pipe_max_packets"; -    auto Name = std::string(BaseName) + -                std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); +    std::string Name = std::string(BaseName) + +                       std::string(PipeTy->isReadOnly() ? 
"_ro" : "_wo");      // Building the generic function prototype.      Value *Arg0 = EmitScalarExpr(E->getArg(0)); @@ -3731,8 +3874,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      llvm::Value *Queue = EmitScalarExpr(E->getArg(0));      llvm::Value *Flags = EmitScalarExpr(E->getArg(1));      LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); -    llvm::Value *Range = NDRangeL.getAddress().getPointer(); -    llvm::Type *RangeTy = NDRangeL.getAddress().getType(); +    llvm::Value *Range = NDRangeL.getAddress(*this).getPointer(); +    llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();      if (NumArgs == 4) {        // The most basic form of the call with parameters: @@ -3751,7 +3894,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,            Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);        AttrBuilder B; -      B.addByValAttr(NDRangeL.getAddress().getElementType()); +      B.addByValAttr(NDRangeL.getAddress(*this).getElementType());        llvm::AttributeList ByValAttrSet =            llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); @@ -3769,7 +3912,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,          -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {        llvm::APInt ArraySize(32, NumArgs - First);        QualType SizeArrayTy = getContext().getConstantArrayType( -          getContext().getSizeType(), ArraySize, ArrayType::Normal, +          getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,            /*IndexTypeQuals=*/0);        auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");        llvm::Value *TmpPtr = Tmp.getPointer(); @@ -3936,7 +4079,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(          getContext().getTargetAddressSpace(LangAS::opencl_generic));      LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); -    llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); +    llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();      auto Info =          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));      Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); @@ -3977,6 +4120,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      break;    case Builtin::BI__builtin_canonicalize:    case Builtin::BI__builtin_canonicalizef: +  case Builtin::BI__builtin_canonicalizef16:    case Builtin::BI__builtin_canonicalizel:      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); @@ -4197,9 +4341,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,      return RValue::get(V);    } -  // See if we have a target specific builtin that needs to be lowered. -  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) -    return RValue::get(V); +  // Some target-specific builtins can have aggregate return values, e.g. +  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force +  // ReturnValue to be non-null, so that the target-specific emission code can +  // always just emit into it. 
+  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); +  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) { +    Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); +    ReturnValue = ReturnValueSlot(DestPtr, false); +  } + +  // Now see if we can emit a target-specific builtin. +  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { +    switch (EvalKind) { +    case TEK_Scalar: +      return RValue::get(V); +    case TEK_Aggregate: +      return RValue::getAggregate(ReturnValue.getValue(), +                                  ReturnValue.isVolatile()); +    case TEK_Complex: +      llvm_unreachable("No current target builtin returns complex"); +    } +    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); +  }    ErrorUnsupported(E, "builtin function"); @@ -4209,16 +4373,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,  static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,                                          unsigned BuiltinID, const CallExpr *E, +                                        ReturnValueSlot ReturnValue,                                          llvm::Triple::ArchType Arch) {    switch (Arch) {    case llvm::Triple::arm:    case llvm::Triple::armeb:    case llvm::Triple::thumb:    case llvm::Triple::thumbeb: -    return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch); +    return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);    case llvm::Triple::aarch64: +  case llvm::Triple::aarch64_32:    case llvm::Triple::aarch64_be:      return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); +  case llvm::Triple::bpfeb: +  case llvm::Triple::bpfel: +    return CGF->EmitBPFBuiltinExpr(BuiltinID, E);    case llvm::Triple::x86:    case llvm::Triple::x86_64:      return CGF->EmitX86BuiltinExpr(BuiltinID, E); @@ -4245,15 +4414,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,  }  Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, -                                              const CallExpr *E) { +                                              const CallExpr *E, +                                              ReturnValueSlot ReturnValue) {    if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {      assert(getContext().getAuxTargetInfo() && "Missing aux target info");      return EmitTargetArchBuiltinExpr(          this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, -        getContext().getAuxTargetInfo()->getTriple().getArch()); +        ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());    } -  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, +  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,                                     getTarget().getTriple().getArch());  } @@ -4428,6 +4598,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {    NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),    NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),    NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), +  NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), +  NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), +  NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), +  NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),    NEONMAP1(vcage_v, arm_neon_vacge, 0),    NEONMAP1(vcageq_v, arm_neon_vacge, 0),    NEONMAP1(vcagt_v, arm_neon_vacgt, 0), @@ -4595,10 +4769,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {    NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, 
Add1ArgType | UnsignedAlts),    NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),    NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), -  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), -  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), -  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), -  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), +  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), +  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), +  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), +  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),    NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),    NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),    NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), @@ -4616,8 +4790,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {    NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),    NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),    NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), -  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), -  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), +  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), +  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),    NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),    NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),    NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), @@ -4701,6 +4875,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {    NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),    NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),    NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), +  NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), +  NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), +  NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), +  NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),    NEONMAP1(vcage_v, aarch64_neon_facge, 0),    NEONMAP1(vcageq_v, aarch64_neon_facge, 0),    NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), @@ -5430,6 +5608,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(      llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);    } +  case NEON::BI__builtin_neon_vcvtx_f32_v: { +    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty}; +    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); + +  }    case NEON::BI__builtin_neon_vext_v:    case NEON::BI__builtin_neon_vextq_v: {      int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); @@ -5645,7 +5828,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(      llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());      // TODO: Currently in AArch32 mode the pointer operand comes first, whereas      // in AArch64 it comes last. We may want to stick to one or another. 
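The vqadd/vqsub rows above retarget the ARM-specific saturating intrinsics to the generic llvm.uadd.sat / llvm.sadd.sat / llvm.usub.sat / llvm.ssub.sat family. Per lane, signed saturating addition behaves like this scalar sketch (illustrative only; the real lowering stays vectorized):

    #include <cstdint>
    #include <limits>
    // Clamp the widened sum back into the lane's representable range.
    int32_t sadd_sat32(int32_t a, int32_t b) {
      int64_t s = int64_t(a) + int64_t(b);
      if (s > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();
      if (s < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();
      return int32_t(s);
    }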
-    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) { +    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || +        Arch == llvm::Triple::aarch64_32) {        llvm::Type *Tys[2] = { VTy, PTy };        std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());        return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); @@ -5981,6 +6165,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {  Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,                                             const CallExpr *E, +                                           ReturnValueSlot ReturnValue,                                             llvm::Triple::ArchType Arch) {    if (auto Hint = GetValueForARMHint(BuiltinID))      return Hint; @@ -6019,7 +6204,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,      // Locality is not supported on ARM target      Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); -    Function *F = CGM.getIntrinsic(Intrinsic::prefetch); +    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());      return Builder.CreateCall(F, {Address, RW, Locality, IsData});    } @@ -6029,6 +6214,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,          CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");    } +  if (BuiltinID == ARM::BI__builtin_arm_cls) { +    llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); +    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); +  } +  if (BuiltinID == ARM::BI__builtin_arm_cls64) { +    llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); +    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, +                              "cls"); +  } +    if (BuiltinID == ARM::BI__clear_cache) {      assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");      const FunctionDecl *FD = E->getDirectCallee(); @@ -6297,6 +6492,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,      return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);    } +  // Deal with MVE builtins +  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) +    return Result; +    // Find out if any arguments are required to be integer constant    // expressions.    unsigned ICEArguments = 0; @@ -6746,6 +6945,152 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,    }  } +template<typename Integer> +static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { +  llvm::APSInt IntVal; +  bool IsConst = E->isIntegerConstantExpr(IntVal, Context); +  assert(IsConst && "Sema should have checked this was a constant"); +  (void)IsConst; +  return IntVal.getExtValue(); +} + +static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, +                                     llvm::Type *T, bool Unsigned) { +  // Helper function called by Tablegen-constructed ARM MVE builtin codegen, +  // which finds it convenient to specify signed/unsigned as a boolean flag. +  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); +} + +static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, +                                    uint32_t Shift, bool Unsigned) { +  // MVE helper function for integer shift right. This must handle signed vs +  // unsigned, and also deal specially with the case where the shift count is +  // equal to the lane size. 
In LLVM IR, an LShr with that parameter would be +  // undefined behavior, but in MVE it's legal, so we must convert it to code +  // that is not undefined in IR. +  unsigned LaneBits = +      V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); +  if (Shift == LaneBits) { +    // An unsigned shift of the full lane size always generates zero, so we can +    // simply emit a zero vector. A signed shift of the full lane size does the +    // same thing as shifting by one bit fewer. +    if (Unsigned) +      return llvm::Constant::getNullValue(V->getType()); +    else +      --Shift; +  } +  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); +} + +static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { +  // MVE-specific helper function for a vector splat, which infers the element +  // count of the output vector by knowing that MVE vectors are all 128 bits +  // wide. +  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits(); +  return Builder.CreateVectorSplat(Elements, V); +} + +Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, +                                              const CallExpr *E, +                                              ReturnValueSlot ReturnValue, +                                              llvm::Triple::ArchType Arch) { +  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType; +  Intrinsic::ID IRIntr; +  unsigned NumVectors; + +  // Code autogenerated by Tablegen will handle all the simple builtins. +  switch (BuiltinID) { +    #include "clang/Basic/arm_mve_builtin_cg.inc" + +    // If we didn't match an MVE builtin id at all, go back to the +    // main EmitARMBuiltinExpr. +  default: +    return nullptr; +  } + +  // Anything that breaks from that switch is an MVE builtin that +  // needs handwritten code to generate. 
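The full-lane-width case in MVEImmediateShr above is easiest to see on a single lane. A scalar sketch of the defined results it emits for a 16-bit lane shifted by 16 (function names are illustrative):

    #include <cstdint>
    // Unsigned: every bit is shifted out, so the result is simply zero.
    uint16_t mve_lshr_by_16(uint16_t) { return 0; }
    // Signed: shifting by 15 already fills the lane with copies of the sign
    // bit, which is the same value MVE defines for a shift by 16.
    int16_t mve_ashr_by_16(int16_t v) { return int16_t(v >> 15); }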
+ +  switch (CustomCodeGenType) { + +  case CustomCodeGen::VLD24: { +    llvm::SmallVector<Value *, 4> Ops; +    llvm::SmallVector<llvm::Type *, 4> Tys; + +    auto MvecCType = E->getType(); +    auto MvecLType = ConvertType(MvecCType); +    assert(MvecLType->isStructTy() && +           "Return type for vld[24]q should be a struct"); +    assert(MvecLType->getStructNumElements() == 1 && +           "Return-type struct for vld[24]q should have one element"); +    auto MvecLTypeInner = MvecLType->getStructElementType(0); +    assert(MvecLTypeInner->isArrayTy() && +           "Return-type struct for vld[24]q should contain an array"); +    assert(MvecLTypeInner->getArrayNumElements() == NumVectors && +           "Array member of return-type struct vld[24]q has wrong length"); +    auto VecLType = MvecLTypeInner->getArrayElementType(); + +    Tys.push_back(VecLType); + +    auto Addr = E->getArg(0); +    Ops.push_back(EmitScalarExpr(Addr)); +    Tys.push_back(ConvertType(Addr->getType())); + +    Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); +    Value *LoadResult = Builder.CreateCall(F, Ops); +    Value *MvecOut = UndefValue::get(MvecLType); +    for (unsigned i = 0; i < NumVectors; ++i) { +      Value *Vec = Builder.CreateExtractValue(LoadResult, i); +      MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); +    } + +    if (ReturnValue.isNull()) +      return MvecOut; +    else +      return Builder.CreateStore(MvecOut, ReturnValue.getValue()); +  } + +  case CustomCodeGen::VST24: { +    llvm::SmallVector<Value *, 4> Ops; +    llvm::SmallVector<llvm::Type *, 4> Tys; + +    auto Addr = E->getArg(0); +    Ops.push_back(EmitScalarExpr(Addr)); +    Tys.push_back(ConvertType(Addr->getType())); + +    auto MvecCType = E->getArg(1)->getType(); +    auto MvecLType = ConvertType(MvecCType); +    assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct"); +    assert(MvecLType->getStructNumElements() == 1 && +           "Data-type struct for vst2q should have one element"); +    auto MvecLTypeInner = MvecLType->getStructElementType(0); +    assert(MvecLTypeInner->isArrayTy() && +           "Data-type struct for vst2q should contain an array"); +    assert(MvecLTypeInner->getArrayNumElements() == NumVectors && +           "Array member of return-type struct vld[24]q has wrong length"); +    auto VecLType = MvecLTypeInner->getArrayElementType(); + +    Tys.push_back(VecLType); + +    AggValueSlot MvecSlot = CreateAggTemp(MvecCType); +    EmitAggExpr(E->getArg(1), MvecSlot); +    auto Mvec = Builder.CreateLoad(MvecSlot.getAddress()); +    for (unsigned i = 0; i < NumVectors; i++) +      Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); + +    Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys)); +    Value *ToReturn = nullptr; +    for (unsigned i = 0; i < NumVectors; i++) { +      Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); +      ToReturn = Builder.CreateCall(F, Ops); +      Ops.pop_back(); +    } +    return ToReturn; +  } +  } +  llvm_unreachable("unknown custom codegen type."); +} +  static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,                                        const CallExpr *E,                                        SmallVectorImpl<Value *> &Ops, @@ -6958,7 +7303,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,      // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify      // PLDL3STRM or PLDL2STRM. 
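The prefetch change repeated throughout this patch (here and in the ARM, X86, and generic hunks) reflects llvm.prefetch becoming overloaded on its pointer operand, so getIntrinsic now needs the address type. Source-level behavior is unchanged; a sketch:

    // __builtin_prefetch keeps its (addr, rw, locality) interface; it now
    // lowers to an address-space-overloaded intrinsic such as
    // @llvm.prefetch.p0i8(ptr, i32 0, i32 3, i32 1).
    void warm(const char *p) {
      __builtin_prefetch(p, /*rw=*/0, /*locality=*/3);
    }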
-    Function *F = CGM.getIntrinsic(Intrinsic::prefetch); +    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());      return Builder.CreateCall(F, {Address, RW, Locality, IsData});    } @@ -6977,6 +7322,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,          CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");    } +  if (BuiltinID == AArch64::BI__builtin_arm_cls) { +    llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); +    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, +                              "cls"); +  } +  if (BuiltinID == AArch64::BI__builtin_arm_cls64) { +    llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); +    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, +                              "cls"); +  } +    if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {      assert((getContext().getTypeSize(E->getType()) == 32) &&             "__jcvt of unusual size!"); @@ -7293,12 +7649,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,    }    if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { -    llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); +    llvm::Function *F = +        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);      return Builder.CreateCall(F);    }    if (BuiltinID == AArch64::BI__builtin_sponentry) { -    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); +    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);      return Builder.CreateCall(F);    } @@ -9276,6 +9633,37 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,    }  } +Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, +                                           const CallExpr *E) { +  assert(BuiltinID == BPF::BI__builtin_preserve_field_info && +         "unexpected ARM builtin"); + +  const Expr *Arg = E->getArg(0); +  bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; + +  if (!getDebugInfo()) { +    CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g"); +    return IsBitField ? EmitLValue(Arg).getBitFieldPointer() +                      : EmitLValue(Arg).getPointer(*this); +  } + +  // Enable underlying preserve_*_access_index() generation. +  bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; +  IsInPreservedAIRegion = true; +  Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() +                                : EmitLValue(Arg).getPointer(*this); +  IsInPreservedAIRegion = OldIsInPreservedAIRegion; + +  ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); +  Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); + +  // Built the IR for the preserve_field_info intrinsic. 
+  llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( +      &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, +      {FieldAddr->getType()}); +  return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); +} +  llvm::Value *CodeGenFunction::  BuildVector(ArrayRef<llvm::Value*> Ops) {    assert((Ops.size() & (Ops.size() - 1)) == 0 && @@ -10034,7 +10422,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,      Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);      Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);      Value *Data = ConstantInt::get(Int32Ty, 1); -    Function *F = CGM.getIntrinsic(Intrinsic::prefetch); +    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());      return Builder.CreateCall(F, {Address, RW, Locality, Data});    }    case X86::BI_mm_clflush: { @@ -11169,7 +11557,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,      // Unaligned nontemporal store of the scalar value.      StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);      SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); -    SI->setAlignment(1); +    SI->setAlignment(llvm::Align::None());      return SI;    }    // Rotate is a special case of funnel shift - 1st 2 args are the same. @@ -12113,13 +12501,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,    }    case X86::BI_AddressOfReturnAddress: { -    Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); +    Function *F = +        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);      return Builder.CreateCall(F);    }    case X86::BI__stosb: {      // We treat __stosb as a volatile memset - it may not generate "rep stosb"      // instruction, but it will create a memset that won't be optimized away. -    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); +    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align::None(), true);    }    case X86::BI__ud2:      // llvm.trap makes a ud2a instruction on x86. 
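For the new BPF path above, __builtin_preserve_field_info pairs a field access with a constant info-kind argument and requires debug info (-g), since the resulting relocation is resolved against DWARF. A hypothetical use (treating info kind 0 as the field's byte offset is an assumption taken from the BPF CO-RE convention):

    struct pkt { unsigned flags : 4; int len; };
    // Returns a relocatable byte offset of 'len'; must be compiled with -g.
    unsigned len_offset(struct pkt *p) {
      return __builtin_preserve_field_info(p->len, 0);
    }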
@@ -12937,9 +13326,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     Value *X = EmitScalarExpr(E->getArg(0));
     Value *Y = EmitScalarExpr(E->getArg(1));
     Value *Z = EmitScalarExpr(E->getArg(2));
-    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
-    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+    return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
   }
   case SystemZ::BI__builtin_s390_vfnmasb:
   case SystemZ::BI__builtin_s390_vfnmadb: {
@@ -12947,9 +13335,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     Value *X = EmitScalarExpr(E->getArg(0));
     Value *Y = EmitScalarExpr(E->getArg(1));
     Value *Z = EmitScalarExpr(E->getArg(2));
-    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
-    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
+    return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
   }
   case SystemZ::BI__builtin_s390_vfnmssb:
   case SystemZ::BI__builtin_s390_vfnmsdb: {
@@ -12957,10 +13344,9 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     Value *X = EmitScalarExpr(E->getArg(0));
     Value *Y = EmitScalarExpr(E->getArg(1));
     Value *Z = EmitScalarExpr(E->getArg(2));
-    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
-    Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
-    return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
+    Value *NegZ = Builder.CreateFNeg(Z, "neg");
+    return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
   }
   case SystemZ::BI__builtin_s390_vflpsb:
   case SystemZ::BI__builtin_s390_vflpdb: {
@@ -12973,9 +13359,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
   case SystemZ::BI__builtin_s390_vflndb: {
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *X = EmitScalarExpr(E->getArg(0));
-    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
-    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
+    return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
   }
   case SystemZ::BI__builtin_s390_vfisb:
   case SystemZ::BI__builtin_s390_vfidb: {
@@ -13877,6 +14262,96 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
   }
 }
 
+namespace {
+struct BuiltinAlignArgs {
+  llvm::Value *Src = nullptr;
+  llvm::Type *SrcType = nullptr;
+  llvm::Value *Alignment = nullptr;
+  llvm::Value *Mask = nullptr;
+  llvm::IntegerType *IntType = nullptr;
+
+  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
+    QualType AstType = E->getArg(0)->getType();
+    if (AstType->isArrayType())
+      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
+    else
+      Src = CGF.EmitScalarExpr(E->getArg(0));
+    SrcType = Src->getType();
+    if (SrcType->isPointerTy()) {
+      IntType = IntegerType::get(
+          CGF.getLLVMContext(),
+          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
+    } else {
+      assert(SrcType->isIntegerTy());
+      IntType = cast<llvm::IntegerType>(SrcType);
+    }
+    Alignment = CGF.EmitScalarExpr(E->getArg(1));
+    Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
+    auto *One = llvm::ConstantInt::get(IntType, 1);
+    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
+  }
+};
+} // namespace
+
+/// Generate (x & (y-1)) == 0.
+RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
+  BuiltinAlignArgs Args(E, *this);
+  llvm::Value *SrcAddress = Args.Src;
+  if (Args.SrcType->isPointerTy())
+    SrcAddress =
+        Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
+  return RValue::get(Builder.CreateICmpEQ(
+      Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
+      llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
+}
+
+/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
+/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
+/// llvm.ptrmask intrinsic (with a GEP beforehand in the align_up case).
+/// TODO: actually use ptrmask once most optimization passes know about it.
+RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
+  BuiltinAlignArgs Args(E, *this);
+  llvm::Value *SrcAddr = Args.Src;
+  if (Args.Src->getType()->isPointerTy())
+    SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
+  llvm::Value *SrcForMask = SrcAddr;
+  if (AlignUp) {
+    // When aligning up we first add the mask so that we move past the current
+    // alignment boundary, then align down to the next valid multiple. Adding
+    // the mask ensures that align_up on an already aligned value leaves the
+    // value unchanged.
+    SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+  }
+  // Invert the mask to only clear the lower bits.
+  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
+  llvm::Value *Result =
+      Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+  if (Args.Src->getType()->isPointerTy()) {
+    // TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
+    // Result = Builder.CreateIntrinsic(
+    //     Intrinsic::ptrmask,
+    //     {Args.SrcType, SrcForMask->getType(), Args.IntType},
+    //     {SrcForMask, InvertedMask}, nullptr, "aligned_result");
+    Result->setName("aligned_intptr");
+    llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
+    // The result must point to the same underlying allocation. This means we
+    // can use an inbounds GEP to enable better optimization.
+    Value *Base = EmitCastToVoidPtr(Args.Src);
+    if (getLangOpts().isSignedOverflowDefined())
+      Result = Builder.CreateGEP(Base, Difference, "aligned_result");
+    else
+      Result = EmitCheckedInBoundsGEP(Base, Difference,
+                                      /*SignedIndices=*/true,
+                                      /*isSubtraction=*/!AlignUp,
+                                      E->getExprLoc(), "aligned_result");
+    Result = Builder.CreatePointerCast(Result, Args.SrcType);
+    // Emit an alignment assumption to ensure that the new alignment is
+    // propagated to loads/stores, etc.
+    EmitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
+  }
+  assert(Result->getType() == Args.SrcType);
+  return RValue::get(Result);
+}
+
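The two functions above lower __builtin_is_aligned, __builtin_align_down, and __builtin_align_up to plain power-of-two bit math. A minimal sketch of that math, not from the diff — the helper names (is_aligned, align_down, align_up) are hypothetical and stand in for the emitted IR:

#include <cassert>
#include <cstdint>

// align must be a power of two; mask = align - 1, as in BuiltinAlignArgs.
bool is_aligned(uintptr_t x, uintptr_t align) { return (x & (align - 1)) == 0; }
uintptr_t align_down(uintptr_t x, uintptr_t align) { return x & ~(align - 1); }
uintptr_t align_up(uintptr_t x, uintptr_t align) {
  // Adding the mask first means already-aligned values are unchanged.
  return (x + (align - 1)) & ~(align - 1);
}

int main() {
  assert(align_up(13, 8) == 16);
  assert(align_up(16, 8) == 16); // already aligned: unchanged
  assert(align_down(13, 8) == 8);
  assert(is_aligned(16, 8) && !is_aligned(13, 8));
}

For pointers the codegen additionally routes the adjustment through an (inbounds) GEP so the result provably stays within the same allocation, then emits an alignment assumption so later loads and stores can use the new alignment.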
 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                    const CallExpr *E) {
   switch (BuiltinID) {
@@ -13924,6 +14399,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
     return Builder.CreateCall(Callee);
   }
+  case WebAssembly::BI__builtin_wasm_tls_align: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
+    return Builder.CreateCall(Callee);
+  }
+  case WebAssembly::BI__builtin_wasm_tls_base: {
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
+    return Builder.CreateCall(Callee);
+  }
   case WebAssembly::BI__builtin_wasm_throw: {
     Value *Tag = EmitScalarExpr(E->getArg(0));
     Value *Obj = EmitScalarExpr(E->getArg(1));
@@ -13954,6 +14438,26 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
     return Builder.CreateCall(Callee, {Addr, Count});
   }
+  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
+  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
+    Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Type *ResT = ConvertType(E->getType());
+    Function *Callee =
+        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
+    return Builder.CreateCall(Callee, {Src});
+  }
+  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
+  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
+    Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Type *ResT = ConvertType(E->getType());
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
+                                        {ResT, Src->getType()});
+    return Builder.CreateCall(Callee, {Src});
+  }
   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
@@ -13998,6 +14502,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                      ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
+    Value *Src = EmitScalarExpr(E->getArg(0));
+    Value *Indices = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
+    return Builder.CreateCall(Callee, {Src, Indices});
+  }
   case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
   case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
   case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
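A usage sketch for the new plain trunc builtins, not part of the diff and only compilable when targeting WebAssembly: unlike the existing trunc_saturate builtins a few lines below, these lower to @llvm.wasm.trunc.signed / @llvm.wasm.trunc.unsigned, which follow the core wasm conversion semantics of trapping (rather than saturating) on NaN or out-of-range inputs. The wrapper names are hypothetical:

int to_i32(float f) {
  // Traps at runtime if f is NaN or does not fit in i32.
  return __builtin_wasm_trunc_s_i32_f32(f);
}

long long to_i64(double d) {
  return __builtin_wasm_trunc_s_i64_f64(d);
}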
@@ -14090,6 +14600,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
+  case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
+                                        ConvertType(E->getType()));
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
   case WebAssembly::BI__builtin_wasm_bitselect: {
     Value *V1 = EmitScalarExpr(E->getArg(0));
     Value *V2 = EmitScalarExpr(E->getArg(1));
@@ -14098,6 +14616,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                      ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {V1, V2, C});
   }
+  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
   case WebAssembly::BI__builtin_wasm_any_true_i8x16:
   case WebAssembly::BI__builtin_wasm_any_true_i16x8:
   case WebAssembly::BI__builtin_wasm_any_true_i32x4:
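The avgr_u builtins added above correspond to the wasm SIMD rounding-average instruction. A scalar reference for one lane, not from the diff — the helper name avgr_u is hypothetical, and the semantics assumed here are the proposal's (a + b + 1) >> 1 with no intermediate overflow:

#include <cassert>
#include <cstdint>

uint8_t avgr_u(uint8_t a, uint8_t b) {
  // Widen so a + b + 1 cannot overflow, then halve, rounding halves up.
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b) + 1) >> 1);
}

int main() {
  assert(avgr_u(1, 2) == 2);       // 1.5 rounds up to 2
  assert(avgr_u(255, 255) == 255); // widening avoids wraparound
}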
@@ -14139,7 +14663,86 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
     return Builder.CreateCall(Callee, {Vec});
   }
-
+  case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+  case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+  case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+  case WebAssembly::BI__builtin_wasm_qfms_f64x2: {
+    Value *A = EmitScalarExpr(E->getArg(0));
+    Value *B = EmitScalarExpr(E->getArg(1));
+    Value *C = EmitScalarExpr(E->getArg(2));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+    case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+      IntNo = Intrinsic::wasm_qfma;
+      break;
+    case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+    case WebAssembly::BI__builtin_wasm_qfms_f64x2:
+      IntNo = Intrinsic::wasm_qfms;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
+    return Builder.CreateCall(Callee, {A, B, C});
+  }
+  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
+  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
+  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
+  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
+    Value *Low = EmitScalarExpr(E->getArg(0));
+    Value *High = EmitScalarExpr(E->getArg(1));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
+    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
+      IntNo = Intrinsic::wasm_narrow_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
+    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
+      IntNo = Intrinsic::wasm_narrow_unsigned;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee =
+        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
+    return Builder.CreateCall(Callee, {Low, High});
+  }
+  case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+  case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+  case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+  case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
+    Value *Vec = EmitScalarExpr(E->getArg(0));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_low_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_high_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_low_unsigned;
+      break;
+    case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_high_unsigned;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee =
+        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
+    return Builder.CreateCall(Callee, Vec);
+  }
   default:
     return nullptr;
   }
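The narrow builtins in the final hunk pack two wider-lane vectors into one narrower-lane vector with saturation. A scalar reference for one lane of the signed i16x8-to-i8x16 case, not from the diff — the helper name narrow_s_lane is hypothetical, and the clamp-to-[-128, 127] semantics are assumed from the wasm SIMD proposal:

#include <cassert>
#include <cstdint>

int8_t narrow_s_lane(int16_t x) {
  // Saturate instead of wrapping, then truncate to the narrow lane type.
  if (x > INT8_MAX) return INT8_MAX;
  if (x < INT8_MIN) return INT8_MIN;
  return static_cast<int8_t>(x);
}

int main() {
  assert(narrow_s_lane(300) == 127);
  assert(narrow_s_lane(-300) == -128);
  assert(narrow_s_lane(5) == 5);
}

The widen builtins are the inverse direction: each selects the low or high half of the input lanes and sign- or zero-extends them, which is why each pair of builtins shares one intrinsic overloaded on the result and source vector types.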
