diff options
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 2069 |
1 files changed, 1291 insertions, 778 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d071c7a5b4a4..8c7ee6b078f2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" @@ -373,7 +374,7 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy); Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy), - CGF.getContext().toCharUnitsFromBits(128)); + Int128Ty, CGF.getContext().toCharUnitsFromBits(128)); // (((i128)hi) << 64) | ((i128)lo) ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); @@ -961,7 +962,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy); Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8, ByteIndex, "bittest.byteaddr"), - CharUnits::One()); + CGF.Int8Ty, CharUnits::One()); Value *PosLow = CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty), llvm::ConstantInt::get(CGF.Int8Ty, 0x7)); @@ -1168,141 +1169,141 @@ translateArmToMsvcIntrin(unsigned BuiltinID) { switch (BuiltinID) { default: return None; - case ARM::BI_BitScanForward: - case ARM::BI_BitScanForward64: + case clang::ARM::BI_BitScanForward: + case clang::ARM::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; - case ARM::BI_BitScanReverse: - case ARM::BI_BitScanReverse64: + case clang::ARM::BI_BitScanReverse: + case clang::ARM::BI_BitScanReverse64: return MSVCIntrin::_BitScanReverse; - case ARM::BI_InterlockedAnd64: + case clang::ARM::BI_InterlockedAnd64: return MSVCIntrin::_InterlockedAnd; - case ARM::BI_InterlockedExchange64: + case clang::ARM::BI_InterlockedExchange64: return MSVCIntrin::_InterlockedExchange; - case ARM::BI_InterlockedExchangeAdd64: + case clang::ARM::BI_InterlockedExchangeAdd64: return MSVCIntrin::_InterlockedExchangeAdd; - case ARM::BI_InterlockedExchangeSub64: + case clang::ARM::BI_InterlockedExchangeSub64: return MSVCIntrin::_InterlockedExchangeSub; - case ARM::BI_InterlockedOr64: + case clang::ARM::BI_InterlockedOr64: return MSVCIntrin::_InterlockedOr; - case ARM::BI_InterlockedXor64: + case clang::ARM::BI_InterlockedXor64: return MSVCIntrin::_InterlockedXor; - case ARM::BI_InterlockedDecrement64: + case clang::ARM::BI_InterlockedDecrement64: return MSVCIntrin::_InterlockedDecrement; - case ARM::BI_InterlockedIncrement64: + case clang::ARM::BI_InterlockedIncrement64: return MSVCIntrin::_InterlockedIncrement; - case ARM::BI_InterlockedExchangeAdd8_acq: - case ARM::BI_InterlockedExchangeAdd16_acq: - case ARM::BI_InterlockedExchangeAdd_acq: - case ARM::BI_InterlockedExchangeAdd64_acq: + case clang::ARM::BI_InterlockedExchangeAdd8_acq: + case clang::ARM::BI_InterlockedExchangeAdd16_acq: + case clang::ARM::BI_InterlockedExchangeAdd_acq: + case clang::ARM::BI_InterlockedExchangeAdd64_acq: return MSVCIntrin::_InterlockedExchangeAdd_acq; - case ARM::BI_InterlockedExchangeAdd8_rel: - case ARM::BI_InterlockedExchangeAdd16_rel: - case ARM::BI_InterlockedExchangeAdd_rel: - case ARM::BI_InterlockedExchangeAdd64_rel: + case clang::ARM::BI_InterlockedExchangeAdd8_rel: + case clang::ARM::BI_InterlockedExchangeAdd16_rel: + case clang::ARM::BI_InterlockedExchangeAdd_rel: + case clang::ARM::BI_InterlockedExchangeAdd64_rel: return MSVCIntrin::_InterlockedExchangeAdd_rel; - case ARM::BI_InterlockedExchangeAdd8_nf: - case ARM::BI_InterlockedExchangeAdd16_nf: - case ARM::BI_InterlockedExchangeAdd_nf: - case ARM::BI_InterlockedExchangeAdd64_nf: + case clang::ARM::BI_InterlockedExchangeAdd8_nf: + case clang::ARM::BI_InterlockedExchangeAdd16_nf: + case clang::ARM::BI_InterlockedExchangeAdd_nf: + case clang::ARM::BI_InterlockedExchangeAdd64_nf: return MSVCIntrin::_InterlockedExchangeAdd_nf; - case ARM::BI_InterlockedExchange8_acq: - case ARM::BI_InterlockedExchange16_acq: - case ARM::BI_InterlockedExchange_acq: - case ARM::BI_InterlockedExchange64_acq: + case clang::ARM::BI_InterlockedExchange8_acq: + case clang::ARM::BI_InterlockedExchange16_acq: + case clang::ARM::BI_InterlockedExchange_acq: + case clang::ARM::BI_InterlockedExchange64_acq: return MSVCIntrin::_InterlockedExchange_acq; - case ARM::BI_InterlockedExchange8_rel: - case ARM::BI_InterlockedExchange16_rel: - case ARM::BI_InterlockedExchange_rel: - case ARM::BI_InterlockedExchange64_rel: + case clang::ARM::BI_InterlockedExchange8_rel: + case clang::ARM::BI_InterlockedExchange16_rel: + case clang::ARM::BI_InterlockedExchange_rel: + case clang::ARM::BI_InterlockedExchange64_rel: return MSVCIntrin::_InterlockedExchange_rel; - case ARM::BI_InterlockedExchange8_nf: - case ARM::BI_InterlockedExchange16_nf: - case ARM::BI_InterlockedExchange_nf: - case ARM::BI_InterlockedExchange64_nf: + case clang::ARM::BI_InterlockedExchange8_nf: + case clang::ARM::BI_InterlockedExchange16_nf: + case clang::ARM::BI_InterlockedExchange_nf: + case clang::ARM::BI_InterlockedExchange64_nf: return MSVCIntrin::_InterlockedExchange_nf; - case ARM::BI_InterlockedCompareExchange8_acq: - case ARM::BI_InterlockedCompareExchange16_acq: - case ARM::BI_InterlockedCompareExchange_acq: - case ARM::BI_InterlockedCompareExchange64_acq: + case clang::ARM::BI_InterlockedCompareExchange8_acq: + case clang::ARM::BI_InterlockedCompareExchange16_acq: + case clang::ARM::BI_InterlockedCompareExchange_acq: + case clang::ARM::BI_InterlockedCompareExchange64_acq: return MSVCIntrin::_InterlockedCompareExchange_acq; - case ARM::BI_InterlockedCompareExchange8_rel: - case ARM::BI_InterlockedCompareExchange16_rel: - case ARM::BI_InterlockedCompareExchange_rel: - case ARM::BI_InterlockedCompareExchange64_rel: + case clang::ARM::BI_InterlockedCompareExchange8_rel: + case clang::ARM::BI_InterlockedCompareExchange16_rel: + case clang::ARM::BI_InterlockedCompareExchange_rel: + case clang::ARM::BI_InterlockedCompareExchange64_rel: return MSVCIntrin::_InterlockedCompareExchange_rel; - case ARM::BI_InterlockedCompareExchange8_nf: - case ARM::BI_InterlockedCompareExchange16_nf: - case ARM::BI_InterlockedCompareExchange_nf: - case ARM::BI_InterlockedCompareExchange64_nf: + case clang::ARM::BI_InterlockedCompareExchange8_nf: + case clang::ARM::BI_InterlockedCompareExchange16_nf: + case clang::ARM::BI_InterlockedCompareExchange_nf: + case clang::ARM::BI_InterlockedCompareExchange64_nf: return MSVCIntrin::_InterlockedCompareExchange_nf; - case ARM::BI_InterlockedOr8_acq: - case ARM::BI_InterlockedOr16_acq: - case ARM::BI_InterlockedOr_acq: - case ARM::BI_InterlockedOr64_acq: + case clang::ARM::BI_InterlockedOr8_acq: + case clang::ARM::BI_InterlockedOr16_acq: + case clang::ARM::BI_InterlockedOr_acq: + case clang::ARM::BI_InterlockedOr64_acq: return MSVCIntrin::_InterlockedOr_acq; - case ARM::BI_InterlockedOr8_rel: - case ARM::BI_InterlockedOr16_rel: - case ARM::BI_InterlockedOr_rel: - case ARM::BI_InterlockedOr64_rel: + case clang::ARM::BI_InterlockedOr8_rel: + case clang::ARM::BI_InterlockedOr16_rel: + case clang::ARM::BI_InterlockedOr_rel: + case clang::ARM::BI_InterlockedOr64_rel: return MSVCIntrin::_InterlockedOr_rel; - case ARM::BI_InterlockedOr8_nf: - case ARM::BI_InterlockedOr16_nf: - case ARM::BI_InterlockedOr_nf: - case ARM::BI_InterlockedOr64_nf: + case clang::ARM::BI_InterlockedOr8_nf: + case clang::ARM::BI_InterlockedOr16_nf: + case clang::ARM::BI_InterlockedOr_nf: + case clang::ARM::BI_InterlockedOr64_nf: return MSVCIntrin::_InterlockedOr_nf; - case ARM::BI_InterlockedXor8_acq: - case ARM::BI_InterlockedXor16_acq: - case ARM::BI_InterlockedXor_acq: - case ARM::BI_InterlockedXor64_acq: + case clang::ARM::BI_InterlockedXor8_acq: + case clang::ARM::BI_InterlockedXor16_acq: + case clang::ARM::BI_InterlockedXor_acq: + case clang::ARM::BI_InterlockedXor64_acq: return MSVCIntrin::_InterlockedXor_acq; - case ARM::BI_InterlockedXor8_rel: - case ARM::BI_InterlockedXor16_rel: - case ARM::BI_InterlockedXor_rel: - case ARM::BI_InterlockedXor64_rel: + case clang::ARM::BI_InterlockedXor8_rel: + case clang::ARM::BI_InterlockedXor16_rel: + case clang::ARM::BI_InterlockedXor_rel: + case clang::ARM::BI_InterlockedXor64_rel: return MSVCIntrin::_InterlockedXor_rel; - case ARM::BI_InterlockedXor8_nf: - case ARM::BI_InterlockedXor16_nf: - case ARM::BI_InterlockedXor_nf: - case ARM::BI_InterlockedXor64_nf: + case clang::ARM::BI_InterlockedXor8_nf: + case clang::ARM::BI_InterlockedXor16_nf: + case clang::ARM::BI_InterlockedXor_nf: + case clang::ARM::BI_InterlockedXor64_nf: return MSVCIntrin::_InterlockedXor_nf; - case ARM::BI_InterlockedAnd8_acq: - case ARM::BI_InterlockedAnd16_acq: - case ARM::BI_InterlockedAnd_acq: - case ARM::BI_InterlockedAnd64_acq: + case clang::ARM::BI_InterlockedAnd8_acq: + case clang::ARM::BI_InterlockedAnd16_acq: + case clang::ARM::BI_InterlockedAnd_acq: + case clang::ARM::BI_InterlockedAnd64_acq: return MSVCIntrin::_InterlockedAnd_acq; - case ARM::BI_InterlockedAnd8_rel: - case ARM::BI_InterlockedAnd16_rel: - case ARM::BI_InterlockedAnd_rel: - case ARM::BI_InterlockedAnd64_rel: + case clang::ARM::BI_InterlockedAnd8_rel: + case clang::ARM::BI_InterlockedAnd16_rel: + case clang::ARM::BI_InterlockedAnd_rel: + case clang::ARM::BI_InterlockedAnd64_rel: return MSVCIntrin::_InterlockedAnd_rel; - case ARM::BI_InterlockedAnd8_nf: - case ARM::BI_InterlockedAnd16_nf: - case ARM::BI_InterlockedAnd_nf: - case ARM::BI_InterlockedAnd64_nf: + case clang::ARM::BI_InterlockedAnd8_nf: + case clang::ARM::BI_InterlockedAnd16_nf: + case clang::ARM::BI_InterlockedAnd_nf: + case clang::ARM::BI_InterlockedAnd64_nf: return MSVCIntrin::_InterlockedAnd_nf; - case ARM::BI_InterlockedIncrement16_acq: - case ARM::BI_InterlockedIncrement_acq: - case ARM::BI_InterlockedIncrement64_acq: + case clang::ARM::BI_InterlockedIncrement16_acq: + case clang::ARM::BI_InterlockedIncrement_acq: + case clang::ARM::BI_InterlockedIncrement64_acq: return MSVCIntrin::_InterlockedIncrement_acq; - case ARM::BI_InterlockedIncrement16_rel: - case ARM::BI_InterlockedIncrement_rel: - case ARM::BI_InterlockedIncrement64_rel: + case clang::ARM::BI_InterlockedIncrement16_rel: + case clang::ARM::BI_InterlockedIncrement_rel: + case clang::ARM::BI_InterlockedIncrement64_rel: return MSVCIntrin::_InterlockedIncrement_rel; - case ARM::BI_InterlockedIncrement16_nf: - case ARM::BI_InterlockedIncrement_nf: - case ARM::BI_InterlockedIncrement64_nf: + case clang::ARM::BI_InterlockedIncrement16_nf: + case clang::ARM::BI_InterlockedIncrement_nf: + case clang::ARM::BI_InterlockedIncrement64_nf: return MSVCIntrin::_InterlockedIncrement_nf; - case ARM::BI_InterlockedDecrement16_acq: - case ARM::BI_InterlockedDecrement_acq: - case ARM::BI_InterlockedDecrement64_acq: + case clang::ARM::BI_InterlockedDecrement16_acq: + case clang::ARM::BI_InterlockedDecrement_acq: + case clang::ARM::BI_InterlockedDecrement64_acq: return MSVCIntrin::_InterlockedDecrement_acq; - case ARM::BI_InterlockedDecrement16_rel: - case ARM::BI_InterlockedDecrement_rel: - case ARM::BI_InterlockedDecrement64_rel: + case clang::ARM::BI_InterlockedDecrement16_rel: + case clang::ARM::BI_InterlockedDecrement_rel: + case clang::ARM::BI_InterlockedDecrement64_rel: return MSVCIntrin::_InterlockedDecrement_rel; - case ARM::BI_InterlockedDecrement16_nf: - case ARM::BI_InterlockedDecrement_nf: - case ARM::BI_InterlockedDecrement64_nf: + case clang::ARM::BI_InterlockedDecrement16_nf: + case clang::ARM::BI_InterlockedDecrement_nf: + case clang::ARM::BI_InterlockedDecrement64_nf: return MSVCIntrin::_InterlockedDecrement_nf; } llvm_unreachable("must return from switch"); @@ -1314,149 +1315,149 @@ translateAarch64ToMsvcIntrin(unsigned BuiltinID) { switch (BuiltinID) { default: return None; - case AArch64::BI_BitScanForward: - case AArch64::BI_BitScanForward64: + case clang::AArch64::BI_BitScanForward: + case clang::AArch64::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; - case AArch64::BI_BitScanReverse: - case AArch64::BI_BitScanReverse64: + case clang::AArch64::BI_BitScanReverse: + case clang::AArch64::BI_BitScanReverse64: return MSVCIntrin::_BitScanReverse; - case AArch64::BI_InterlockedAnd64: + case clang::AArch64::BI_InterlockedAnd64: return MSVCIntrin::_InterlockedAnd; - case AArch64::BI_InterlockedExchange64: + case clang::AArch64::BI_InterlockedExchange64: return MSVCIntrin::_InterlockedExchange; - case AArch64::BI_InterlockedExchangeAdd64: + case clang::AArch64::BI_InterlockedExchangeAdd64: return MSVCIntrin::_InterlockedExchangeAdd; - case AArch64::BI_InterlockedExchangeSub64: + case clang::AArch64::BI_InterlockedExchangeSub64: return MSVCIntrin::_InterlockedExchangeSub; - case AArch64::BI_InterlockedOr64: + case clang::AArch64::BI_InterlockedOr64: return MSVCIntrin::_InterlockedOr; - case AArch64::BI_InterlockedXor64: + case clang::AArch64::BI_InterlockedXor64: return MSVCIntrin::_InterlockedXor; - case AArch64::BI_InterlockedDecrement64: + case clang::AArch64::BI_InterlockedDecrement64: return MSVCIntrin::_InterlockedDecrement; - case AArch64::BI_InterlockedIncrement64: + case clang::AArch64::BI_InterlockedIncrement64: return MSVCIntrin::_InterlockedIncrement; - case AArch64::BI_InterlockedExchangeAdd8_acq: - case AArch64::BI_InterlockedExchangeAdd16_acq: - case AArch64::BI_InterlockedExchangeAdd_acq: - case AArch64::BI_InterlockedExchangeAdd64_acq: + case clang::AArch64::BI_InterlockedExchangeAdd8_acq: + case clang::AArch64::BI_InterlockedExchangeAdd16_acq: + case clang::AArch64::BI_InterlockedExchangeAdd_acq: + case clang::AArch64::BI_InterlockedExchangeAdd64_acq: return MSVCIntrin::_InterlockedExchangeAdd_acq; - case AArch64::BI_InterlockedExchangeAdd8_rel: - case AArch64::BI_InterlockedExchangeAdd16_rel: - case AArch64::BI_InterlockedExchangeAdd_rel: - case AArch64::BI_InterlockedExchangeAdd64_rel: + case clang::AArch64::BI_InterlockedExchangeAdd8_rel: + case clang::AArch64::BI_InterlockedExchangeAdd16_rel: + case clang::AArch64::BI_InterlockedExchangeAdd_rel: + case clang::AArch64::BI_InterlockedExchangeAdd64_rel: return MSVCIntrin::_InterlockedExchangeAdd_rel; - case AArch64::BI_InterlockedExchangeAdd8_nf: - case AArch64::BI_InterlockedExchangeAdd16_nf: - case AArch64::BI_InterlockedExchangeAdd_nf: - case AArch64::BI_InterlockedExchangeAdd64_nf: + case clang::AArch64::BI_InterlockedExchangeAdd8_nf: + case clang::AArch64::BI_InterlockedExchangeAdd16_nf: + case clang::AArch64::BI_InterlockedExchangeAdd_nf: + case clang::AArch64::BI_InterlockedExchangeAdd64_nf: return MSVCIntrin::_InterlockedExchangeAdd_nf; - case AArch64::BI_InterlockedExchange8_acq: - case AArch64::BI_InterlockedExchange16_acq: - case AArch64::BI_InterlockedExchange_acq: - case AArch64::BI_InterlockedExchange64_acq: + case clang::AArch64::BI_InterlockedExchange8_acq: + case clang::AArch64::BI_InterlockedExchange16_acq: + case clang::AArch64::BI_InterlockedExchange_acq: + case clang::AArch64::BI_InterlockedExchange64_acq: return MSVCIntrin::_InterlockedExchange_acq; - case AArch64::BI_InterlockedExchange8_rel: - case AArch64::BI_InterlockedExchange16_rel: - case AArch64::BI_InterlockedExchange_rel: - case AArch64::BI_InterlockedExchange64_rel: + case clang::AArch64::BI_InterlockedExchange8_rel: + case clang::AArch64::BI_InterlockedExchange16_rel: + case clang::AArch64::BI_InterlockedExchange_rel: + case clang::AArch64::BI_InterlockedExchange64_rel: return MSVCIntrin::_InterlockedExchange_rel; - case AArch64::BI_InterlockedExchange8_nf: - case AArch64::BI_InterlockedExchange16_nf: - case AArch64::BI_InterlockedExchange_nf: - case AArch64::BI_InterlockedExchange64_nf: + case clang::AArch64::BI_InterlockedExchange8_nf: + case clang::AArch64::BI_InterlockedExchange16_nf: + case clang::AArch64::BI_InterlockedExchange_nf: + case clang::AArch64::BI_InterlockedExchange64_nf: return MSVCIntrin::_InterlockedExchange_nf; - case AArch64::BI_InterlockedCompareExchange8_acq: - case AArch64::BI_InterlockedCompareExchange16_acq: - case AArch64::BI_InterlockedCompareExchange_acq: - case AArch64::BI_InterlockedCompareExchange64_acq: + case clang::AArch64::BI_InterlockedCompareExchange8_acq: + case clang::AArch64::BI_InterlockedCompareExchange16_acq: + case clang::AArch64::BI_InterlockedCompareExchange_acq: + case clang::AArch64::BI_InterlockedCompareExchange64_acq: return MSVCIntrin::_InterlockedCompareExchange_acq; - case AArch64::BI_InterlockedCompareExchange8_rel: - case AArch64::BI_InterlockedCompareExchange16_rel: - case AArch64::BI_InterlockedCompareExchange_rel: - case AArch64::BI_InterlockedCompareExchange64_rel: + case clang::AArch64::BI_InterlockedCompareExchange8_rel: + case clang::AArch64::BI_InterlockedCompareExchange16_rel: + case clang::AArch64::BI_InterlockedCompareExchange_rel: + case clang::AArch64::BI_InterlockedCompareExchange64_rel: return MSVCIntrin::_InterlockedCompareExchange_rel; - case AArch64::BI_InterlockedCompareExchange8_nf: - case AArch64::BI_InterlockedCompareExchange16_nf: - case AArch64::BI_InterlockedCompareExchange_nf: - case AArch64::BI_InterlockedCompareExchange64_nf: + case clang::AArch64::BI_InterlockedCompareExchange8_nf: + case clang::AArch64::BI_InterlockedCompareExchange16_nf: + case clang::AArch64::BI_InterlockedCompareExchange_nf: + case clang::AArch64::BI_InterlockedCompareExchange64_nf: return MSVCIntrin::_InterlockedCompareExchange_nf; - case AArch64::BI_InterlockedCompareExchange128: + case clang::AArch64::BI_InterlockedCompareExchange128: return MSVCIntrin::_InterlockedCompareExchange128; - case AArch64::BI_InterlockedCompareExchange128_acq: + case clang::AArch64::BI_InterlockedCompareExchange128_acq: return MSVCIntrin::_InterlockedCompareExchange128_acq; - case AArch64::BI_InterlockedCompareExchange128_nf: + case clang::AArch64::BI_InterlockedCompareExchange128_nf: return MSVCIntrin::_InterlockedCompareExchange128_nf; - case AArch64::BI_InterlockedCompareExchange128_rel: + case clang::AArch64::BI_InterlockedCompareExchange128_rel: return MSVCIntrin::_InterlockedCompareExchange128_rel; - case AArch64::BI_InterlockedOr8_acq: - case AArch64::BI_InterlockedOr16_acq: - case AArch64::BI_InterlockedOr_acq: - case AArch64::BI_InterlockedOr64_acq: + case clang::AArch64::BI_InterlockedOr8_acq: + case clang::AArch64::BI_InterlockedOr16_acq: + case clang::AArch64::BI_InterlockedOr_acq: + case clang::AArch64::BI_InterlockedOr64_acq: return MSVCIntrin::_InterlockedOr_acq; - case AArch64::BI_InterlockedOr8_rel: - case AArch64::BI_InterlockedOr16_rel: - case AArch64::BI_InterlockedOr_rel: - case AArch64::BI_InterlockedOr64_rel: + case clang::AArch64::BI_InterlockedOr8_rel: + case clang::AArch64::BI_InterlockedOr16_rel: + case clang::AArch64::BI_InterlockedOr_rel: + case clang::AArch64::BI_InterlockedOr64_rel: return MSVCIntrin::_InterlockedOr_rel; - case AArch64::BI_InterlockedOr8_nf: - case AArch64::BI_InterlockedOr16_nf: - case AArch64::BI_InterlockedOr_nf: - case AArch64::BI_InterlockedOr64_nf: + case clang::AArch64::BI_InterlockedOr8_nf: + case clang::AArch64::BI_InterlockedOr16_nf: + case clang::AArch64::BI_InterlockedOr_nf: + case clang::AArch64::BI_InterlockedOr64_nf: return MSVCIntrin::_InterlockedOr_nf; - case AArch64::BI_InterlockedXor8_acq: - case AArch64::BI_InterlockedXor16_acq: - case AArch64::BI_InterlockedXor_acq: - case AArch64::BI_InterlockedXor64_acq: + case clang::AArch64::BI_InterlockedXor8_acq: + case clang::AArch64::BI_InterlockedXor16_acq: + case clang::AArch64::BI_InterlockedXor_acq: + case clang::AArch64::BI_InterlockedXor64_acq: return MSVCIntrin::_InterlockedXor_acq; - case AArch64::BI_InterlockedXor8_rel: - case AArch64::BI_InterlockedXor16_rel: - case AArch64::BI_InterlockedXor_rel: - case AArch64::BI_InterlockedXor64_rel: + case clang::AArch64::BI_InterlockedXor8_rel: + case clang::AArch64::BI_InterlockedXor16_rel: + case clang::AArch64::BI_InterlockedXor_rel: + case clang::AArch64::BI_InterlockedXor64_rel: return MSVCIntrin::_InterlockedXor_rel; - case AArch64::BI_InterlockedXor8_nf: - case AArch64::BI_InterlockedXor16_nf: - case AArch64::BI_InterlockedXor_nf: - case AArch64::BI_InterlockedXor64_nf: + case clang::AArch64::BI_InterlockedXor8_nf: + case clang::AArch64::BI_InterlockedXor16_nf: + case clang::AArch64::BI_InterlockedXor_nf: + case clang::AArch64::BI_InterlockedXor64_nf: return MSVCIntrin::_InterlockedXor_nf; - case AArch64::BI_InterlockedAnd8_acq: - case AArch64::BI_InterlockedAnd16_acq: - case AArch64::BI_InterlockedAnd_acq: - case AArch64::BI_InterlockedAnd64_acq: + case clang::AArch64::BI_InterlockedAnd8_acq: + case clang::AArch64::BI_InterlockedAnd16_acq: + case clang::AArch64::BI_InterlockedAnd_acq: + case clang::AArch64::BI_InterlockedAnd64_acq: return MSVCIntrin::_InterlockedAnd_acq; - case AArch64::BI_InterlockedAnd8_rel: - case AArch64::BI_InterlockedAnd16_rel: - case AArch64::BI_InterlockedAnd_rel: - case AArch64::BI_InterlockedAnd64_rel: + case clang::AArch64::BI_InterlockedAnd8_rel: + case clang::AArch64::BI_InterlockedAnd16_rel: + case clang::AArch64::BI_InterlockedAnd_rel: + case clang::AArch64::BI_InterlockedAnd64_rel: return MSVCIntrin::_InterlockedAnd_rel; - case AArch64::BI_InterlockedAnd8_nf: - case AArch64::BI_InterlockedAnd16_nf: - case AArch64::BI_InterlockedAnd_nf: - case AArch64::BI_InterlockedAnd64_nf: + case clang::AArch64::BI_InterlockedAnd8_nf: + case clang::AArch64::BI_InterlockedAnd16_nf: + case clang::AArch64::BI_InterlockedAnd_nf: + case clang::AArch64::BI_InterlockedAnd64_nf: return MSVCIntrin::_InterlockedAnd_nf; - case AArch64::BI_InterlockedIncrement16_acq: - case AArch64::BI_InterlockedIncrement_acq: - case AArch64::BI_InterlockedIncrement64_acq: + case clang::AArch64::BI_InterlockedIncrement16_acq: + case clang::AArch64::BI_InterlockedIncrement_acq: + case clang::AArch64::BI_InterlockedIncrement64_acq: return MSVCIntrin::_InterlockedIncrement_acq; - case AArch64::BI_InterlockedIncrement16_rel: - case AArch64::BI_InterlockedIncrement_rel: - case AArch64::BI_InterlockedIncrement64_rel: + case clang::AArch64::BI_InterlockedIncrement16_rel: + case clang::AArch64::BI_InterlockedIncrement_rel: + case clang::AArch64::BI_InterlockedIncrement64_rel: return MSVCIntrin::_InterlockedIncrement_rel; - case AArch64::BI_InterlockedIncrement16_nf: - case AArch64::BI_InterlockedIncrement_nf: - case AArch64::BI_InterlockedIncrement64_nf: + case clang::AArch64::BI_InterlockedIncrement16_nf: + case clang::AArch64::BI_InterlockedIncrement_nf: + case clang::AArch64::BI_InterlockedIncrement64_nf: return MSVCIntrin::_InterlockedIncrement_nf; - case AArch64::BI_InterlockedDecrement16_acq: - case AArch64::BI_InterlockedDecrement_acq: - case AArch64::BI_InterlockedDecrement64_acq: + case clang::AArch64::BI_InterlockedDecrement16_acq: + case clang::AArch64::BI_InterlockedDecrement_acq: + case clang::AArch64::BI_InterlockedDecrement64_acq: return MSVCIntrin::_InterlockedDecrement_acq; - case AArch64::BI_InterlockedDecrement16_rel: - case AArch64::BI_InterlockedDecrement_rel: - case AArch64::BI_InterlockedDecrement64_rel: + case clang::AArch64::BI_InterlockedDecrement16_rel: + case clang::AArch64::BI_InterlockedDecrement_rel: + case clang::AArch64::BI_InterlockedDecrement64_rel: return MSVCIntrin::_InterlockedDecrement_rel; - case AArch64::BI_InterlockedDecrement16_nf: - case AArch64::BI_InterlockedDecrement_nf: - case AArch64::BI_InterlockedDecrement64_nf: + case clang::AArch64::BI_InterlockedDecrement16_nf: + case clang::AArch64::BI_InterlockedDecrement_nf: + case clang::AArch64::BI_InterlockedDecrement64_nf: return MSVCIntrin::_InterlockedDecrement_nf; } llvm_unreachable("must return from switch"); @@ -1778,8 +1779,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( auto AL = ApplyDebugLocation::CreateArtificial(*this); CharUnits Offset; - Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), - BufferAlignment); + Address BufAddr = + Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty, + BufferAlignment); Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), @@ -1800,8 +1802,8 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Address Arg = GetAddrOfLocalVar(Args[I]); Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); - Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), - "argDataCast"); + Addr = + Builder.CreateElementBitCast(Addr, Arg.getElementType(), "argDataCast"); Builder.CreateStore(Builder.CreateLoad(Arg), Addr); Offset += Size; ++I; @@ -2000,7 +2002,7 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, // Signed overflow occurs if the result is greater than INT_MAX or lesser // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). auto IntMax = - llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth); + llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth); llvm::Value *MaxResult = CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), CGF.Builder.CreateZExt(IsNegative, OpTy)); @@ -2041,89 +2043,6 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, return RValue::get(Overflow); } -static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, - Value *&RecordPtr, CharUnits Align, - llvm::FunctionCallee Func, int Lvl) { - ASTContext &Context = CGF.getContext(); - RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition(); - std::string Pad = std::string(Lvl * 4, ' '); - - Value *GString = - CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n"); - Value *Res = CGF.Builder.CreateCall(Func, {GString}); - - static llvm::DenseMap<QualType, const char *> Types; - if (Types.empty()) { - Types[Context.CharTy] = "%c"; - Types[Context.BoolTy] = "%d"; - Types[Context.SignedCharTy] = "%hhd"; - Types[Context.UnsignedCharTy] = "%hhu"; - Types[Context.IntTy] = "%d"; - Types[Context.UnsignedIntTy] = "%u"; - Types[Context.LongTy] = "%ld"; - Types[Context.UnsignedLongTy] = "%lu"; - Types[Context.LongLongTy] = "%lld"; - Types[Context.UnsignedLongLongTy] = "%llu"; - Types[Context.ShortTy] = "%hd"; - Types[Context.UnsignedShortTy] = "%hu"; - Types[Context.VoidPtrTy] = "%p"; - Types[Context.FloatTy] = "%f"; - Types[Context.DoubleTy] = "%f"; - Types[Context.LongDoubleTy] = "%Lf"; - Types[Context.getPointerType(Context.CharTy)] = "%s"; - Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s"; - } - - for (const auto *FD : RD->fields()) { - Value *FieldPtr = RecordPtr; - if (RD->isUnion()) - FieldPtr = CGF.Builder.CreatePointerCast( - FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType()))); - else - FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr, - FD->getFieldIndex()); - - GString = CGF.Builder.CreateGlobalStringPtr( - llvm::Twine(Pad) - .concat(FD->getType().getAsString()) - .concat(llvm::Twine(' ')) - .concat(FD->getNameAsString()) - .concat(" : ") - .str()); - Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); - Res = CGF.Builder.CreateAdd(Res, TmpRes); - - QualType CanonicalType = - FD->getType().getUnqualifiedType().getCanonicalType(); - - // We check whether we are in a recursive type - if (CanonicalType->isRecordType()) { - TmpRes = dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1); - Res = CGF.Builder.CreateAdd(TmpRes, Res); - continue; - } - - // We try to determine the best format to print the current field - llvm::Twine Format = Types.find(CanonicalType) == Types.end() - ? Types[Context.VoidPtrTy] - : Types[CanonicalType]; - - Address FieldAddress = Address(FieldPtr, Align); - FieldPtr = CGF.Builder.CreateLoad(FieldAddress); - - // FIXME Need to handle bitfield here - GString = CGF.Builder.CreateGlobalStringPtr( - Format.concat(llvm::Twine('\n')).str()); - TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr}); - Res = CGF.Builder.CreateAdd(Res, TmpRes); - } - - GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n"); - Value *TmpRes = CGF.Builder.CreateCall(Func, {GString}); - Res = CGF.Builder.CreateAdd(Res, TmpRes); - return Res; -} - static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl<const Decl *> &Seen) { @@ -2252,8 +2171,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ReturnValueSlot ReturnValue) { const FunctionDecl *FD = GD.getDecl()->getAsFunction(); // See if we can constant fold this builtin. If so, don't emit it at all. + // TODO: Extend this handling to all builtin calls that we can constant-fold. Expr::EvalResult Result; - if (E->EvaluateAsRValue(Result, CGM.getContext()) && + if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) && !Result.hasSideEffects()) { if (Result.Val.isInt()) return RValue::get(llvm::ConstantInt::get(getLLVMContext(), @@ -2649,23 +2569,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(ComplexVal.first); } - case Builtin::BI__builtin_dump_struct: { - llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy); - llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get( - LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true); - - Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); - CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); - - const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts(); - QualType Arg0Type = Arg0->getType()->getPointeeType(); - - Value *RecordPtr = EmitScalarExpr(Arg0); - Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, - {LLVMFuncType, Func}, 0); - return RValue::get(Res); - } - case Builtin::BI__builtin_preserve_access_index: { // Only enabled preserved access index region when debuginfo // is available as debuginfo is needed to preserve user-level @@ -2929,7 +2832,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: - case Builtin::BI__builtin_bswap64: { + case Builtin::BI__builtin_bswap64: + case Builtin::BI_byteswap_ushort: + case Builtin::BI_byteswap_ulong: + case Builtin::BI_byteswap_uint64: { return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); } case Builtin::BI__builtin_bitreverse8: @@ -3154,6 +3060,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc")); + case Builtin::BI__builtin_elementwise_add_sat: + case Builtin::BI__builtin_elementwise_sub_sat: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Result; + assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected"); + QualType Ty = E->getArg(0)->getType(); + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + bool IsSigned = Ty->isSignedIntegerType(); + unsigned Opc; + if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat) + Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat; + else + Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat; + Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat"); + return RValue::get(Result); + } + case Builtin::BI__builtin_elementwise_max: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); @@ -3218,6 +3143,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } + case Builtin::BI__builtin_reduce_add: + return RValue::get(emitUnaryBuiltin( + *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); + case Builtin::BI__builtin_reduce_mul: + return RValue::get(emitUnaryBuiltin( + *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul")); case Builtin::BI__builtin_reduce_xor: return RValue::get(emitUnaryBuiltin( *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); @@ -3231,14 +3162,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_matrix_transpose: { auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>(); Value *MatValue = EmitScalarExpr(E->getArg(0)); - MatrixBuilder<CGBuilderTy> MB(Builder); + MatrixBuilder MB(Builder); Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(), MatrixTy->getNumColumns()); return RValue::get(Result); } case Builtin::BI__builtin_matrix_column_major_load: { - MatrixBuilder<CGBuilderTy> MB(Builder); + MatrixBuilder MB(Builder); // Emit everything that isn't dependent on the first parameter type Value *Stride = EmitScalarExpr(E->getArg(3)); const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>(); @@ -3250,14 +3181,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Value *Result = MB.CreateColumnMajorLoad( - Src.getPointer(), Align(Src.getAlignment().getQuantity()), Stride, - IsVolatile, ResultTy->getNumRows(), ResultTy->getNumColumns(), + Src.getElementType(), Src.getPointer(), + Align(Src.getAlignment().getQuantity()), Stride, IsVolatile, + ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix"); return RValue::get(Result); } case Builtin::BI__builtin_matrix_column_major_store: { - MatrixBuilder<CGBuilderTy> MB(Builder); + MatrixBuilder MB(Builder); Value *Matrix = EmitScalarExpr(E->getArg(0)); Address Dst = EmitPointerWithAlignment(E->getArg(1)); Value *Stride = EmitScalarExpr(E->getArg(2)); @@ -3576,6 +3508,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest.getPointer()); } + case Builtin::BI__builtin_memset_inline: { + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Value *ByteVal = + Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); + uint64_t Size = + E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), + E->getArg(0)->getExprLoc(), FD, 0); + Builder.CreateMemSetInline(Dest, ByteVal, Size); + return RValue::get(nullptr); + } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. Expr::EvalResult SizeResult, DstSizeResult; @@ -3818,7 +3761,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Call LLVM's EH setjmp, which is lightweight. Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); - Buf = Builder.CreateBitCast(Buf, Int8PtrTy); + Buf = Builder.CreateElementBitCast(Buf, Int8Ty); return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } case Builtin::BI__builtin_longjmp: { @@ -4129,8 +4072,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); Address Ptr = EmitPointerWithAlignment(E->getArg(0)); - unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); - Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); + Ptr = Builder.CreateElementBitCast(Ptr, Int8Ty); Value *NewVal = Builder.getInt8(0); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa<llvm::ConstantInt>(Order)) { @@ -4524,6 +4466,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Carry); } + case Builtin::BIaddressof: + case Builtin::BI__addressof: case Builtin::BI__builtin_addressof: return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__builtin_function_start: @@ -4683,6 +4627,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } break; + // C++ std:: builtins. + case Builtin::BImove: + case Builtin::BImove_if_noexcept: + case Builtin::BIforward: + case Builtin::BIas_const: + return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__GetExceptionInfo: { if (llvm::GlobalVariable *GV = CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) @@ -5176,7 +5126,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_thread_pointer: { if (!getContext().getTargetInfo().isTLSSupported()) CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); - // Fall through - it's already mapped to the intrinsic by GCCBuiltin. + // Fall through - it's already mapped to the intrinsic by ClangBuiltin. break; } case Builtin::BI__builtin_os_log_format: @@ -5319,7 +5269,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, StringRef Prefix = llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); if (!Prefix.empty()) { - IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); + IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name); // NOTE we don't need to perform a compatibility flag check here since the // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. @@ -5369,7 +5319,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && "Must be able to losslessly bit cast to param"); - ArgValue = Builder.CreateBitCast(ArgValue, PTy); + // Cast vector type (e.g., v256i32) to x86_amx, this only happen + // in amx intrinsics. + if (PTy->isX86_AMXTy()) + ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile, + {ArgValue->getType()}, {ArgValue}); + else + ArgValue = Builder.CreateBitCast(ArgValue, PTy); } Args.push_back(ArgValue); @@ -5393,7 +5349,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, assert(V->getType()->canLosslesslyBitCastTo(RetTy) && "Must be able to losslessly bit cast result type"); - V = Builder.CreateBitCast(V, RetTy); + // Cast x86_amx to vector type (e.g., v256i32), this only happen + // in amx intrinsics. + if (V->getType()->isX86_AMXTy()) + V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy}, + {V}); + else + V = Builder.CreateBitCast(V, RetTy); } return RValue::get(V); @@ -6899,8 +6861,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = UndefValue::get(Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty); + PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); LoadInst *Ld = Builder.CreateLoad(PtrOp0); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Ops[0] = Builder.CreateInsertElement(V, Ld, CI); @@ -7294,7 +7255,10 @@ Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( Op = Builder.CreateBitCast(Op, OTy); if (OTy->getScalarType()->isFloatingPointTy()) { - Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); + if (Fp == CmpInst::FCMP_OEQ) + Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); + else + Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy)); } else { Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); } @@ -7345,27 +7309,27 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { switch (BuiltinID) { default: return nullptr; - case ARM::BI__builtin_arm_nop: + case clang::ARM::BI__builtin_arm_nop: Value = 0; break; - case ARM::BI__builtin_arm_yield: - case ARM::BI__yield: + case clang::ARM::BI__builtin_arm_yield: + case clang::ARM::BI__yield: Value = 1; break; - case ARM::BI__builtin_arm_wfe: - case ARM::BI__wfe: + case clang::ARM::BI__builtin_arm_wfe: + case clang::ARM::BI__wfe: Value = 2; break; - case ARM::BI__builtin_arm_wfi: - case ARM::BI__wfi: + case clang::ARM::BI__builtin_arm_wfi: + case clang::ARM::BI__wfi: Value = 3; break; - case ARM::BI__builtin_arm_sev: - case ARM::BI__sev: + case clang::ARM::BI__builtin_arm_sev: + case clang::ARM::BI__sev: Value = 4; break; - case ARM::BI__builtin_arm_sevl: - case ARM::BI__sevl: + case clang::ARM::BI__builtin_arm_sevl: + case clang::ARM::BI__sevl: Value = 5; break; } @@ -7498,7 +7462,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (auto Hint = GetValueForARMHint(BuiltinID)) return Hint; - if (BuiltinID == ARM::BI__emit) { + if (BuiltinID == clang::ARM::BI__emit) { bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic=*/false); @@ -7519,12 +7483,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(Emit); } - if (BuiltinID == ARM::BI__builtin_arm_dbg) { + if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) { Value *Option = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); } - if (BuiltinID == ARM::BI__builtin_arm_prefetch) { + if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) { Value *Address = EmitScalarExpr(E->getArg(0)); Value *RW = EmitScalarExpr(E->getArg(1)); Value *IsData = EmitScalarExpr(E->getArg(2)); @@ -7536,23 +7500,23 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } - if (BuiltinID == ARM::BI__builtin_arm_rbit) { + if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } - if (BuiltinID == ARM::BI__builtin_arm_cls) { + if (BuiltinID == clang::ARM::BI__builtin_arm_cls) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); } - if (BuiltinID == ARM::BI__builtin_arm_cls64) { + if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, "cls"); } - if (BuiltinID == ARM::BI__clear_cache) { + if (BuiltinID == clang::ARM::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); Value *Ops[2]; @@ -7564,16 +7528,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } - if (BuiltinID == ARM::BI__builtin_arm_mcrr || - BuiltinID == ARM::BI__builtin_arm_mcrr2) { + if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr || + BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) { Function *F; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin"); - case ARM::BI__builtin_arm_mcrr: + case clang::ARM::BI__builtin_arm_mcrr: F = CGM.getIntrinsic(Intrinsic::arm_mcrr); break; - case ARM::BI__builtin_arm_mcrr2: + case clang::ARM::BI__builtin_arm_mcrr2: F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); break; } @@ -7598,16 +7562,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); } - if (BuiltinID == ARM::BI__builtin_arm_mrrc || - BuiltinID == ARM::BI__builtin_arm_mrrc2) { + if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc || + BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) { Function *F; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin"); - case ARM::BI__builtin_arm_mrrc: + case clang::ARM::BI__builtin_arm_mrrc: F = CGM.getIntrinsic(Intrinsic::arm_mrrc); break; - case ARM::BI__builtin_arm_mrrc2: + case clang::ARM::BI__builtin_arm_mrrc2: F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); break; } @@ -7632,21 +7596,21 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); } - if (BuiltinID == ARM::BI__builtin_arm_ldrexd || - ((BuiltinID == ARM::BI__builtin_arm_ldrex || - BuiltinID == ARM::BI__builtin_arm_ldaex) && + if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd || + ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex || + BuiltinID == clang::ARM::BI__builtin_arm_ldaex) && getContext().getTypeSize(E->getType()) == 64) || - BuiltinID == ARM::BI__ldrexd) { + BuiltinID == clang::ARM::BI__ldrexd) { Function *F; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin"); - case ARM::BI__builtin_arm_ldaex: + case clang::ARM::BI__builtin_arm_ldaex: F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); break; - case ARM::BI__builtin_arm_ldrexd: - case ARM::BI__builtin_arm_ldrex: - case ARM::BI__ldrexd: + case clang::ARM::BI__builtin_arm_ldrexd: + case clang::ARM::BI__builtin_arm_ldrex: + case clang::ARM::BI__ldrexd: F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); break; } @@ -7666,46 +7630,49 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateBitCast(Val, ConvertType(E->getType())); } - if (BuiltinID == ARM::BI__builtin_arm_ldrex || - BuiltinID == ARM::BI__builtin_arm_ldaex) { + if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex || + BuiltinID == clang::ARM::BI__builtin_arm_ldaex) { Value *LoadAddr = EmitScalarExpr(E->getArg(0)); QualType Ty = E->getType(); llvm::Type *RealResTy = ConvertType(Ty); - llvm::Type *PtrTy = llvm::IntegerType::get( - getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); + llvm::Type *IntTy = + llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); + llvm::Type *PtrTy = IntTy->getPointerTo(); LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex - ? Intrinsic::arm_ldaex - : Intrinsic::arm_ldrex, - PtrTy); - Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); + Function *F = CGM.getIntrinsic( + BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex + : Intrinsic::arm_ldrex, + PtrTy); + CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); + Val->addParamAttr( + 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); if (RealResTy->isPointerTy()) return Builder.CreateIntToPtr(Val, RealResTy); else { llvm::Type *IntResTy = llvm::IntegerType::get( getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); - Val = Builder.CreateTruncOrBitCast(Val, IntResTy); - return Builder.CreateBitCast(Val, RealResTy); + return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), + RealResTy); } } - if (BuiltinID == ARM::BI__builtin_arm_strexd || - ((BuiltinID == ARM::BI__builtin_arm_stlex || - BuiltinID == ARM::BI__builtin_arm_strex) && + if (BuiltinID == clang::ARM::BI__builtin_arm_strexd || + ((BuiltinID == clang::ARM::BI__builtin_arm_stlex || + BuiltinID == clang::ARM::BI__builtin_arm_strex) && getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex - ? Intrinsic::arm_stlexd - : Intrinsic::arm_strexd); + Function *F = CGM.getIntrinsic( + BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd + : Intrinsic::arm_strexd); llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Value *Val = EmitScalarExpr(E->getArg(0)); Builder.CreateStore(Val, Tmp); - Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); + Address LdPtr = Builder.CreateElementBitCast(Tmp, STy); Val = Builder.CreateLoad(LdPtr); Value *Arg0 = Builder.CreateExtractValue(Val, 0); @@ -7714,8 +7681,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); } - if (BuiltinID == ARM::BI__builtin_arm_strex || - BuiltinID == ARM::BI__builtin_arm_stlex) { + if (BuiltinID == clang::ARM::BI__builtin_arm_strex || + BuiltinID == clang::ARM::BI__builtin_arm_stlex) { Value *StoreVal = EmitScalarExpr(E->getArg(0)); Value *StoreAddr = EmitScalarExpr(E->getArg(1)); @@ -7734,14 +7701,18 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); } - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex - ? Intrinsic::arm_stlex - : Intrinsic::arm_strex, - StoreAddr->getType()); - return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); + Function *F = CGM.getIntrinsic( + BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex + : Intrinsic::arm_strex, + StoreAddr->getType()); + + CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); + CI->addParamAttr( + 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); + return CI; } - if (BuiltinID == ARM::BI__builtin_arm_clrex) { + if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); } @@ -7749,19 +7720,19 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { - case ARM::BI__builtin_arm_crc32b: + case clang::ARM::BI__builtin_arm_crc32b: CRCIntrinsicID = Intrinsic::arm_crc32b; break; - case ARM::BI__builtin_arm_crc32cb: + case clang::ARM::BI__builtin_arm_crc32cb: CRCIntrinsicID = Intrinsic::arm_crc32cb; break; - case ARM::BI__builtin_arm_crc32h: + case clang::ARM::BI__builtin_arm_crc32h: CRCIntrinsicID = Intrinsic::arm_crc32h; break; - case ARM::BI__builtin_arm_crc32ch: + case clang::ARM::BI__builtin_arm_crc32ch: CRCIntrinsicID = Intrinsic::arm_crc32ch; break; - case ARM::BI__builtin_arm_crc32w: - case ARM::BI__builtin_arm_crc32d: + case clang::ARM::BI__builtin_arm_crc32w: + case clang::ARM::BI__builtin_arm_crc32d: CRCIntrinsicID = Intrinsic::arm_crc32w; break; - case ARM::BI__builtin_arm_crc32cw: - case ARM::BI__builtin_arm_crc32cd: + case clang::ARM::BI__builtin_arm_crc32cw: + case clang::ARM::BI__builtin_arm_crc32cd: CRCIntrinsicID = Intrinsic::arm_crc32cw; break; } @@ -7771,8 +7742,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w // intrinsics, hence we need different codegen for these cases. - if (BuiltinID == ARM::BI__builtin_arm_crc32d || - BuiltinID == ARM::BI__builtin_arm_crc32cd) { + if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d || + BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) { Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); Value *Arg1b = Builder.CreateLShr(Arg1, C1); @@ -7789,24 +7760,24 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } - if (BuiltinID == ARM::BI__builtin_arm_rsr || - BuiltinID == ARM::BI__builtin_arm_rsr64 || - BuiltinID == ARM::BI__builtin_arm_rsrp || - BuiltinID == ARM::BI__builtin_arm_wsr || - BuiltinID == ARM::BI__builtin_arm_wsr64 || - BuiltinID == ARM::BI__builtin_arm_wsrp) { + if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || + BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || + BuiltinID == clang::ARM::BI__builtin_arm_rsrp || + BuiltinID == clang::ARM::BI__builtin_arm_wsr || + BuiltinID == clang::ARM::BI__builtin_arm_wsr64 || + BuiltinID == clang::ARM::BI__builtin_arm_wsrp) { SpecialRegisterAccessKind AccessKind = Write; - if (BuiltinID == ARM::BI__builtin_arm_rsr || - BuiltinID == ARM::BI__builtin_arm_rsr64 || - BuiltinID == ARM::BI__builtin_arm_rsrp) + if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || + BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || + BuiltinID == clang::ARM::BI__builtin_arm_rsrp) AccessKind = VolatileRead; - bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || - BuiltinID == ARM::BI__builtin_arm_wsrp; + bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp || + BuiltinID == clang::ARM::BI__builtin_arm_wsrp; - bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || - BuiltinID == ARM::BI__builtin_arm_wsr64; + bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || + BuiltinID == clang::ARM::BI__builtin_arm_wsr64; llvm::Type *ValueType; llvm::Type *RegisterType; @@ -7823,6 +7794,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, AccessKind); } + if (BuiltinID == ARM::BI__builtin_sponentry) { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); + return Builder.CreateCall(F); + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (Optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) @@ -7981,10 +7957,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // The ARM _MoveToCoprocessor builtins put the input register value as // the first argument, but the LLVM intrinsic expects it as the third one. - case ARM::BI_MoveToCoprocessor: - case ARM::BI_MoveToCoprocessor2: { - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? - Intrinsic::arm_mcr : Intrinsic::arm_mcr2); + case clang::ARM::BI_MoveToCoprocessor: + case clang::ARM::BI_MoveToCoprocessor2: { + Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor + ? Intrinsic::arm_mcr + : Intrinsic::arm_mcr2); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); } @@ -7997,11 +7974,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (!Result) return nullptr; - if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || - BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { + if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f || + BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) { // Determine the overloaded type of this builtin. llvm::Type *Ty; - if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) + if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f) Ty = FloatTy; else Ty = DoubleTy; @@ -8126,8 +8103,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); - Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); + auto St = Builder.CreateStore( + Ops[1], Builder.CreateElementBitCast(PtrOp0, Ops[1]->getType())); return St; } case NEON::BI__builtin_neon_vtbl1_v: @@ -9005,7 +8982,10 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); - Value *Load = Builder.CreateCall(F, {Predicate, BasePtr}); + auto *Load = + cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr})); + auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); + CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy) : Builder.CreateSExt(Load, VectorTy); @@ -9033,7 +9013,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo()); Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy); - return Builder.CreateCall(F, {Val, Predicate, BasePtr}); + auto *Store = + cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr})); + auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); + CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); + return Store; } // Limit the usage of scalable llvm IR generated by the ACLE by using the @@ -9427,34 +9411,34 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { - if (BuiltinID >= AArch64::FirstSVEBuiltin && - BuiltinID <= AArch64::LastSVEBuiltin) + if (BuiltinID >= clang::AArch64::FirstSVEBuiltin && + BuiltinID <= clang::AArch64::LastSVEBuiltin) return EmitAArch64SVEBuiltinExpr(BuiltinID, E); unsigned HintID = static_cast<unsigned>(-1); switch (BuiltinID) { default: break; - case AArch64::BI__builtin_arm_nop: + case clang::AArch64::BI__builtin_arm_nop: HintID = 0; break; - case AArch64::BI__builtin_arm_yield: - case AArch64::BI__yield: + case clang::AArch64::BI__builtin_arm_yield: + case clang::AArch64::BI__yield: HintID = 1; break; - case AArch64::BI__builtin_arm_wfe: - case AArch64::BI__wfe: + case clang::AArch64::BI__builtin_arm_wfe: + case clang::AArch64::BI__wfe: HintID = 2; break; - case AArch64::BI__builtin_arm_wfi: - case AArch64::BI__wfi: + case clang::AArch64::BI__builtin_arm_wfi: + case clang::AArch64::BI__wfi: HintID = 3; break; - case AArch64::BI__builtin_arm_sev: - case AArch64::BI__sev: + case clang::AArch64::BI__builtin_arm_sev: + case clang::AArch64::BI__sev: HintID = 4; break; - case AArch64::BI__builtin_arm_sevl: - case AArch64::BI__sevl: + case clang::AArch64::BI__builtin_arm_sevl: + case clang::AArch64::BI__sevl: HintID = 5; break; } @@ -9464,7 +9448,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); } - if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_prefetch) { Value *Address = EmitScalarExpr(E->getArg(0)); Value *RW = EmitScalarExpr(E->getArg(1)); Value *CacheLevel = EmitScalarExpr(E->getArg(2)); @@ -9487,14 +9471,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } - if (BuiltinID == AArch64::BI__builtin_arm_rbit) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { assert((getContext().getTypeSize(E->getType()) == 32) && "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } - if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) { assert((getContext().getTypeSize(E->getType()) == 64) && "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); @@ -9502,50 +9486,50 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } - if (BuiltinID == AArch64::BI__builtin_arm_cls) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, "cls"); } - if (BuiltinID == AArch64::BI__builtin_arm_cls64) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, "cls"); } - if (BuiltinID == AArch64::BI__builtin_arm_frint32zf || - BuiltinID == AArch64::BI__builtin_arm_frint32z) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32zf || + BuiltinID == clang::AArch64::BI__builtin_arm_frint32z) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty), Arg, "frint32z"); } - if (BuiltinID == AArch64::BI__builtin_arm_frint64zf || - BuiltinID == AArch64::BI__builtin_arm_frint64z) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64zf || + BuiltinID == clang::AArch64::BI__builtin_arm_frint64z) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty), Arg, "frint64z"); } - if (BuiltinID == AArch64::BI__builtin_arm_frint32xf || - BuiltinID == AArch64::BI__builtin_arm_frint32x) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_frint32xf || + BuiltinID == clang::AArch64::BI__builtin_arm_frint32x) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty), Arg, "frint32x"); } - if (BuiltinID == AArch64::BI__builtin_arm_frint64xf || - BuiltinID == AArch64::BI__builtin_arm_frint64x) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_frint64xf || + BuiltinID == clang::AArch64::BI__builtin_arm_frint64x) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty), Arg, "frint64x"); } - if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) { assert((getContext().getTypeSize(E->getType()) == 32) && "__jcvt of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); @@ -9553,14 +9537,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); } - if (BuiltinID == AArch64::BI__builtin_arm_ld64b || - BuiltinID == AArch64::BI__builtin_arm_st64b || - BuiltinID == AArch64::BI__builtin_arm_st64bv || - BuiltinID == AArch64::BI__builtin_arm_st64bv0) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b || + BuiltinID == clang::AArch64::BI__builtin_arm_st64b || + BuiltinID == clang::AArch64::BI__builtin_arm_st64bv || + BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) { llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0)); llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1)); - if (BuiltinID == AArch64::BI__builtin_arm_ld64b) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) { // Load from the address via an LLVM intrinsic, receiving a // tuple of 8 i64 words, and store each one to ValPtr. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b); @@ -9569,7 +9553,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, for (size_t i = 0; i < 8; i++) { llvm::Value *ValOffsetPtr = Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); - Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8)); + Address Addr = + Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr); } return ToRet; @@ -9581,24 +9566,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, for (size_t i = 0; i < 8; i++) { llvm::Value *ValOffsetPtr = Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); - Address Addr(ValOffsetPtr, CharUnits::fromQuantity(8)); + Address Addr = + Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); Args.push_back(Builder.CreateLoad(Addr)); } - auto Intr = (BuiltinID == AArch64::BI__builtin_arm_st64b + auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b ? Intrinsic::aarch64_st64b - : BuiltinID == AArch64::BI__builtin_arm_st64bv - ? Intrinsic::aarch64_st64bv - : Intrinsic::aarch64_st64bv0); + : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv + ? Intrinsic::aarch64_st64bv + : Intrinsic::aarch64_st64bv0); Function *F = CGM.getIntrinsic(Intr); return Builder.CreateCall(F, Args); } } - if (BuiltinID == AArch64::BI__builtin_arm_rndr || - BuiltinID == AArch64::BI__builtin_arm_rndrrs) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr || + BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) { - auto Intr = (BuiltinID == AArch64::BI__builtin_arm_rndr + auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ? Intrinsic::aarch64_rndr : Intrinsic::aarch64_rndrrs); Function *F = CGM.getIntrinsic(Intr); @@ -9612,7 +9598,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Status; } - if (BuiltinID == AArch64::BI__clear_cache) { + if (BuiltinID == clang::AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); Value *Ops[2]; @@ -9624,12 +9610,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } - if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || - BuiltinID == AArch64::BI__builtin_arm_ldaex) && + if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || + BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) && getContext().getTypeSize(E->getType()) == 128) { - Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex - ? Intrinsic::aarch64_ldaxp - : Intrinsic::aarch64_ldxp); + Function *F = + CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex + ? Intrinsic::aarch64_ldaxp + : Intrinsic::aarch64_ldxp); Value *LdPtr = EmitScalarExpr(E->getArg(0)); Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), @@ -9645,43 +9632,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); Val = Builder.CreateOr(Val, Val1); return Builder.CreateBitCast(Val, ConvertType(E->getType())); - } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || - BuiltinID == AArch64::BI__builtin_arm_ldaex) { + } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || + BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) { Value *LoadAddr = EmitScalarExpr(E->getArg(0)); QualType Ty = E->getType(); llvm::Type *RealResTy = ConvertType(Ty); - llvm::Type *PtrTy = llvm::IntegerType::get( - getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); + llvm::Type *IntTy = + llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); + llvm::Type *PtrTy = IntTy->getPointerTo(); LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); - Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex - ? Intrinsic::aarch64_ldaxr - : Intrinsic::aarch64_ldxr, - PtrTy); - Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); + Function *F = + CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex + ? Intrinsic::aarch64_ldaxr + : Intrinsic::aarch64_ldxr, + PtrTy); + CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); + Val->addParamAttr( + 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); if (RealResTy->isPointerTy()) return Builder.CreateIntToPtr(Val, RealResTy); llvm::Type *IntResTy = llvm::IntegerType::get( getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); - Val = Builder.CreateTruncOrBitCast(Val, IntResTy); - return Builder.CreateBitCast(Val, RealResTy); + return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), + RealResTy); } - if ((BuiltinID == AArch64::BI__builtin_arm_strex || - BuiltinID == AArch64::BI__builtin_arm_stlex) && + if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex || + BuiltinID == clang::AArch64::BI__builtin_arm_stlex) && getContext().getTypeSize(E->getArg(0)->getType()) == 128) { - Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex - ? Intrinsic::aarch64_stlxp - : Intrinsic::aarch64_stxp); + Function *F = + CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex + ? Intrinsic::aarch64_stlxp + : Intrinsic::aarch64_stxp); llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); - Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); + Tmp = Builder.CreateElementBitCast(Tmp, STy); llvm::Value *Val = Builder.CreateLoad(Tmp); Value *Arg0 = Builder.CreateExtractValue(Val, 0); @@ -9691,8 +9683,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); } - if (BuiltinID == AArch64::BI__builtin_arm_strex || - BuiltinID == AArch64::BI__builtin_arm_stlex) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_strex || + BuiltinID == clang::AArch64::BI__builtin_arm_stlex) { Value *StoreVal = EmitScalarExpr(E->getArg(0)); Value *StoreAddr = EmitScalarExpr(E->getArg(1)); @@ -9711,14 +9703,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); } - Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex - ? Intrinsic::aarch64_stlxr - : Intrinsic::aarch64_stxr, - StoreAddr->getType()); - return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); + Function *F = + CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex + ? Intrinsic::aarch64_stlxr + : Intrinsic::aarch64_stxr, + StoreAddr->getType()); + CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); + CI->addParamAttr( + 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); + return CI; } - if (BuiltinID == AArch64::BI__getReg) { + if (BuiltinID == clang::AArch64::BI__getReg) { Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); @@ -9736,33 +9732,42 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Metadata); } - if (BuiltinID == AArch64::BI__builtin_arm_clrex) { + if (BuiltinID == clang::AArch64::BI__break) { + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) + llvm_unreachable("Sema will ensure that the parameter is constant"); + + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break); + return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); return Builder.CreateCall(F); } - if (BuiltinID == AArch64::BI_ReadWriteBarrier) + if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier) return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, llvm::SyncScope::SingleThread); // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { - case AArch64::BI__builtin_arm_crc32b: + case clang::AArch64::BI__builtin_arm_crc32b: CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; - case AArch64::BI__builtin_arm_crc32cb: + case clang::AArch64::BI__builtin_arm_crc32cb: CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; - case AArch64::BI__builtin_arm_crc32h: + case clang::AArch64::BI__builtin_arm_crc32h: CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; - case AArch64::BI__builtin_arm_crc32ch: + case clang::AArch64::BI__builtin_arm_crc32ch: CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; - case AArch64::BI__builtin_arm_crc32w: + case clang::AArch64::BI__builtin_arm_crc32w: CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; - case AArch64::BI__builtin_arm_crc32cw: + case clang::AArch64::BI__builtin_arm_crc32cw: CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; - case AArch64::BI__builtin_arm_crc32d: + case clang::AArch64::BI__builtin_arm_crc32d: CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; - case AArch64::BI__builtin_arm_crc32cd: + case clang::AArch64::BI__builtin_arm_crc32cd: CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; } @@ -9792,17 +9797,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // Memory Tagging Extensions (MTE) Intrinsics Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { - case AArch64::BI__builtin_arm_irg: + case clang::AArch64::BI__builtin_arm_irg: MTEIntrinsicID = Intrinsic::aarch64_irg; break; - case AArch64::BI__builtin_arm_addg: + case clang::AArch64::BI__builtin_arm_addg: MTEIntrinsicID = Intrinsic::aarch64_addg; break; - case AArch64::BI__builtin_arm_gmi: + case clang::AArch64::BI__builtin_arm_gmi: MTEIntrinsicID = Intrinsic::aarch64_gmi; break; - case AArch64::BI__builtin_arm_ldg: + case clang::AArch64::BI__builtin_arm_ldg: MTEIntrinsicID = Intrinsic::aarch64_ldg; break; - case AArch64::BI__builtin_arm_stg: + case clang::AArch64::BI__builtin_arm_stg: MTEIntrinsicID = Intrinsic::aarch64_stg; break; - case AArch64::BI__builtin_arm_subp: + case clang::AArch64::BI__builtin_arm_subp: MTEIntrinsicID = Intrinsic::aarch64_subp; break; } @@ -9867,24 +9872,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } } - if (BuiltinID == AArch64::BI__builtin_arm_rsr || - BuiltinID == AArch64::BI__builtin_arm_rsr64 || - BuiltinID == AArch64::BI__builtin_arm_rsrp || - BuiltinID == AArch64::BI__builtin_arm_wsr || - BuiltinID == AArch64::BI__builtin_arm_wsr64 || - BuiltinID == AArch64::BI__builtin_arm_wsrp) { + if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) { SpecialRegisterAccessKind AccessKind = Write; - if (BuiltinID == AArch64::BI__builtin_arm_rsr || - BuiltinID == AArch64::BI__builtin_arm_rsr64 || - BuiltinID == AArch64::BI__builtin_arm_rsrp) + if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsrp) AccessKind = VolatileRead; - bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || - BuiltinID == AArch64::BI__builtin_arm_wsrp; + bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || + BuiltinID == clang::AArch64::BI__builtin_arm_wsrp; - bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && - BuiltinID != AArch64::BI__builtin_arm_wsr; + bool Is64Bit = BuiltinID != clang::AArch64::BI__builtin_arm_rsr && + BuiltinID != clang::AArch64::BI__builtin_arm_wsr; llvm::Type *ValueType; llvm::Type *RegisterType = Int64Ty; @@ -9900,8 +9905,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, AccessKind); } - if (BuiltinID == AArch64::BI_ReadStatusReg || - BuiltinID == AArch64::BI_WriteStatusReg) { + if (BuiltinID == clang::AArch64::BI_ReadStatusReg || + BuiltinID == clang::AArch64::BI_WriteStatusReg) { LLVMContext &Context = CGM.getLLVMContext(); unsigned SysReg = @@ -9922,7 +9927,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Type *RegisterType = Int64Ty; llvm::Type *Types[] = { RegisterType }; - if (BuiltinID == AArch64::BI_ReadStatusReg) { + if (BuiltinID == clang::AArch64::BI_ReadStatusReg) { llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); return Builder.CreateCall(F, Metadata); @@ -9934,22 +9939,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, { Metadata, ArgValue }); } - if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { + if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) { llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } - if (BuiltinID == AArch64::BI__builtin_sponentry) { + if (BuiltinID == clang::AArch64::BI__builtin_sponentry) { llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); return Builder.CreateCall(F); } - if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) { + if (BuiltinID == clang::AArch64::BI__mulh || + BuiltinID == clang::AArch64::BI__umulh) { llvm::Type *ResType = ConvertType(E->getType()); llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); - bool IsSigned = BuiltinID == AArch64::BI__mulh; + bool IsSigned = BuiltinID == clang::AArch64::BI__mulh; Value *LHS = Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); Value *RHS = @@ -9968,6 +9974,55 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return HigherBits; } + if (BuiltinID == AArch64::BI__writex18byte || + BuiltinID == AArch64::BI__writex18word || + BuiltinID == AArch64::BI__writex18dword || + BuiltinID == AArch64::BI__writex18qword) { + llvm::Type *IntTy = ConvertType(E->getArg(1)->getType()); + + // Read x18 as i8* + LLVMContext &Context = CGM.getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + llvm::Value *X18 = Builder.CreateCall(F, Metadata); + X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0)); + + // Store val at x18 + offset + Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); + Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); + Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One()); + return Store; + } + + if (BuiltinID == AArch64::BI__readx18byte || + BuiltinID == AArch64::BI__readx18word || + BuiltinID == AArch64::BI__readx18dword || + BuiltinID == AArch64::BI__readx18qword) { + llvm::Type *IntTy = ConvertType(E->getType()); + + // Read x18 as i8* + LLVMContext &Context = CGM.getLLVMContext(); + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + llvm::Value *X18 = Builder.CreateCall(F, Metadata); + X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0)); + + // Load x18 + offset + Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); + Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); + Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0)); + LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One()); + return Load; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) @@ -10299,7 +10354,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); - Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + if (P == llvm::FCmpInst::FCMP_OEQ) + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqs_f32: @@ -10319,7 +10377,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); - Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + if (P == llvm::FCmpInst::FCMP_OEQ) + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqh_f16: @@ -10339,7 +10400,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); - Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + if (P == llvm::FCmpInst::FCMP_OEQ) + Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); + else + Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqd_s64: @@ -10684,7 +10748,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, "vgetq_lane"); } - case AArch64::BI_InterlockedAdd: { + case clang::AArch64::BI_InterlockedAdd: { Value *Arg0 = EmitScalarExpr(E->getArg(0)); Value *Arg1 = EmitScalarExpr(E->getArg(1)); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( @@ -12520,13 +12584,6 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } -// Emit binary intrinsic with the same type used in result/args. -static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF, - ArrayRef<Value *> Ops, Intrinsic::ID IID) { - llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); - return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); -} - Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); @@ -14383,12 +14440,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); // Reductions - case X86::BI__builtin_ia32_reduce_add_d512: - case X86::BI__builtin_ia32_reduce_add_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } case X86::BI__builtin_ia32_reduce_fadd_pd512: case X86::BI__builtin_ia32_reduce_fadd_ps512: case X86::BI__builtin_ia32_reduce_fadd_ph512: @@ -14429,12 +14480,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } - case X86::BI__builtin_ia32_reduce_mul_d512: - case X86::BI__builtin_ia32_reduce_mul_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } // 3DNow! case X86::BI__builtin_ia32_pswapdsf: @@ -14871,6 +14916,46 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86Select(*this, Ops[2], Res, Ops[1]); } + case X86::BI__cpuid: + case X86::BI__cpuidex: { + Value *FuncId = EmitScalarExpr(E->getArg(1)); + Value *SubFuncId = BuiltinID == X86::BI__cpuidex + ? EmitScalarExpr(E->getArg(2)) + : llvm::ConstantInt::get(Int32Ty, 0); + + llvm::StructType *CpuidRetTy = + llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty); + llvm::FunctionType *FTy = + llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false); + + StringRef Asm, Constraints; + if (getTarget().getTriple().getArch() == llvm::Triple::x86) { + Asm = "cpuid"; + Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}"; + } else { + // x86-64 uses %rbx as the base register, so preserve it. + Asm = "xchgq %rbx, ${1:q}\n" + "cpuid\n" + "xchgq %rbx, ${1:q}"; + Constraints = "={ax},=r,={cx},={dx},0,2"; + } + + llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, + /*hasSideEffects=*/false); + Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId}); + Value *BasePtr = EmitScalarExpr(E->getArg(0)); + Value *Store = nullptr; + for (unsigned i = 0; i < 4; i++) { + Value *Extracted = Builder.CreateExtractValue(IACall, i); + Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i); + Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign()); + } + + // Return the last store instruction to signal that we have emitted the + // the intrinsic. + return Store; + } + case X86::BI__emul: case X86::BI__emulu: { llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); @@ -14980,34 +15065,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Load->setVolatile(true); return Load; } - case X86::BI__builtin_ia32_paddsb512: - case X86::BI__builtin_ia32_paddsw512: - case X86::BI__builtin_ia32_paddsb256: - case X86::BI__builtin_ia32_paddsw256: - case X86::BI__builtin_ia32_paddsb128: - case X86::BI__builtin_ia32_paddsw128: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat); - case X86::BI__builtin_ia32_paddusb512: - case X86::BI__builtin_ia32_paddusw512: - case X86::BI__builtin_ia32_paddusb256: - case X86::BI__builtin_ia32_paddusw256: - case X86::BI__builtin_ia32_paddusb128: - case X86::BI__builtin_ia32_paddusw128: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat); - case X86::BI__builtin_ia32_psubsb512: - case X86::BI__builtin_ia32_psubsw512: - case X86::BI__builtin_ia32_psubsb256: - case X86::BI__builtin_ia32_psubsw256: - case X86::BI__builtin_ia32_psubsb128: - case X86::BI__builtin_ia32_psubsw128: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat); - case X86::BI__builtin_ia32_psubusb512: - case X86::BI__builtin_ia32_psubusw512: - case X86::BI__builtin_ia32_psubusb256: - case X86::BI__builtin_ia32_psubusw256: - case X86::BI__builtin_ia32_psubusb128: - case X86::BI__builtin_ia32_psubusw128: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); case X86::BI__builtin_ia32_encodekey128_u32: { Intrinsic::ID IID = Intrinsic::x86_encodekey128; @@ -15189,14 +15246,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - SmallVector<Value*, 4> Ops; - - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - if (E->getArg(i)->getType()->isArrayType()) - Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); - else - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } + // Do not emit the builtin arguments in the arguments of a function call, + // because the evaluation order of function arguments is not specified in C++. + // This is important when testing to ensure the arguments are emitted in the + // same order every time. Eg: + // Instead of: + // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), + // EmitScalarExpr(E->getArg(1)), "swdiv"); + // Use: + // Value *Op0 = EmitScalarExpr(E->getArg(0)); + // Value *Op1 = EmitScalarExpr(E->getArg(1)); + // return Builder.CreateFDiv(Op0, Op1, "swdiv") Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15223,6 +15283,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_lxvl: case PPC::BI__builtin_vsx_lxvll: { + SmallVector<Value *, 2> Ops; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops.push_back(EmitScalarExpr(E->getArg(1))); if(BuiltinID == PPC::BI__builtin_vsx_lxvl || BuiltinID == PPC::BI__builtin_vsx_lxvll){ Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); @@ -15291,6 +15354,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_stxvl: case PPC::BI__builtin_vsx_stxvll: { + SmallVector<Value *, 3> Ops; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops.push_back(EmitScalarExpr(E->getArg(2))); if(BuiltinID == PPC::BI__builtin_vsx_stxvl || BuiltinID == PPC::BI__builtin_vsx_stxvll ){ Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); @@ -15343,13 +15410,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Essentially boils down to performing an unaligned VMX load sequence so // as to avoid crossing a page boundary and then shuffling the elements // into the right side of the vector register. - int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue(); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); llvm::Type *ResTy = ConvertType(E->getType()); bool IsLE = getTarget().isLittleEndian(); // If the user wants the entire vector, just load the entire vector. if (NumBytes == 16) { - Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo()); + Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo()); Value *LD = Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1))); if (!IsLE) @@ -15367,16 +15436,14 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, : Intrinsic::ppc_altivec_lvsl); llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm); Value *HiMem = Builder.CreateGEP( - Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1)); - Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo"); + Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1)); + Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo"); Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi"); - Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1"); + Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1"); - Ops.clear(); - Ops.push_back(IsLE ? HiLd : LoLd); - Ops.push_back(IsLE ? LoLd : HiLd); - Ops.push_back(Mask1); - Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1"); + Op0 = IsLE ? HiLd : LoLd; + Op1 = IsLE ? LoLd : HiLd; + Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1"); Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType()); if (IsLE) { @@ -15397,23 +15464,25 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy); } case PPC::BI__builtin_vsx_strmb: { - int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue(); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); bool IsLE = getTarget().isLittleEndian(); auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { // Storing the whole vector, simply store it on BE and reverse bytes and // store on LE. if (Width == 16) { - Value *BC = - Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo()); - Value *StVec = Ops[2]; + Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo()); + Value *StVec = Op2; if (IsLE) { SmallVector<int, 16> RevMask; for (int Idx = 0; Idx < 16; Idx++) RevMask.push_back(15 - Idx); - StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask); + StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); } return Builder.CreateStore( - StVec, Address(BC, Ops[2]->getType(), CharUnits::fromQuantity(1))); + StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1))); } auto *ConvTy = Int64Ty; unsigned NumElts = 0; @@ -15438,9 +15507,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, break; } Value *Vec = Builder.CreateBitCast( - Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts)); - Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0], - ConstantInt::get(Int64Ty, Offset)); + Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); + Value *Ptr = + Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo()); Value *Elt = Builder.CreateExtractElement(Vec, EltNo); if (IsLE && Width > 1) { @@ -15512,62 +15581,65 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); return Builder.CreateCall(F, {X, Undef}); } - case PPC::BI__builtin_altivec_vec_replace_elt: - case PPC::BI__builtin_altivec_vec_replace_unaligned: { - // The third argument of vec_replace_elt and vec_replace_unaligned must - // be a compile time constant and will be emitted either to the vinsw - // or vinsd instruction. - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + case PPC::BI__builtin_altivec_vinsd: + case PPC::BI__builtin_altivec_vinsw: + case PPC::BI__builtin_altivec_vinsd_elt: + case PPC::BI__builtin_altivec_vinsw_elt: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + + bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw || + BuiltinID == PPC::BI__builtin_altivec_vinsd); + + bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw || + BuiltinID == PPC::BI__builtin_altivec_vinsw_elt); + + // The third argument must be a compile time constant. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); assert(ArgCI && "Third Arg to vinsw/vinsd intrinsic must be a constant integer!"); - llvm::Type *ResultType = ConvertType(E->getType()); - llvm::Function *F = nullptr; - Value *Call = nullptr; + + // Valid value for the third argument is dependent on the input type and + // builtin called. + int ValidMaxValue = 0; + if (IsUnaligned) + ValidMaxValue = (Is32bit) ? 12 : 8; + else + ValidMaxValue = (Is32bit) ? 3 : 1; + + // Get value of third argument. int64_t ConstArg = ArgCI->getSExtValue(); - unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits(); - bool Is32Bit = false; - assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width"); - // The input to vec_replace_elt is an element index, not a byte index. - if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt) - ConstArg *= ArgWidth / 8; - if (ArgWidth == 32) { - Is32Bit = true; - // When the second argument is 32 bits, it can either be an integer or - // a float. The vinsw intrinsic is used in this case. - F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw); + + // Compose range checking error message. + std::string RangeErrMsg = IsUnaligned ? "byte" : "element"; + RangeErrMsg += " number " + llvm::to_string(ConstArg); + RangeErrMsg += " is outside of the valid range [0, "; + RangeErrMsg += llvm::to_string(ValidMaxValue) + "]"; + + // Issue error if third argument is not within the valid range. + if (ConstArg < 0 || ConstArg > ValidMaxValue) + CGM.Error(E->getExprLoc(), RangeErrMsg); + + // Input to vec_replace_elt is an element index, convert to byte index. + if (!IsUnaligned) { + ConstArg *= Is32bit ? 4 : 8; // Fix the constant according to endianess. if (getTarget().isLittleEndian()) - ConstArg = 12 - ConstArg; - } else { - // When the second argument is 64 bits, it can either be a long long or - // a double. The vinsd intrinsic is used in this case. - F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd); - // Fix the constant for little endian. - if (getTarget().isLittleEndian()) - ConstArg = 8 - ConstArg; - } - Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg); - // Depending on ArgWidth, the input vector could be a float or a double. - // If the input vector is a float type, bitcast the inputs to integers. Or, - // if the input vector is a double, bitcast the inputs to 64-bit integers. - if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) { - Ops[0] = Builder.CreateBitCast( - Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4) - : llvm::FixedVectorType::get(Int64Ty, 2)); - Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty); + ConstArg = (Is32bit ? 12 : 8) - ConstArg; } - // Emit the call to vinsw or vinsd. - Call = Builder.CreateCall(F, Ops); - // Depending on the builtin, bitcast to the approriate result type. - if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt && - !Ops[1]->getType()->isIntegerTy()) - return Builder.CreateBitCast(Call, ResultType); - else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt && - Ops[1]->getType()->isIntegerTy()) - return Call; - else - return Builder.CreateBitCast(Call, - llvm::FixedVectorType::get(Int8Ty, 16)); + + ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd; + Op2 = ConstantInt::getSigned(Int32Ty, ConstArg); + // Casting input to vector int as per intrinsic definition. + Op0 = + Is32bit + ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)) + : Builder.CreateBitCast(Op0, + llvm::FixedVectorType::get(Int64Ty, 2)); + return Builder.CreateBitCast( + Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType); } case PPC::BI__builtin_altivec_vpopcntb: case PPC::BI__builtin_altivec_vpopcnth: @@ -15580,15 +15652,60 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } case PPC::BI__builtin_altivec_vadduqm: case PPC::BI__builtin_altivec_vsubuqm: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int128Ty, 1)); - Ops[1] = - Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int128Ty, 1)); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1)); if (BuiltinID == PPC::BI__builtin_altivec_vadduqm) - return Builder.CreateAdd(Ops[0], Ops[1], "vadduqm"); + return Builder.CreateAdd(Op0, Op1, "vadduqm"); else - return Builder.CreateSub(Ops[0], Ops[1], "vsubuqm"); + return Builder.CreateSub(Op0, Op1, "vsubuqm"); + } + case PPC::BI__builtin_altivec_vaddcuq_c: + case PPC::BI__builtin_altivec_vsubcuq_c: { + SmallVector<Value *, 2> Ops; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *V1I128Ty = llvm::FixedVectorType::get( + llvm::IntegerType::get(getLLVMContext(), 128), 1); + Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); + Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); + ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c) + ? Intrinsic::ppc_altivec_vaddcuq + : Intrinsic::ppc_altivec_vsubcuq; + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); + } + case PPC::BI__builtin_altivec_vaddeuqm_c: + case PPC::BI__builtin_altivec_vaddecuq_c: + case PPC::BI__builtin_altivec_vsubeuqm_c: + case PPC::BI__builtin_altivec_vsubecuq_c: { + SmallVector<Value *, 3> Ops; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + llvm::Type *V1I128Ty = llvm::FixedVectorType::get( + llvm::IntegerType::get(getLLVMContext(), 128), 1); + Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); + Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); + Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty)); + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case PPC::BI__builtin_altivec_vaddeuqm_c: + ID = Intrinsic::ppc_altivec_vaddeuqm; + break; + case PPC::BI__builtin_altivec_vaddecuq_c: + ID = Intrinsic::ppc_altivec_vaddecuq; + break; + case PPC::BI__builtin_altivec_vsubeuqm_c: + ID = Intrinsic::ppc_altivec_vsubeuqm; + break; + case PPC::BI__builtin_altivec_vsubecuq_c: + ID = Intrinsic::ppc_altivec_vsubecuq; + break; + } + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); } // Rotate and insert under mask operation. // __rldimi(rs, is, shift, mask) @@ -15597,29 +15714,37 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // (rotl(rs, shift) & mask) | (is & ~mask) case PPC::BI__builtin_ppc_rldimi: case PPC::BI__builtin_ppc_rlwimi: { - llvm::Type *Ty = Ops[0]->getType(); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + llvm::Type *Ty = Op0->getType(); Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); if (BuiltinID == PPC::BI__builtin_ppc_rldimi) - Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); - Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[2]}); - Value *X = Builder.CreateAnd(Shift, Ops[3]); - Value *Y = Builder.CreateAnd(Ops[1], Builder.CreateNot(Ops[3])); + Op2 = Builder.CreateZExt(Op2, Int64Ty); + Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); + Value *X = Builder.CreateAnd(Shift, Op3); + Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3)); return Builder.CreateOr(X, Y); } // Rotate and insert under mask operation. // __rlwnm(rs, shift, mask) // rotl(rs, shift) & mask case PPC::BI__builtin_ppc_rlwnm: { - llvm::Type *Ty = Ops[0]->getType(); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + llvm::Type *Ty = Op0->getType(); Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); - Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[1]}); - return Builder.CreateAnd(Shift, Ops[2]); + Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1}); + return Builder.CreateAnd(Shift, Op2); } case PPC::BI__builtin_ppc_poppar4: case PPC::BI__builtin_ppc_poppar8: { - llvm::Type *ArgType = Ops[0]->getType(); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Op0->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); - Value *Tmp = Builder.CreateCall(F, Ops[0]); + Value *Tmp = Builder.CreateCall(F, Op0); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); @@ -15629,10 +15754,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Result; } case PPC::BI__builtin_ppc_cmpb: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); if (getTarget().getTriple().isPPC64()) { Function *F = CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty}); - return Builder.CreateCall(F, Ops, "cmpb"); + return Builder.CreateCall(F, {Op0, Op1}, "cmpb"); } // For 32 bit, emit the code as below: // %conv = trunc i64 %a to i32 @@ -15650,13 +15777,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // ret i64 %or Function *F = CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty}); - Value *ArgOneLo = Builder.CreateTrunc(Ops[0], Int32Ty); - Value *ArgTwoLo = Builder.CreateTrunc(Ops[1], Int32Ty); + Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty); + Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty); Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32); Value *ArgOneHi = - Builder.CreateTrunc(Builder.CreateLShr(Ops[0], ShiftAmt), Int32Ty); + Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty); Value *ArgTwoHi = - Builder.CreateTrunc(Builder.CreateLShr(Ops[1], ShiftAmt), Int32Ty); + Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty); Value *ResLo = Builder.CreateZExt( Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty); Value *ResHiShift = Builder.CreateZExt( @@ -15750,30 +15877,37 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return FDiv; } case PPC::BI__builtin_ppc_alignx: { - ConstantInt *AlignmentCI = cast<ConstantInt>(Ops[0]); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + ConstantInt *AlignmentCI = cast<ConstantInt>(Op0); if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) AlignmentCI = ConstantInt::get(AlignmentCI->getType(), llvm::Value::MaximumAlignment); - emitAlignmentAssumption(Ops[1], E->getArg(1), + emitAlignmentAssumption(Op1, E->getArg(1), /*The expr loc is sufficient.*/ SourceLocation(), AlignmentCI, nullptr); - return Ops[1]; + return Op1; } case PPC::BI__builtin_ppc_rdlam: { - llvm::Type *Ty = Ops[0]->getType(); - Value *ShiftAmt = Builder.CreateIntCast(Ops[1], Ty, false); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + llvm::Type *Ty = Op0->getType(); + Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false); Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); - Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt}); - return Builder.CreateAnd(Rotate, Ops[2]); + Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt}); + return Builder.CreateAnd(Rotate, Op2); } case PPC::BI__builtin_ppc_load2r: { Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); - Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); - Value *LoadIntrinsic = Builder.CreateCall(F, Ops); + Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); + Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); } // FMA variations + case PPC::BI__builtin_ppc_fnmsub: + case PPC::BI__builtin_ppc_fnmsubs: case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: case PPC::BI__builtin_vsx_xvnmaddadp: @@ -15812,6 +15946,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, F, {X, Y, Builder.CreateFNeg(Z, "neg")}); else return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + case PPC::BI__builtin_ppc_fnmsub: + case PPC::BI__builtin_ppc_fnmsubs: case PPC::BI__builtin_vsx_xvnmsubadp: case PPC::BI__builtin_vsx_xvnmsubasp: if (Builder.getIsFPConstrained()) @@ -15820,20 +15956,22 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, F, {X, Y, Builder.CreateFNeg(Z, "neg")}), "neg"); else - return Builder.CreateFNeg( - Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}), - "neg"); - } + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z}); + } llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } case PPC::BI__builtin_vsx_insertword: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); // Third argument is a compile time constant int. It must be clamped to // to the range [0, 12]. - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); assert(ArgCI && "Third arg to xxinsertw intrinsic must be constant integer"); const int64_t MaxIndex = 12; @@ -15844,40 +15982,38 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // word from the first argument, and inserts it in the second argument. The // instruction extracts the word from its second input register and inserts // it into its first input register, so swap the first and second arguments. - std::swap(Ops[0], Ops[1]); + std::swap(Op0, Op1); // Need to cast the second argument from a vector of unsigned int to a // vector of long long. - Ops[1] = - Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); if (getTarget().isLittleEndian()) { // Reverse the double words in the vector we will extract from. - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); - Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0}); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); + Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0}); // Reverse the index. Index = MaxIndex - Index; } // Intrinsic expects the first arg to be a vector of int. - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); - Ops[2] = ConstantInt::getSigned(Int32Ty, Index); - return Builder.CreateCall(F, Ops); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); + Op2 = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, {Op0, Op1, Op2}); } case PPC::BI__builtin_vsx_extractuword: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); // Intrinsic expects the first argument to be a vector of doublewords. - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); // The second argument is a compile time constant int that needs to // be clamped to the range [0, 12]. - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1); assert(ArgCI && "Second Arg to xxextractuw intrinsic must be a constant integer!"); const int64_t MaxIndex = 12; @@ -15886,29 +16022,30 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, if (getTarget().isLittleEndian()) { // Reverse the index. Index = MaxIndex - Index; - Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + Op1 = ConstantInt::getSigned(Int32Ty, Index); // Emit the call, then reverse the double words of the results vector. - Value *Call = Builder.CreateCall(F, Ops); + Value *Call = Builder.CreateCall(F, {Op0, Op1}); Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0}); return ShuffleCall; } else { - Ops[1] = ConstantInt::getSigned(Int32Ty, Index); - return Builder.CreateCall(F, Ops); + Op1 = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, {Op0, Op1}); } } case PPC::BI__builtin_vsx_xxpermdi: { - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); assert(ArgCI && "Third arg must be constant integer!"); unsigned Index = ArgCI->getZExtValue(); - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); - Ops[1] = - Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2)); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); // Account for endianness by treating this as just a shuffle. So we use the // same indices for both LE and BE in order to produce expected results in @@ -15917,21 +16054,21 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, int ElemIdx1 = 2 + (Index & 1); int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; - Value *ShuffleCall = - Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); + Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } case PPC::BI__builtin_vsx_xxsldwi: { - ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); assert(ArgCI && "Third argument must be a compile time constant"); unsigned Index = ArgCI->getZExtValue() & 0x3; - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); - Ops[1] = - Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4)); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4)); // Create a shuffle mask int ElemIdx0; @@ -15955,28 +16092,31 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; - Value *ShuffleCall = - Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); + Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } case PPC::BI__builtin_pack_vector_int128: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); bool isLittleEndian = getTarget().isLittleEndian(); Value *UndefValue = - llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2)); + llvm::UndefValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); Value *Res = Builder.CreateInsertElement( - UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0)); - Res = Builder.CreateInsertElement(Res, Ops[1], + UndefValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); + Res = Builder.CreateInsertElement(Res, Op1, (uint64_t)(isLittleEndian ? 0 : 1)); return Builder.CreateBitCast(Res, ConvertType(E->getType())); } case PPC::BI__builtin_unpack_vector_int128: { - ConstantInt *Index = cast<ConstantInt>(Ops[1]); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + ConstantInt *Index = cast<ConstantInt>(Op1); Value *Unpacked = Builder.CreateBitCast( - Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); + Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); if (getTarget().isLittleEndian()) Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); @@ -15986,9 +16126,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_ppc_sthcx: { llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); - Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); - Ops[1] = Builder.CreateSExt(Ops[1], Int32Ty); - return Builder.CreateCall(F, Ops); + Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); + Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); + return Builder.CreateCall(F, {Op0, Op1}); } // The PPC MMA builtins take a pointer to a __vector_quad as an argument. @@ -16001,6 +16141,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_##Name: #include "clang/Basic/BuiltinsPPC.def" { + SmallVector<Value *, 4> Ops; + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) + if (E->getArg(i)->getType()->isArrayType()) + Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer()); + else + Ops.push_back(EmitScalarExpr(E->getArg(i))); // The first argument of these two builtins is a pointer used to store their // result. However, the llvm intrinsics return their result in multiple // return values. So, here we emit code extracting these values from the @@ -16084,8 +16230,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *OldVal = Builder.CreateLoad(OldValAddr); QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); LValue LV = MakeAddrLValue(Addr, AtomicTy); + Value *Op2 = EmitScalarExpr(E->getArg(2)); auto Pair = EmitAtomicCompareExchange( - LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(), + LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(), llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); // Unlike c11's atomic_compare_exchange, accroding to // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp @@ -16125,38 +16272,45 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_ppc_lbarx: return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E); case PPC::BI__builtin_ppc_mfspr: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 ? Int32Ty : Int64Ty; Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType); - return Builder.CreateCall(F, Ops); + return Builder.CreateCall(F, {Op0}); } case PPC::BI__builtin_ppc_mtspr: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 ? Int32Ty : Int64Ty; Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType); - return Builder.CreateCall(F, Ops); + return Builder.CreateCall(F, {Op0, Op1}); } case PPC::BI__builtin_ppc_popcntb: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType}); - return Builder.CreateCall(F, Ops, "popcntb"); + return Builder.CreateCall(F, {ArgValue}, "popcntb"); } case PPC::BI__builtin_ppc_mtfsf: { // The builtin takes a uint32 that needs to be cast to an // f64 to be passed to the intrinsic. - Value *Cast = Builder.CreateUIToFP(Ops[1], DoubleTy); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf); - return Builder.CreateCall(F, {Ops[0], Cast}, ""); + return Builder.CreateCall(F, {Op0, Cast}, ""); } case PPC::BI__builtin_ppc_swdiv_nochk: case PPC::BI__builtin_ppc_swdivs_nochk: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); FastMathFlags FMF = Builder.getFastMathFlags(); Builder.getFastMathFlags().setFast(); - Value *FDiv = Builder.CreateFDiv(Ops[0], Ops[1], "swdiv_nochk"); + Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk"); Builder.getFastMathFlags() &= (FMF); return FDiv; } @@ -16196,7 +16350,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Intrinsic::experimental_constrained_sqrt)) .getScalarVal(); case PPC::BI__builtin_ppc_test_data_class: { - llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType(); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *ArgType = Op0->getType(); unsigned IntrinsicID; if (ArgType->isDoubleTy()) IntrinsicID = Intrinsic::ppc_test_data_class_d; @@ -16204,12 +16360,63 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, IntrinsicID = Intrinsic::ppc_test_data_class_f; else llvm_unreachable("Invalid Argument Type"); - return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops, + return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), {Op0, Op1}, "test_data_class"); } + case PPC::BI__builtin_ppc_maxfe: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_maxfl: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_maxfs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_minfe: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_minfl: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_minfs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), + {Op0, Op1, Op2, Op3}); + } case PPC::BI__builtin_ppc_swdiv: - case PPC::BI__builtin_ppc_swdivs: - return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv"); + case PPC::BI__builtin_ppc_swdivs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateFDiv(Op0, Op1, "swdiv"); + } } } @@ -16232,12 +16439,31 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, return CGF.Builder.CreateAddrSpaceCast(Call, RetTy); } +Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { + auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr); + auto *Call = CGF.Builder.CreateCall(F); + Call->addRetAttr( + Attribute::getWithDereferenceableBytes(Call->getContext(), 256)); + Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8))); + return Call; +} + // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { - const unsigned XOffset = 4; - auto *DP = EmitAMDGPUDispatchPtr(CGF); - // Indexing the HSA kernel_dispatch_packet struct. - auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2); + bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion == + clang::TargetOptions::COV_5; + Constant *Offset; + Value *DP; + if (IsCOV_5) { + // Indexing the implicit kernarg segment. + Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2); + DP = EmitAMDGPUImplicitArgPtr(CGF); + } else { + // Indexing the HSA kernel_dispatch_packet struct. + Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2); + DP = EmitAMDGPUDispatchPtr(CGF); + } + auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); auto *DstTy = CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace()); @@ -16506,7 +16732,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: - case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: { + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { @@ -16537,6 +16765,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: + ArgTy = llvm::Type::getFloatTy(getLLVMContext()); + IID = Intrinsic::amdgcn_flat_atomic_fadd; + break; + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: + ArgTy = llvm::FixedVectorType::get( + llvm::Type::getHalfTy(getLLVMContext()), 2); + IID = Intrinsic::amdgcn_flat_atomic_fadd; + break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); @@ -16544,6 +16781,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { + Intrinsic::ID IID; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: + IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; + break; + case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: + IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; + break; + } + llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); + return Builder.CreateCall(F, {Addr, Val}); + } case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: { Intrinsic::ID IID; @@ -16608,6 +16861,69 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, RayInverseDir, TextureDescr}); } + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: { + + // These operations perform a matrix multiplication and accumulation of + // the form: + // D = A * B + C + // The return type always matches the type of matrix C. + unsigned ArgForMatchingRetType; + unsigned BuiltinWMMAOp; + + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: + ArgForMatchingRetType = 2; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: + ArgForMatchingRetType = 4; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: + ArgForMatchingRetType = 4; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4; + break; + } + + SmallVector<Value *, 6> Args; + for (int i = 0, e = E->getNumArgs(); i != e; ++i) + Args.push_back(EmitScalarExpr(E->getArg(i))); + + Function *F = CGM.getIntrinsic(BuiltinWMMAOp, + {Args[ArgForMatchingRetType]->getType()}); + + return Builder.CreateCall(F, Args); + } + // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); @@ -17411,18 +17727,19 @@ Value * CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); - clang::CharUnits Align = - CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); + QualType ArgType = E->getArg(0)->getType(); + clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(ArgType); + llvm::Type *ElemTy = ConvertTypeForMem(ArgType->getPointeeType()); return Builder.CreateCall( - CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), - Ptr->getType()}), + CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); }; auto MakeScopedAtomic = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); + llvm::Type *ElemTy = + ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); return Builder.CreateCall( - CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), - Ptr->getType()}), + CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1))}); }; switch (BuiltinID) { @@ -17628,20 +17945,22 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { case NVPTX::BI__nvvm_atom_cta_cas_gen_l: case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { Value *Ptr = EmitScalarExpr(E->getArg(0)); + llvm::Type *ElemTy = + ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); return Builder.CreateCall( CGM.getIntrinsic( - Intrinsic::nvvm_atomic_cas_gen_i_cta, - {Ptr->getType()->getPointerElementType(), Ptr->getType()}), + Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } case NVPTX::BI__nvvm_atom_sys_cas_gen_i: case NVPTX::BI__nvvm_atom_sys_cas_gen_l: case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { Value *Ptr = EmitScalarExpr(E->getArg(0)); + llvm::Type *ElemTy = + ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); return Builder.CreateCall( CGM.getIntrinsic( - Intrinsic::nvvm_atomic_cas_gen_i_sys, - {Ptr->getType()->getPointerElementType(), Ptr->getType()}), + Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } case NVPTX::BI__nvvm_match_all_sync_i32p: @@ -17936,7 +18255,7 @@ RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) { /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the -/// llvm.ptrmask instrinsic (with a GEP before in the align_up case). +/// llvm.ptrmask intrinsic (with a GEP before in the align_up case). /// TODO: actually use ptrmask once most optimization passes know about it. RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { BuiltinAlignArgs Args(E, *this); @@ -18368,15 +18687,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); return Builder.CreateCall(Callee, {Low, High}); } - case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4: - case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: { + case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: + case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: { Value *Vec = EmitScalarExpr(E->getArg(0)); unsigned IntNo; switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4: + case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: IntNo = Intrinsic::fptosi_sat; break; - case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: + case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: IntNo = Intrinsic::fptoui_sat; break; default: @@ -18467,8 +18786,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: - case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2: - case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: { + case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: + case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); unsigned IntNo; switch (BuiltinID) { @@ -18478,11 +18797,11 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: IntNo = Intrinsic::wasm_relaxed_trunc_unsigned; break; - case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2: - IntNo = Intrinsic::wasm_relaxed_trunc_zero_signed; + case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: + IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero; break; - case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: - IntNo = Intrinsic::wasm_relaxed_trunc_zero_unsigned; + case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: + IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero; break; default: llvm_unreachable("unexpected builtin ID"); @@ -18490,13 +18809,33 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo); return Builder.CreateCall(Callee, {Vec}); } + case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_s_i16x8: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_signed); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_dot_i8x16_i7x16_add_s_i32x4: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Acc = EmitScalarExpr(E->getArg(2)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_dot_i8x16_i7x16_add_signed); + return Builder.CreateCall(Callee, {LHS, RHS, Acc}); + } default: return nullptr; } } static std::pair<Intrinsic::ID, unsigned> -getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) { +getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) { struct Info { unsigned BuiltinID; Intrinsic::ID IntrinsicID; @@ -18556,7 +18895,7 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Intrinsic::ID ID; unsigned VecLen; - std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID); + std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID); auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) { // The base pointer is passed by address, so it needs to be loaded. @@ -18733,8 +19072,34 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, SmallVector<Value *, 4> Ops; llvm::Type *ResultType = ConvertType(E->getType()); - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); + // Find out if any arguments are required to be integer constant expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + if (Error == ASTContext::GE_Missing_type) { + // Vector intrinsics don't have a type string. + assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin && + BuiltinID <= clang::RISCV::LastRVVBuiltin); + ICEArguments = 0; + if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v || + BuiltinID == RISCVVector::BI__builtin_rvv_vset_v) + ICEArguments = 1 << 1; + } else { + assert(Error == ASTContext::GE_None && "Unexpected error"); + } + + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + // If this is a normal argument, just emit it as a scalar. + if ((ICEArguments & (1 << i)) == 0) { + Ops.push_back(EmitScalarExpr(E->getArg(i))); + continue; + } + + // If this is required to be a constant, constant fold it so that we know + // that the generated intrinsic gets a ConstantInt. + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); + } Intrinsic::ID ID = Intrinsic::not_intrinsic; unsigned NF = 1; @@ -18746,6 +19111,10 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, default: llvm_unreachable("unexpected builtin ID"); case RISCV::BI__builtin_riscv_orc_b_32: case RISCV::BI__builtin_riscv_orc_b_64: + case RISCV::BI__builtin_riscv_clz_32: + case RISCV::BI__builtin_riscv_clz_64: + case RISCV::BI__builtin_riscv_ctz_32: + case RISCV::BI__builtin_riscv_ctz_64: case RISCV::BI__builtin_riscv_clmul: case RISCV::BI__builtin_riscv_clmulh: case RISCV::BI__builtin_riscv_clmulr: @@ -18763,6 +19132,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_shfl_64: case RISCV::BI__builtin_riscv_unshfl_32: case RISCV::BI__builtin_riscv_unshfl_64: + case RISCV::BI__builtin_riscv_xperm4: + case RISCV::BI__builtin_riscv_xperm8: case RISCV::BI__builtin_riscv_xperm_n: case RISCV::BI__builtin_riscv_xperm_b: case RISCV::BI__builtin_riscv_xperm_h: @@ -18778,7 +19149,10 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_fsl_32: case RISCV::BI__builtin_riscv_fsr_32: case RISCV::BI__builtin_riscv_fsl_64: - case RISCV::BI__builtin_riscv_fsr_64: { + case RISCV::BI__builtin_riscv_fsr_64: + case RISCV::BI__builtin_riscv_brev8: + case RISCV::BI__builtin_riscv_zip_32: + case RISCV::BI__builtin_riscv_unzip_32: { switch (BuiltinID) { default: llvm_unreachable("unexpected builtin ID"); // Zbb @@ -18786,6 +19160,16 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_orc_b_64: ID = Intrinsic::riscv_orc_b; break; + case RISCV::BI__builtin_riscv_clz_32: + case RISCV::BI__builtin_riscv_clz_64: { + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } + case RISCV::BI__builtin_riscv_ctz_32: + case RISCV::BI__builtin_riscv_ctz_64: { + Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } // Zbc case RISCV::BI__builtin_riscv_clmul: @@ -18879,11 +19263,140 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_fsr_64: ID = Intrinsic::riscv_fsr; break; + + // Zbkx + case RISCV::BI__builtin_riscv_xperm8: + ID = Intrinsic::riscv_xperm8; + break; + case RISCV::BI__builtin_riscv_xperm4: + ID = Intrinsic::riscv_xperm4; + break; + + // Zbkb + case RISCV::BI__builtin_riscv_brev8: + ID = Intrinsic::riscv_brev8; + break; + case RISCV::BI__builtin_riscv_zip_32: + ID = Intrinsic::riscv_zip; + break; + case RISCV::BI__builtin_riscv_unzip_32: + ID = Intrinsic::riscv_unzip; + break; } IntrinsicTypes = {ResultType}; break; } + + // Zk builtins + + // Zknd + case RISCV::BI__builtin_riscv_aes32dsi_32: + ID = Intrinsic::riscv_aes32dsi; + break; + case RISCV::BI__builtin_riscv_aes32dsmi_32: + ID = Intrinsic::riscv_aes32dsmi; + break; + case RISCV::BI__builtin_riscv_aes64ds_64: + ID = Intrinsic::riscv_aes64ds; + break; + case RISCV::BI__builtin_riscv_aes64dsm_64: + ID = Intrinsic::riscv_aes64dsm; + break; + case RISCV::BI__builtin_riscv_aes64im_64: + ID = Intrinsic::riscv_aes64im; + break; + + // Zkne + case RISCV::BI__builtin_riscv_aes32esi_32: + ID = Intrinsic::riscv_aes32esi; + break; + case RISCV::BI__builtin_riscv_aes32esmi_32: + ID = Intrinsic::riscv_aes32esmi; + break; + case RISCV::BI__builtin_riscv_aes64es_64: + ID = Intrinsic::riscv_aes64es; + break; + case RISCV::BI__builtin_riscv_aes64esm_64: + ID = Intrinsic::riscv_aes64esm; + break; + + // Zknd & Zkne + case RISCV::BI__builtin_riscv_aes64ks1i_64: + ID = Intrinsic::riscv_aes64ks1i; + break; + case RISCV::BI__builtin_riscv_aes64ks2_64: + ID = Intrinsic::riscv_aes64ks2; + break; + + // Zknh + case RISCV::BI__builtin_riscv_sha256sig0: + ID = Intrinsic::riscv_sha256sig0; + IntrinsicTypes = {ResultType}; + break; + case RISCV::BI__builtin_riscv_sha256sig1: + ID = Intrinsic::riscv_sha256sig1; + IntrinsicTypes = {ResultType}; + break; + case RISCV::BI__builtin_riscv_sha256sum0: + ID = Intrinsic::riscv_sha256sum0; + IntrinsicTypes = {ResultType}; + break; + case RISCV::BI__builtin_riscv_sha256sum1: + ID = Intrinsic::riscv_sha256sum1; + IntrinsicTypes = {ResultType}; + break; + case RISCV::BI__builtin_riscv_sha512sig0_64: + ID = Intrinsic::riscv_sha512sig0; + break; + case RISCV::BI__builtin_riscv_sha512sig0h_32: + ID = Intrinsic::riscv_sha512sig0h; + break; + case RISCV::BI__builtin_riscv_sha512sig0l_32: + ID = Intrinsic::riscv_sha512sig0l; + break; + case RISCV::BI__builtin_riscv_sha512sig1_64: + ID = Intrinsic::riscv_sha512sig1; + break; + case RISCV::BI__builtin_riscv_sha512sig1h_32: + ID = Intrinsic::riscv_sha512sig1h; + break; + case RISCV::BI__builtin_riscv_sha512sig1l_32: + ID = Intrinsic::riscv_sha512sig1l; + break; + case RISCV::BI__builtin_riscv_sha512sum0_64: + ID = Intrinsic::riscv_sha512sum0; + break; + case RISCV::BI__builtin_riscv_sha512sum0r_32: + ID = Intrinsic::riscv_sha512sum0r; + break; + case RISCV::BI__builtin_riscv_sha512sum1_64: + ID = Intrinsic::riscv_sha512sum1; + break; + case RISCV::BI__builtin_riscv_sha512sum1r_32: + ID = Intrinsic::riscv_sha512sum1r; + break; + + // Zksed + case RISCV::BI__builtin_riscv_sm4ks: + ID = Intrinsic::riscv_sm4ks; + IntrinsicTypes = {ResultType}; + break; + case RISCV::BI__builtin_riscv_sm4ed: + ID = Intrinsic::riscv_sm4ed; + IntrinsicTypes = {ResultType}; + break; + + // Zksh + case RISCV::BI__builtin_riscv_sm3p0: + ID = Intrinsic::riscv_sm3p0; + IntrinsicTypes = {ResultType}; + break; + case RISCV::BI__builtin_riscv_sm3p1: + ID = Intrinsic::riscv_sm3p1; + IntrinsicTypes = {ResultType}; + break; + // Vector builtins are handled from here. #include "clang/Basic/riscv_vector_builtin_cg.inc" } |
