Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp | 1422
1 file changed, 781 insertions, 641 deletions
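The hunks below add code generation for several new builtins (the checked-arithmetic __builtin_*_overflow family, __builtin_nontemporal_load/store, and a reworked __builtin_object_size with pass_object_size support). As a minimal usage sketch — not part of this commit, and assuming a Clang that already contains these changes (3.8 or newer) — the following illustrates what the new code paths lower; names and printed values are illustrative only:

    // illustrative.cpp -- not part of the diff
    #include <cstdint>
    #include <cstdio>

    int main() {
      // __builtin_add_overflow: operands of mixed width/signedness are widened
      // to an "encompassing" integer type before the overflow check, as
      // computed by EncompassingIntegerType() in the hunks below.
      int32_t a = INT32_MAX;
      uint16_t b = 1;
      int32_t sum;
      if (__builtin_add_overflow(a, b, &sum))
        std::puts("add overflowed int32_t");

      // __builtin_object_size: for a plain local array the size is known at
      // compile time; parameters marked with pass_object_size are instead
      // answered from the hidden size argument by emitBuiltinObjectSize().
      char buf[16];
      std::printf("%zu\n", __builtin_object_size(buf, 0)); // prints 16

      // Non-temporal store/load, lowered via EmitNontemporalStore/Load().
      static int slot[4];
      __builtin_nontemporal_store(42, &slot[0]);
      return __builtin_nontemporal_load(&slot[0]) == 42 ? 0 : 1;
    }
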
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index 9b8694f9c5f2..787ac5361bbb 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -48,7 +48,7 @@ llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, if (FD->hasAttr<AsmLabelAttr>()) Name = getMangledName(D); else - Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; + Name = Context.BuiltinInfo.getName(BuiltinID) + 10; llvm::FunctionType *Ty = cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); @@ -111,6 +111,28 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, return EmitFromInt(CGF, Result, T, ValueType); } +static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + Value *Address = CGF.EmitScalarExpr(E->getArg(1)); + + // Convert the type of the pointer to a pointer to the stored type. + Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); + Value *BC = CGF.Builder.CreateBitCast( + Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); + LV.setNontemporal(true); + CGF.EmitStoreOfScalar(Val, LV, false); + return nullptr; +} + +static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Address = CGF.EmitScalarExpr(E->getArg(0)); + + LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); + LV.setNontemporal(true); + return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); +} + static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E) { @@ -215,10 +237,20 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { llvm::Type *IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateBitCast(V, IntTy); if (Ty->isPPC_FP128Ty()) { - // The higher-order double comes first, and so we need to truncate the - // pair to extract the overall sign. The order of the pair is the same - // in both little- and big-Endian modes. + // We want the sign bit of the higher-order double. The bitcast we just + // did works as if the double-double was stored to memory and then + // read as an i128. The "store" will put the higher-order double in the + // lower address in both little- and big-Endian modes, but the "load" + // will treat those bits as a different part of the i128: the low bits in + // little-Endian, the high bits in big-Endian. Therefore, on big-Endian + // we need to shift the high bits down to the low before truncating. Width >>= 1; + if (CGF.getTarget().isBigEndian()) { + Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); + V = CGF.Builder.CreateLShr(V, ShiftCst); + } + // We are truncating value in order to extract the higher-order + // double, which we will be using to extract the sign from. IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateTrunc(V, IntTy); } @@ -256,6 +288,125 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } +namespace { + struct WidthAndSignedness { + unsigned Width; + bool Signed; + }; +} + +static WidthAndSignedness +getIntegerWidthAndSignedness(const clang::ASTContext &context, + const clang::QualType Type) { + assert(Type->isIntegerType() && "Given type is not an integer."); + unsigned Width = Type->isBooleanType() ? 
1 : context.getTypeInfo(Type).Width; + bool Signed = Type->isSignedIntegerType(); + return {Width, Signed}; +} + +// Given one or more integer types, this function produces an integer type that +// encompasses them: any value in one of the given types could be expressed in +// the encompassing type. +static struct WidthAndSignedness +EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { + assert(Types.size() > 0 && "Empty list of types."); + + // If any of the given types is signed, we must return a signed type. + bool Signed = false; + for (const auto &Type : Types) { + Signed |= Type.Signed; + } + + // The encompassing type must have a width greater than or equal to the width + // of the specified types. Aditionally, if the encompassing type is signed, + // its width must be strictly greater than the width of any unsigned types + // given. + unsigned Width = 0; + for (const auto &Type : Types) { + unsigned MinWidth = Type.Width + (Signed && !Type.Signed); + if (Width < MinWidth) { + Width = MinWidth; + } + } + + return {Width, Signed}; +} + +Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { + llvm::Type *DestType = Int8PtrTy; + if (ArgValue->getType() != DestType) + ArgValue = + Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); + + Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; + return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); +} + +/// Checks if using the result of __builtin_object_size(p, @p From) in place of +/// __builtin_object_size(p, @p To) is correct +static bool areBOSTypesCompatible(int From, int To) { + // Note: Our __builtin_object_size implementation currently treats Type=0 and + // Type=2 identically. Encoding this implementation detail here may make + // improving __builtin_object_size difficult in the future, so it's omitted. + return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); +} + +static llvm::Value * +getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { + return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true); +} + +llvm::Value * +CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, + llvm::IntegerType *ResType) { + uint64_t ObjectSize; + if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) + return emitBuiltinObjectSize(E, Type, ResType); + return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); +} + +/// Returns a Value corresponding to the size of the given expression. +/// This Value may be either of the following: +/// - A llvm::Argument (if E is a param with the pass_object_size attribute on +/// it) +/// - A call to the @llvm.objectsize intrinsic +llvm::Value * +CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, + llvm::IntegerType *ResType) { + // We need to reference an argument if the pointer is a parameter with the + // pass_object_size attribute. 
+ if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { + auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); + auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); + if (Param != nullptr && PS != nullptr && + areBOSTypesCompatible(PS->getType(), Type)) { + auto Iter = SizeArguments.find(Param); + assert(Iter != SizeArguments.end()); + + const ImplicitParamDecl *D = Iter->second; + auto DIter = LocalDeclMap.find(D); + assert(DIter != LocalDeclMap.end()); + + return EmitLoadOfScalar(DIter->second, /*volatile=*/false, + getContext().getSizeType(), E->getLocStart()); + } + } + + // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't + // evaluate E for side-effects. In either case, we shouldn't lower to + // @llvm.objectsize. + if (Type == 3 || E->HasSideEffects(getContext())) + return getDefaultBuiltinObjectSizeResult(Type, ResType); + + // LLVM only supports 0 and 2, make sure that we pass along that + // as a boolean. + auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1); + // FIXME: Get right address space. + llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)}; + Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); + return Builder.CreateCall(F, {EmitScalarExpr(E), CI}); +} + RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -279,22 +430,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: case Builtin::BI__va_start: - case Builtin::BI__builtin_va_end: { - Value *ArgValue = (BuiltinID == Builtin::BI__va_start) - ? EmitScalarExpr(E->getArg(0)) - : EmitVAListRef(E->getArg(0)); - llvm::Type *DestType = Int8PtrTy; - if (ArgValue->getType() != DestType) - ArgValue = Builder.CreateBitCast(ArgValue, DestType, - ArgValue->getName().data()); - - Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? - Intrinsic::vaend : Intrinsic::vastart; - return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); - } + case Builtin::BI__builtin_va_end: + return RValue::get( + EmitVAStartEnd(BuiltinID == Builtin::BI__va_start + ? EmitScalarExpr(E->getArg(0)) + : EmitVAListRef(E->getArg(0)).getPointer(), + BuiltinID != Builtin::BI__builtin_va_end)); case Builtin::BI__builtin_va_copy: { - Value *DstPtr = EmitVAListRef(E->getArg(0)); - Value *SrcPtr = EmitVAListRef(E->getArg(1)); + Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); + Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); llvm::Type *Type = Int8PtrTy; @@ -455,6 +599,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } + case Builtin::BI__builtin_unpredictable: { + // Always return the argument of __builtin_unpredictable. LLVM does not + // handle this builtin. Metadata for this builtin should be added directly + // to instructions such as branches or switches that use it. + return RValue::get(EmitScalarExpr(E->getArg(0))); + } case Builtin::BI__builtin_expect: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); @@ -501,26 +651,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F, ArgValue)); } case Builtin::BI__builtin_object_size: { - // We rely on constant folding to deal with expressions with side effects. 
- assert(!E->getArg(0)->HasSideEffects(getContext()) && - "should have been constant folded"); - - // We pass this builtin onto the optimizer so that it can - // figure out the object size in more complex cases. - llvm::Type *ResType = ConvertType(E->getType()); - - // LLVM only supports 0 and 2, make sure that we pass along that - // as a boolean. - Value *Ty = EmitScalarExpr(E->getArg(1)); - ConstantInt *CI = dyn_cast<ConstantInt>(Ty); - assert(CI); - uint64_t val = CI->getZExtValue(); - CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); - // FIXME: Get right address space. - llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); - return RValue::get( - Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI})); + unsigned Type = + E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); + auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); + + // We pass this builtin onto the optimizer so that it can figure out the + // object size in more complex cases. + return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -737,29 +874,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BIbzero: case Builtin::BI__builtin_bzero: { - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *SizeVal = EmitScalarExpr(E->getArg(1)); - EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(), + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); - Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, - Dest.second, false); - return RValue::get(Dest.first); + Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BImemcpy: case Builtin::BI__builtin_memcpy: { - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); - std::pair<llvm::Value*, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - unsigned Align = std::min(Dest.second, Src.second); - EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(), + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); - EmitNonNullArgCheck(RValue::get(Src.first), E->getArg(1)->getType(), + EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 1); - Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); - return RValue::get(Dest.first); + Builder.CreateMemCpy(Dest, Src, SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin___memcpy_chk: { @@ -770,23 +902,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; if (Size.ugt(DstSize)) break; - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); - std::pair<llvm::Value*, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); - unsigned Align = 
std::min(Dest.second, Src.second); - Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); - return RValue::get(Dest.first); + Builder.CreateMemCpy(Dest, Src, SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin_objc_memmove_collectable: { - Value *Address = EmitScalarExpr(E->getArg(0)); - Value *SrcAddr = EmitScalarExpr(E->getArg(1)); + Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); + Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, - Address, SrcAddr, SizeVal); - return RValue::get(Address); + DestAddr, SrcAddr, SizeVal); + return RValue::get(DestAddr.getPointer()); } case Builtin::BI__builtin___memmove_chk: { @@ -797,42 +926,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; if (Size.ugt(DstSize)) break; - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); - std::pair<llvm::Value*, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); - unsigned Align = std::min(Dest.second, Src.second); - Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); - return RValue::get(Dest.first); + Builder.CreateMemMove(Dest, Src, SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BImemmove: case Builtin::BI__builtin_memmove: { - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); - std::pair<llvm::Value*, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - unsigned Align = std::min(Dest.second, Src.second); - EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(), + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); - EmitNonNullArgCheck(RValue::get(Src.first), E->getArg(1)->getType(), + EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 1); - Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); - return RValue::get(Dest.first); + Builder.CreateMemMove(Dest, Src, SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BImemset: case Builtin::BI__builtin_memset: { - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = EmitScalarExpr(E->getArg(2)); - EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(), + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); - Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); - return RValue::get(Dest.first); + Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
@@ -842,13 +964,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; if (Size.ugt(DstSize)) break; - std::pair<llvm::Value*, unsigned> Dest = - EmitPointerWithAlignment(E->getArg(0)); + Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); - Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); - return RValue::get(Dest.first); + Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); + return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin_dwarf_cfa: { // The offset in bytes from the first argument to the CFA. @@ -952,7 +1073,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI__builtin_setjmp: { // Buffer is a void**. - Value *Buf = EmitScalarExpr(E->getArg(0)); + Address Buf = EmitPointerWithAlignment(E->getArg(0)); // Store the frame pointer to the setjmp buffer. Value *FrameAddr = @@ -963,14 +1084,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); - Value *StackSaveSlot = - Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); + Address StackSaveSlot = + Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); Buf = Builder.CreateBitCast(Buf, Int8PtrTy); - return RValue::get(Builder.CreateCall(F, Buf)); + return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } case Builtin::BI__builtin_longjmp: { Value *Buf = EmitScalarExpr(E->getArg(0)); @@ -1135,8 +1256,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, StoreSize.getQuantity() * 8); Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); llvm::StoreInst *Store = - Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); - Store->setAlignment(StoreSize.getQuantity()); + Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, + StoreSize); Store->setAtomic(llvm::Release); return RValue::get(nullptr); } @@ -1153,6 +1274,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } + case Builtin::BI__builtin_nontemporal_load: + return RValue::get(EmitNontemporalLoad(*this, E)); + case Builtin::BI__builtin_nontemporal_store: + return RValue::get(EmitNontemporalStore(*this, E)); case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: { // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". 
For the @@ -1270,15 +1395,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, bool Volatile = PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); - Value *Ptr = EmitScalarExpr(E->getArg(0)); - unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); + Address Ptr = EmitPointerWithAlignment(E->getArg(0)); + unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); Value *NewVal = Builder.getInt8(0); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa<llvm::ConstantInt>(Order)) { int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); - Store->setAlignment(1); switch (ord) { case 0: // memory_order_relaxed default: // invalid order @@ -1311,7 +1435,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, for (unsigned i = 0; i < 3; ++i) { Builder.SetInsertPoint(BBs[i]); StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); - Store->setAlignment(1); Store->setOrdering(Orders[i]); Builder.CreateBr(ContBB); } @@ -1493,8 +1616,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *X = EmitScalarExpr(E->getArg(0)); llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); - std::pair<llvm::Value*, unsigned> CarryOutPtr = - EmitPointerWithAlignment(E->getArg(3)); + Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. llvm::Intrinsic::ID IntrinsicId; @@ -1525,11 +1647,91 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Sum1, Carryin, Carry2); llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), X->getType()); - llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, - CarryOutPtr.first); - CarryOutStore->setAlignment(CarryOutPtr.second); + Builder.CreateStore(CarryOut, CarryOutPtr); return RValue::get(Sum2); } + + case Builtin::BI__builtin_add_overflow: + case Builtin::BI__builtin_sub_overflow: + case Builtin::BI__builtin_mul_overflow: { + const clang::Expr *LeftArg = E->getArg(0); + const clang::Expr *RightArg = E->getArg(1); + const clang::Expr *ResultArg = E->getArg(2); + + clang::QualType ResultQTy = + ResultArg->getType()->castAs<PointerType>()->getPointeeType(); + + WidthAndSignedness LeftInfo = + getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); + WidthAndSignedness RightInfo = + getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); + WidthAndSignedness ResultInfo = + getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); + WidthAndSignedness EncompassingInfo = + EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); + + llvm::Type *EncompassingLLVMTy = + llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); + + llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); + + llvm::Intrinsic::ID IntrinsicId; + switch (BuiltinID) { + default: + llvm_unreachable("Unknown overflow builtin id."); + case Builtin::BI__builtin_add_overflow: + IntrinsicId = EncompassingInfo.Signed + ? llvm::Intrinsic::sadd_with_overflow + : llvm::Intrinsic::uadd_with_overflow; + break; + case Builtin::BI__builtin_sub_overflow: + IntrinsicId = EncompassingInfo.Signed + ? llvm::Intrinsic::ssub_with_overflow + : llvm::Intrinsic::usub_with_overflow; + break; + case Builtin::BI__builtin_mul_overflow: + IntrinsicId = EncompassingInfo.Signed + ? 
llvm::Intrinsic::smul_with_overflow + : llvm::Intrinsic::umul_with_overflow; + break; + } + + llvm::Value *Left = EmitScalarExpr(LeftArg); + llvm::Value *Right = EmitScalarExpr(RightArg); + Address ResultPtr = EmitPointerWithAlignment(ResultArg); + + // Extend each operand to the encompassing type. + Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); + Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); + + // Perform the operation on the extended values. + llvm::Value *Overflow, *Result; + Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); + + if (EncompassingInfo.Width > ResultInfo.Width) { + // The encompassing type is wider than the result type, so we need to + // truncate it. + llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); + + // To see if the truncation caused an overflow, we will extend + // the result and then compare it to the original result. + llvm::Value *ResultTruncExt = Builder.CreateIntCast( + ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); + llvm::Value *TruncationOverflow = + Builder.CreateICmpNE(Result, ResultTruncExt); + + Overflow = Builder.CreateOr(Overflow, TruncationOverflow); + Result = ResultTrunc; + } + + // Finally, store the result using the pointer. + bool isVolatile = + ResultArg->getType()->getPointeeType().isVolatileQualified(); + Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); + + return RValue::get(Overflow); + } + case Builtin::BI__builtin_uadd_overflow: case Builtin::BI__builtin_uaddl_overflow: case Builtin::BI__builtin_uaddll_overflow: @@ -1554,13 +1756,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Scalarize our inputs. llvm::Value *X = EmitScalarExpr(E->getArg(0)); llvm::Value *Y = EmitScalarExpr(E->getArg(1)); - std::pair<llvm::Value *, unsigned> SumOutPtr = - EmitPointerWithAlignment(E->getArg(2)); + Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); // Decide which of the overflow intrinsics we are lowering to: llvm::Intrinsic::ID IntrinsicId; switch (BuiltinID) { - default: llvm_unreachable("Unknown security overflow builtin id."); + default: llvm_unreachable("Unknown overflow builtin id."); case Builtin::BI__builtin_uadd_overflow: case Builtin::BI__builtin_uaddl_overflow: case Builtin::BI__builtin_uaddll_overflow: @@ -1596,13 +1797,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); - llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); - SumOutStore->setAlignment(SumOutPtr.second); + Builder.CreateStore(Sum, SumOutPtr); return RValue::get(Carry); } case Builtin::BI__builtin_addressof: - return RValue::get(EmitLValue(E->getArg(0)).getAddress()); + return RValue::get(EmitLValue(E->getArg(0)).getPointer()); case Builtin::BI__builtin_operator_new: return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), E->getArg(0), false); @@ -1777,8 +1977,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); + // Check that a call to a target specific builtin has the correct target + // features. + // This is down here to avoid non-target specific builtins, however, if + // generic builtins start to require generic target features then we + // can move this up to the beginning of the function. 
+ checkTargetFeatures(E, FD); + // See if we have a target specific intrinsic. - const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); + const char *Name = getContext().BuiltinInfo.getName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; if (const char *Prefix = llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { @@ -1856,37 +2063,54 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return GetUndefRValue(E->getType()); } -Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - switch (getTarget().getTriple().getArch()) { +static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, + unsigned BuiltinID, const CallExpr *E, + llvm::Triple::ArchType Arch) { + switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: - return EmitARMBuiltinExpr(BuiltinID, E); + return CGF->EmitARMBuiltinExpr(BuiltinID, E); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - return EmitAArch64BuiltinExpr(BuiltinID, E); + return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::x86: case llvm::Triple::x86_64: - return EmitX86BuiltinExpr(BuiltinID, E); + return CGF->EmitX86BuiltinExpr(BuiltinID, E); case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: - return EmitPPCBuiltinExpr(BuiltinID, E); + return CGF->EmitPPCBuiltinExpr(BuiltinID, E); case llvm::Triple::r600: case llvm::Triple::amdgcn: - return EmitAMDGPUBuiltinExpr(BuiltinID, E); + return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); case llvm::Triple::systemz: - return EmitSystemZBuiltinExpr(BuiltinID, E); + return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - return EmitNVPTXBuiltinExpr(BuiltinID, E); + return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); default: return nullptr; } } +Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { + assert(getContext().getAuxTargetInfo() && "Missing aux target info"); + return EmitTargetArchBuiltinExpr( + this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, + getContext().getAuxTargetInfo()->getTriple().getArch()); + } + + return EmitTargetArchBuiltinExpr(this, BuiltinID, E, + getTarget().getTriple().getArch()); +} + static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool V1Ty=false) { @@ -1917,6 +2141,19 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, llvm_unreachable("Unknown vector element type!"); } +static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, + NeonTypeFlags IntTypeFlags) { + int IsQuad = IntTypeFlags.isQuad(); + switch (IntTypeFlags.getEltType()) { + case NeonTypeFlags::Int32: + return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); + case NeonTypeFlags::Int64: + return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); + default: + llvm_unreachable("Type can't be converted to floating-point!"); + } +} + Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); Value* SV = llvm::ConstantVector::getSplat(nElts, C); @@ -1940,10 +2177,7 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, bool neg) { int 
SV = cast<ConstantInt>(V)->getSExtValue(); - - llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); - llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); - return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); + return ConstantInt::get(Ty, neg ? -SV : SV); } // \brief Right-shift a vector by a constant. @@ -1962,8 +2196,7 @@ Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, if (ShiftAmt == EltSize) { if (usgn) { // Right-shifting an unsigned value by its size yields 0. - llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); - return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); + return llvm::ConstantAggregateZero::get(VTy); } else { // Right-shifting a signed value by its size is equivalent // to a shift of size-1. @@ -1979,61 +2212,6 @@ Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, return Builder.CreateAShr(Vec, Shift, name); } -/// GetPointeeAlignment - Given an expression with a pointer type, find the -/// alignment of the type referenced by the pointer. Skip over implicit -/// casts. -std::pair<llvm::Value*, unsigned> -CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { - assert(Addr->getType()->isPointerType()); - Addr = Addr->IgnoreParens(); - if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) { - if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) && - ICE->getSubExpr()->getType()->isPointerType()) { - std::pair<llvm::Value*, unsigned> Ptr = - EmitPointerWithAlignment(ICE->getSubExpr()); - Ptr.first = Builder.CreateBitCast(Ptr.first, - ConvertType(Addr->getType())); - return Ptr; - } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - LValue LV = EmitLValue(ICE->getSubExpr()); - unsigned Align = LV.getAlignment().getQuantity(); - if (!Align) { - // FIXME: Once LValues are fixed to always set alignment, - // zap this code. - QualType PtTy = ICE->getSubExpr()->getType(); - if (!PtTy->isIncompleteType()) - Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); - else - Align = 1; - } - return std::make_pair(LV.getAddress(), Align); - } - } - if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) { - if (UO->getOpcode() == UO_AddrOf) { - LValue LV = EmitLValue(UO->getSubExpr()); - unsigned Align = LV.getAlignment().getQuantity(); - if (!Align) { - // FIXME: Once LValues are fixed to always set alignment, - // zap this code. 
- QualType PtTy = UO->getSubExpr()->getType(); - if (!PtTy->isIncompleteType()) - Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); - else - Align = 1; - } - return std::make_pair(LV.getAddress(), Align); - } - } - - unsigned Align = 1; - QualType PtTy = Addr->getType()->getPointeeType(); - if (!PtTy->isIncompleteType()) - Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); - - return std::make_pair(EmitScalarExpr(Addr), Align); -} - enum { AddRetType = (1 << 0), Add1ArgType = (1 << 1), @@ -2056,31 +2234,36 @@ enum { AddRetType | VectorizeRetType | Add1ArgType | InventFloatType }; - struct NeonIntrinsicInfo { +namespace { +struct NeonIntrinsicInfo { + const char *NameHint; unsigned BuiltinID; unsigned LLVMIntrinsic; unsigned AltLLVMIntrinsic; - const char *NameHint; unsigned TypeModifier; bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; } + bool operator<(const NeonIntrinsicInfo &TE) const { + return BuiltinID < TE.BuiltinID; + } }; +} // end anonymous namespace #define NEONMAP0(NameBase) \ - { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 } + { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ - { NEON:: BI__builtin_neon_ ## NameBase, \ - Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier } + { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ + Intrinsic::LLVMIntrinsic, 0, TypeModifier } #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ - { NEON:: BI__builtin_neon_ ## NameBase, \ + { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ - #NameBase, TypeModifier } + TypeModifier } -static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { +static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), @@ -2106,7 +2289,7 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vclzq_v, ctlz, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), - NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0), + NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), @@ -2297,7 +2480,7 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vzipq_v) }; -static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { +static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), NEONMAP0(vaddhn_v), @@ -2319,7 +2502,7 @@ static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vclzq_v, ctlz, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), - NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0), + NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), @@ -2412,7 +2595,7 @@ static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vtstq_v), }; -static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { +static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vabsd_s64, 
aarch64_neon_abs, Add1ArgType), @@ -2623,9 +2806,7 @@ findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, #ifndef NDEBUG if (!MapProvenSorted) { - // FIXME: use std::is_sorted once C++11 is allowed - for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) - assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); + assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); MapProvenSorted = true; } #endif @@ -2744,7 +2925,7 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, - SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) { + SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { // Get the last argument, which specifies the vector type. llvm::APSInt NeonTypeConst; const Expr *Arg = E->getArg(E->getNumArgs() - 1); @@ -2761,6 +2942,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( if (!Ty) return nullptr; + auto getAlignmentValue32 = [&](Address addr) -> Value* { + return Builder.getInt32(addr.getAlignment().getQuantity()); + }; + unsigned Int = LLVMIntrinsic; if ((Modifier & UnsignedAlts) && !Usgn) Int = AltLLVMIntrinsic; @@ -2782,9 +2967,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> - Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), - SrcTy->getScalarSizeInBits() / 2); - ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); + Constant *ShiftAmt = + ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); // %res = trunc <4 x i32> %high to <4 x i16> @@ -2822,13 +3006,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvt_n_f64_v: case NEON::BI__builtin_neon_vcvtq_n_f32_v: case NEON::BI__builtin_neon_vcvtq_n_f64_v: { - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, - false, Quad)); - llvm::Type *Tys[2] = { FloatTy, Ty }; + llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic; Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); @@ -2841,13 +3019,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvtq_n_u32_v: case NEON::BI__builtin_neon_vcvtq_n_s64_v: case NEON::BI__builtin_neon_vcvtq_n_u64_v: { - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, - false, Quad)); - llvm::Type *Tys[2] = { Ty, FloatTy }; + llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } @@ -2859,13 +3031,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: { - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, - false, Quad)); - Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); + Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } @@ -2901,13 +3067,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vcvtmq_s64_v: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtmq_u64_v: { - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *InTy = - GetNeonType(this, - NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, false, Quad)); - llvm::Type *Tys[2] = { Ty, InTy }; + llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); } case NEON::BI__builtin_neon_vext_v: @@ -2933,28 +3093,31 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } case NEON::BI__builtin_neon_vld1_v: - case NEON::BI__builtin_neon_vld1q_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1"); + case NEON::BI__builtin_neon_vld1q_v: { + llvm::Type *Tys[] = {Ty, Int8PtrTy}; + Ops.push_back(getAlignmentValue32(PtrOp0)); + return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); + } case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: { - Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); + llvm::Type *Tys[] = {Ty, Int8PtrTy}; + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); + Value *Align = getAlignmentValue32(PtrOp1); Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = UndefValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - LoadInst *Ld = Builder.CreateLoad(Ops[0]); - Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); + PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty); + LoadInst *Ld = Builder.CreateLoad(PtrOp0); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Ops[0] = Builder.CreateInsertElement(V, Ld, CI); return EmitNeonSplat(Ops[0], CI); @@ -2965,14 +3128,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld3q_lane_v: case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: { - Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); + llvm::Type *Tys[] = {Ty, Int8PtrTy}; + Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); for (unsigned I = 2; I < Ops.size() - 1; ++I) Ops[I] = Builder.CreateBitCast(Ops[I], Ty); - Ops.push_back(Align); + Ops.push_back(getAlignmentValue32(PtrOp1)); Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vmovl_v: { llvm::Type *DTy 
=llvm::VectorType::getTruncatedElementVectorType(VTy); @@ -3019,14 +3183,10 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vqdmlal_v: case NEON::BI__builtin_neon_vqdmlsl_v: { SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); - Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), - MulOps, "vqdmlal"); - - SmallVector<Value *, 2> AccumOps; - AccumOps.push_back(Ops[0]); - AccumOps.push_back(Mul); - return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), - AccumOps, NameHint); + Ops[1] = + EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); + Ops.resize(2); + return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); } case NEON::BI__builtin_neon_vqshl_n_v: case NEON::BI__builtin_neon_vqshlq_n_v: @@ -3088,9 +3248,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vst3_lane_v: case NEON::BI__builtin_neon_vst3q_lane_v: case NEON::BI__builtin_neon_vst4_lane_v: - case NEON::BI__builtin_neon_vst4q_lane_v: - Ops.push_back(Align); - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); + case NEON::BI__builtin_neon_vst4q_lane_v: { + llvm::Type *Tys[] = {Int8PtrTy, Ty}; + Ops.push_back(getAlignmentValue32(PtrOp0)); + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); + } case NEON::BI__builtin_neon_vsubhn_v: { llvm::VectorType *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); @@ -3101,9 +3263,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> - Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), - SrcTy->getScalarSizeInBits() / 2); - ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); + Constant *ShiftAmt = + ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); // %res = trunc <4 x i32> %high to <4 x i16> @@ -3125,7 +3286,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = llvm::ConstantVector::get(Indices); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); - SV = Builder.CreateStore(SV, Addr); + SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } @@ -3153,7 +3314,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = llvm::ConstantVector::get(Indices); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); - SV = Builder.CreateStore(SV, Addr); + SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } @@ -3173,7 +3334,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = llvm::ConstantVector::get(Indices); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); - SV = Builder.CreateStore(SV, Addr); + SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } @@ -3252,33 +3413,37 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, } Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { + unsigned Value; switch (BuiltinID) { default: return nullptr; case ARM::BI__builtin_arm_nop: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), - llvm::ConstantInt::get(Int32Ty, 0)); + Value = 0; + break; case ARM::BI__builtin_arm_yield: case ARM::BI__yield: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), - 
llvm::ConstantInt::get(Int32Ty, 1)); + Value = 1; + break; case ARM::BI__builtin_arm_wfe: case ARM::BI__wfe: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), - llvm::ConstantInt::get(Int32Ty, 2)); + Value = 2; + break; case ARM::BI__builtin_arm_wfi: case ARM::BI__wfi: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), - llvm::ConstantInt::get(Int32Ty, 3)); + Value = 3; + break; case ARM::BI__builtin_arm_sev: case ARM::BI__sev: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), - llvm::ConstantInt::get(Int32Ty, 4)); + Value = 4; + break; case ARM::BI__builtin_arm_sevl: case ARM::BI__sevl: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), - llvm::ConstantInt::get(Int32Ty, 5)); + Value = 5; + break; } + + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), + llvm::ConstantInt::get(Int32Ty, Value)); } // Generates the IR for the read/write special register builtin, @@ -3428,9 +3593,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (BuiltinID == ARM::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); - SmallVector<Value*, 2> Ops; + Value *Ops[2]; for (unsigned i = 0; i < 2; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); + Ops[i] = EmitScalarExpr(E->getArg(i)); llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); StringRef Name = FD->getName(); @@ -3504,11 +3669,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, : Intrinsic::arm_strexd); llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr); - Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); + Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Value *Val = EmitScalarExpr(E->getArg(0)); Builder.CreateStore(Val, Tmp); - Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); + Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); Val = Builder.CreateLoad(LdPtr); Value *Arg0 = Builder.CreateExtractValue(Val, 0); @@ -3627,8 +3792,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); + auto getAlignmentValue32 = [&](Address addr) -> Value* { + return Builder.getInt32(addr.getAlignment().getQuantity()); + }; + + Address PtrOp0 = Address::invalid(); + Address PtrOp1 = Address::invalid(); SmallVector<Value*, 4> Ops; - llvm::Value *Align = nullptr; bool HasExtraArg = HasExtraNeonArgument(BuiltinID); unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); for (unsigned i = 0, e = NumArgs; i != e; i++) { @@ -3658,10 +3828,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst4q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. - std::pair<llvm::Value*, unsigned> Src = - EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); + PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); + Ops.push_back(PtrOp0.getPointer()); continue; } } @@ -3684,10 +3852,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld4_dup_v: // Get the alignment for the argument in addition to the value; // we'll use it later. 
- std::pair<llvm::Value*, unsigned> Src = - EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); + PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); + Ops.push_back(PtrOp1.getPointer()); continue; } } @@ -3798,7 +3964,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (Builtin) return EmitCommonNeonBuiltinExpr( Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, - Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); + Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); unsigned Int; switch (BuiltinID) { @@ -3809,27 +3975,25 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (VTy->getElementType()->isIntegerTy(64)) { // Extract the other lane. Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); + uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); // Load the value as a one-element vector. Ty = llvm::VectorType::get(VTy->getElementType(), 1); - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); + llvm::Type *Tys[] = {Ty, Int8PtrTy}; + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); + Value *Align = getAlignmentValue32(PtrOp0); Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); // Combine them. - SmallVector<Constant*, 2> Indices; - Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); - Indices.push_back(ConstantInt::get(Int32Ty, Lane)); - SV = llvm::ConstantVector::get(Indices); + uint32_t Indices[] = {1 - Lane, Lane}; + SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); } // fall through case NEON::BI__builtin_neon_vld1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Ty = llvm::PointerType::getUnqual(VTy->getElementType()); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - LoadInst *Ld = Builder.CreateLoad(Ops[0]); - Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); + PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); + Value *Ld = Builder.CreateLoad(PtrOp0); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } case NEON::BI__builtin_neon_vld2_dup_v: @@ -3849,11 +4013,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, break; default: llvm_unreachable("unknown vld_dup intrinsic?"); } - Function *F = CGM.getIntrinsic(Int, Ty); + llvm::Type *Tys[] = {Ty, Int8PtrTy}; + Function *F = CGM.getIntrinsic(Int, Tys); + llvm::Value *Align = getAlignmentValue32(PtrOp1); Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } switch (BuiltinID) { case NEON::BI__builtin_neon_vld2_dup_v: @@ -3867,7 +4033,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, break; default: llvm_unreachable("unknown vld_dup intrinsic?"); } - Function *F = CGM.getIntrinsic(Int, Ty); + llvm::Type *Tys[] = {Ty, Int8PtrTy}; + Function *F = CGM.getIntrinsic(Int, Tys); llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); SmallVector<Value*, 6> Args; @@ -3876,7 +4043,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Args.push_back(CI); - 
Args.push_back(Align); + Args.push_back(getAlignmentValue32(PtrOp1)); Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); // splat lane 0 to all elts in each vector of the result. @@ -3889,7 +4056,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vqrshrn_n_v: Int = @@ -3941,18 +4108,17 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); - Ops[2] = Align; + Ops[2] = getAlignmentValue32(PtrOp0); + llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, - Ops[1]->getType()), Ops); + Tys), Ops); } // fall through case NEON::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - StoreInst *St = Builder.CreateStore(Ops[1], - Builder.CreateBitCast(Ops[0], Ty)); - St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); + auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); return St; } case NEON::BI__builtin_neon_vtbl1_v: @@ -4029,52 +4195,41 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID // Determine the type of this overloaded NEON intrinsic. NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *VTy = GetNeonType(&CGF, Type); - llvm::Type *Ty = VTy; + llvm::VectorType *Ty = GetNeonType(&CGF, Type); if (!Ty) return nullptr; - unsigned nElts = VTy->getNumElements(); - CodeGen::CGBuilderTy &Builder = CGF.Builder; // AArch64 scalar builtins are not overloaded, they do not have an extra // argument that specifies the vector type, need to handle each case. 
- SmallVector<Value *, 2> TblOps; switch (BuiltinID) { case NEON::BI__builtin_neon_vtbl1_v: { - TblOps.push_back(Ops[0]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, - Intrinsic::aarch64_neon_tbl1, "vtbl1"); + return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, + Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, + "vtbl1"); } case NEON::BI__builtin_neon_vtbl2_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_tbl1, "vtbl1"); + return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, + Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, + "vtbl1"); } case NEON::BI__builtin_neon_vtbl3_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, - Intrinsic::aarch64_neon_tbl2, "vtbl2"); + return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, + Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, + "vtbl2"); } case NEON::BI__builtin_neon_vtbl4_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_tbl2, "vtbl2"); + return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, + Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, + "vtbl2"); } case NEON::BI__builtin_neon_vtbx1_v: { - TblOps.push_back(Ops[1]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_tbl1, "vtbl1"); + Value *TblRes = + packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], + Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); - llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); - Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); + llvm::Constant *EightV = ConstantInt::get(Ty, 8); Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); CmpRes = Builder.CreateSExt(CmpRes, Ty); @@ -4083,20 +4238,16 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); } case NEON::BI__builtin_neon_vtbx2_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, - Intrinsic::aarch64_neon_tbx1, "vtbx1"); + return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], + Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, + "vtbx1"); } case NEON::BI__builtin_neon_vtbx3_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_tbl2, "vtbl2"); - - llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); - Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); + Value *TblRes = + packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], + Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); + + llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], TwentyFourV); CmpRes = Builder.CreateSExt(CmpRes, Ty); @@ -4106,12 +4257,9 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); } case NEON::BI__builtin_neon_vtbx4_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - 
TblOps.push_back(Ops[4]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, - Intrinsic::aarch64_neon_tbx2, "vtbx2"); + return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], + Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, + "vtbx2"); } case NEON::BI__builtin_neon_vqtbl1_v: case NEON::BI__builtin_neon_vqtbl1q_v: @@ -4156,15 +4304,6 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { return Op; } -Value *CodeGenFunction::vectorWrapScalar8(Value *Op) { - llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); - Op = Builder.CreateBitCast(Op, Int8Ty); - Value *V = UndefValue::get(VTy); - llvm::Constant *CI = ConstantInt::get(SizeTy, 0); - Op = Builder.CreateInsertElement(V, Op, CI); - return Op; -} - Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { unsigned HintID = static_cast<unsigned>(-1); @@ -4236,9 +4375,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); - SmallVector<Value*, 2> Ops; + Value *Ops[2]; for (unsigned i = 0; i < 2; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); + Ops[i] = EmitScalarExpr(E->getArg(i)); llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); StringRef Name = FD->getName(); @@ -4297,14 +4436,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_stxp); llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); - Value *One = llvm::ConstantInt::get(Int32Ty, 1); - Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), - One); - Value *Val = EmitScalarExpr(E->getArg(0)); - Builder.CreateStore(Val, Tmp); + Address Tmp = CreateMemTemp(E->getArg(0)->getType()); + EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); - Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); - Val = Builder.CreateLoad(LdPtr); + Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); + llvm::Value *Val = Builder.CreateLoad(Tmp); Value *Arg0 = Builder.CreateExtractValue(Val, 0); Value *Arg1 = Builder.CreateExtractValue(Val, 1); @@ -4342,6 +4478,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI__builtin_thread_pointer) { + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer); + return Builder.CreateCall(F); + } + // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -4453,12 +4594,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vldrq_p128: { llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); - return Builder.CreateLoad(Ptr); + return Builder.CreateDefaultAlignedLoad(Ptr); } case NEON::BI__builtin_neon_vstrq_p128: { llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); - return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr); + return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); } case NEON::BI__builtin_neon_vcvts_u32_f32: case NEON::BI__builtin_neon_vcvtd_u64_f64: @@ -4491,8 +4632,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateSIToFP(Ops[0], 
FTy); } case NEON::BI__builtin_neon_vpaddd_s64: { - llvm::Type *Ty = - llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2); + llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f64, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); @@ -4505,7 +4645,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vpaddd_f64: { llvm::Type *Ty = - llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2); + llvm::VectorType::get(DoubleTy, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f64, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); @@ -4518,7 +4658,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vpadds_f32: { llvm::Type *Ty = - llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); + llvm::VectorType::get(FloatTy, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f32, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); @@ -4566,12 +4706,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); case NEON::BI__builtin_neon_vceqzd_u64: { - llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, Ops[0], - llvm::Constant::getNullValue(Ty)); - return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); + Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); + Ops[0] = + Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); + return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); } case NEON::BI__builtin_neon_vceqd_f64: case NEON::BI__builtin_neon_vcled_f64: @@ -4645,14 +4784,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vtstd_s64: case NEON::BI__builtin_neon_vtstd_u64: { - llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); Ops.push_back(EmitScalarExpr(E->getArg(1))); - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], - llvm::Constant::getNullValue(Ty)); - return Builder.CreateSExt(Ops[0], Ty, "vtstd"); + llvm::Constant::getNullValue(Int64Ty)); + return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); } case NEON::BI__builtin_neon_vset_lane_i8: case NEON::BI__builtin_neon_vset_lane_i16: @@ -4675,89 +4813,80 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vsetq_lane_f64: // The vector type needs a cast for the v2f64 variant. 
Ops[1] = Builder.CreateBitCast(Ops[1], - llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); + llvm::VectorType::get(DoubleTy, 2)); Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vdupb_lane_i8: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vdupb_laneq_i8: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vduph_lane_i16: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vduph_laneq_i16: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vdups_lane_i32: - Ops[0] = Builder.CreateBitCast( - Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vdups_lane_f32: Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); + llvm::VectorType::get(FloatTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vdups_lane"); case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vdups_laneq_i32: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i64: case NEON::BI__builtin_neon_vdupd_lane_i64: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vdupd_lane_f64: Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); + llvm::VectorType::get(DoubleTy, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vdupd_lane"); case NEON::BI__builtin_neon_vgetq_lane_i64: case NEON::BI__builtin_neon_vdupd_laneq_i64: - Ops[0] = Builder.CreateBitCast(Ops[0], - 
llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_f32: Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); + llvm::VectorType::get(FloatTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vget_lane_f64: Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); + llvm::VectorType::get(DoubleTy, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_f32: case NEON::BI__builtin_neon_vdups_laneq_f32: Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); + llvm::VectorType::get(FloatTy, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vgetq_lane_f64: case NEON::BI__builtin_neon_vdupd_laneq_f64: Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); + llvm::VectorType::get(DoubleTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vaddd_s64: @@ -4930,7 +5059,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, if (Builtin) return EmitCommonNeonBuiltinExpr( Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, - Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); + Builtin->NameHint, Builtin->TypeModifier, E, Ops, + /*never use addresses*/ Address::invalid(), Address::invalid()); if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) return V; @@ -5096,15 +5226,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_fmaxnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); case NEON::BI__builtin_neon_vrecpss_f32: { - llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); Ops.push_back(EmitScalarExpr(E->getArg(1))); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), Ops, "vrecps"); } case NEON::BI__builtin_neon_vrecpsd_f64: { - llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); Ops.push_back(EmitScalarExpr(E->getArg(1))); - return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), Ops, "vrecps"); } case NEON::BI__builtin_neon_vqshrun_n_v: @@ -5207,13 +5335,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: { - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *InTy = - GetNeonType(this, - NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, false, quad)); - Ops[0] = Builder.CreateBitCast(Ops[0], InTy); + Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); if (usgn) return Builder.CreateFPToUI(Ops[0], Ty); return Builder.CreateFPToSI(Ops[0], Ty); @@ -5227,13 +5349,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvta_u64_v: case NEON::BI__builtin_neon_vcvtaq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *InTy = - GetNeonType(this, - NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, false, quad)); - llvm::Type *Tys[2] = { Ty, InTy }; + llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); } case NEON::BI__builtin_neon_vcvtm_s32_v: @@ -5245,13 +5361,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvtm_u64_v: case NEON::BI__builtin_neon_vcvtmq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *InTy = - GetNeonType(this, - NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, false, quad)); - llvm::Type *Tys[2] = { Ty, InTy }; + llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); } case NEON::BI__builtin_neon_vcvtn_s32_v: @@ -5263,13 +5373,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvtn_u64_v: case NEON::BI__builtin_neon_vcvtnq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *InTy = - GetNeonType(this, - NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, false, quad)); - llvm::Type *Tys[2] = { Ty, InTy }; + llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); } case NEON::BI__builtin_neon_vcvtp_s32_v: @@ -5281,13 +5385,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvtp_u64_v: case NEON::BI__builtin_neon_vcvtpq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; - bool Double = - (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); - llvm::Type *InTy = - GetNeonType(this, - NeonTypeFlags(Double ? NeonTypeFlags::Float64 - : NeonTypeFlags::Float32, false, quad)); - llvm::Type *Tys[2] = { Ty, InTy }; + llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); } case NEON::BI__builtin_neon_vmulx_v: @@ -5338,232 +5436,192 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // FALLTHROUGH case NEON::BI__builtin_neon_vaddv_s8: { Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vaddv_u16: usgn = true; // FALLTHROUGH case NEON::BI__builtin_neon_vaddv_s16: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddvq_u8: usgn = true; // FALLTHROUGH case NEON::BI__builtin_neon_vaddvq_s8: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vaddvq_u16: usgn = true; // FALLTHROUGH case NEON::BI__builtin_neon_vaddvq_s16: { Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxv_u8: { Int = Intrinsic::aarch64_neon_umaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxv_u16: { Int = Intrinsic::aarch64_neon_umaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxvq_u8: { Int = Intrinsic::aarch64_neon_umaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxvq_u16: { Int = Intrinsic::aarch64_neon_umaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxv_s8: { Int = Intrinsic::aarch64_neon_smaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxv_s16: { Int = Intrinsic::aarch64_neon_smaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); 
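All of these across-vector reductions follow the same shape: call an aarch64.neon.*v intrinsic that yields an i32, then truncate back to the element width. At the source level that is why vaddv_u8 wraps at 8 bits while the widening vaddlv_u8 handled further down does not. A small usage sketch; it assumes an AArch64 target, and the expected results in the comments are worked out by hand.

    // Build for AArch64, e.g.: clang++ --target=aarch64-linux-gnu -O2 reduce.cpp
    #include <arm_neon.h>
    #include <cstdio>

    int main() {
      uint8x8_t v = {200, 100, 1, 2, 3, 4, 5, 6};   // element sum is 321
      uint8_t sum8   = vaddv_u8(v);   // uaddv returns i32, truncated to i8: 321 % 256 = 65
      uint8_t max8   = vmaxv_u8(v);   // umaxv, truncated to i8: 200
      uint16_t sum16 = vaddlv_u8(v);  // widening uaddlv, truncated to i16: 321
      printf("%d %d %d\n", sum8, max8, sum16);
      return 0;
    }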
llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxvq_s8: { Int = Intrinsic::aarch64_neon_smaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxvq_s16: { Int = Intrinsic::aarch64_neon_smaxv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminv_u8: { Int = Intrinsic::aarch64_neon_uminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminv_u16: { Int = Intrinsic::aarch64_neon_uminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminvq_u8: { Int = Intrinsic::aarch64_neon_uminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminvq_u16: { Int = Intrinsic::aarch64_neon_uminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return 
Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminv_s8: { Int = Intrinsic::aarch64_neon_sminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminv_s16: { Int = Intrinsic::aarch64_neon_sminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminvq_s8: { Int = Intrinsic::aarch64_neon_sminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 8)); + return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminvq_s16: { Int = Intrinsic::aarch64_neon_sminv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmul_n_f64: { Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); @@ -5572,80 +5630,68 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vaddlv_u8: { Int = Intrinsic::aarch64_neon_uaddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlv_u16: { Int = Intrinsic::aarch64_neon_uaddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vaddlvq_u8: { Int = Intrinsic::aarch64_neon_uaddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - 
llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlvq_u16: { Int = Intrinsic::aarch64_neon_uaddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vaddlv_s8: { Int = Intrinsic::aarch64_neon_saddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlv_s16: { Int = Intrinsic::aarch64_neon_saddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vaddlvq_s8: { Int = Intrinsic::aarch64_neon_saddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); - return Builder.CreateTrunc(Ops[0], - llvm::IntegerType::get(getLLVMContext(), 16)); + return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlvq_s16: { Int = Intrinsic::aarch64_neon_saddlv; - Ty = llvm::IntegerType::get(getLLVMContext(), 32); - VTy = - llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); + Ty = Int32Ty; + VTy = llvm::VectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -5708,7 +5754,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vst1_x2_v: case NEON::BI__builtin_neon_vst1q_x2_v: @@ -5733,32 +5779,31 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_st1x4; break; } - SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end()); - IntOps.push_back(Ops[0]); - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, ""); + std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); + return 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); - return Builder.CreateLoad(Ops[0]); + return Builder.CreateDefaultAlignedLoad(Ops[0]); case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); case NEON::BI__builtin_neon_vld1_lane_v: case NEON::BI__builtin_neon_vld1q_lane_v: Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[0] = Builder.CreateLoad(Ops[0]); + Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = UndefValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[0] = Builder.CreateLoad(Ops[0]); + Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); return EmitNeonSplat(Ops[0], CI); @@ -5768,7 +5813,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); + return Builder.CreateDefaultAlignedStore(Ops[1], + Builder.CreateBitCast(Ops[0], Ty)); case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: { llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); @@ -5778,7 +5824,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: { @@ -5789,7 +5835,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: { @@ -5800,7 +5846,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_dup_v: case NEON::BI__builtin_neon_vld2q_dup_v: { @@ -5812,7 +5858,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case 
NEON::BI__builtin_neon_vld3_dup_v: case NEON::BI__builtin_neon_vld3q_dup_v: { @@ -5824,7 +5870,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_dup_v: case NEON::BI__builtin_neon_vld4q_dup_v: { @@ -5836,7 +5882,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_lane_v: case NEON::BI__builtin_neon_vld2q_lane_v: { @@ -5846,12 +5892,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.erase(Ops.begin()+1); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); - Ops[3] = Builder.CreateZExt(Ops[3], - llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_lane_v: case NEON::BI__builtin_neon_vld3q_lane_v: { @@ -5862,12 +5907,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); - Ops[4] = Builder.CreateZExt(Ops[4], - llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: { @@ -5879,12 +5923,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateBitCast(Ops[4], Ty); - Ops[5] = Builder.CreateZExt(Ops[5], - llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vst2_v: case NEON::BI__builtin_neon_vst2q_v: { @@ -5898,8 +5941,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst2q_lane_v: { Ops.push_back(Ops[0]); Ops.erase(Ops.begin()); - Ops[2] = Builder.CreateZExt(Ops[2], - llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), Ops, ""); @@ -5916,8 +5958,7 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst3q_lane_v: { Ops.push_back(Ops[0]); Ops.erase(Ops.begin()); - Ops[3] = Builder.CreateZExt(Ops[3], - llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), Ops, ""); @@ -5934,8 +5975,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vst4q_lane_v: { Ops.push_back(Ops[0]); Ops.erase(Ops.begin()); - Ops[4] = Builder.CreateZExt(Ops[4], - llvm::IntegerType::get(getLLVMContext(), 64)); + Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), Ops, ""); @@ -5956,7 +5996,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = llvm::ConstantVector::get(Indices); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); - SV = Builder.CreateStore(SV, Addr); + SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } @@ -5975,7 +6015,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = llvm::ConstantVector::get(Indices); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); - SV = Builder.CreateStore(SV, Addr); + SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } @@ -5995,7 +6035,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = llvm::ConstantVector::get(Indices); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); - SV = Builder.CreateStore(SV, Addr); + SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } @@ -6072,6 +6112,31 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + if (BuiltinID == X86::BI__builtin_ms_va_start || + BuiltinID == X86::BI__builtin_ms_va_end) + return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), + BuiltinID == X86::BI__builtin_ms_va_start); + if (BuiltinID == X86::BI__builtin_ms_va_copy) { + // Lower this manually. We can't reliably determine whether or not any + // given va_copy() is for a Win64 va_list from the calling convention + // alone, because it's legal to do this from a System V ABI function. + // With opaque pointer types, we won't have enough information in LLVM + // IR to determine this from the argument types, either. Best to do it + // now, while we have enough information. + Address DestAddr = EmitMSVAListRef(E->getArg(0)); + Address SrcAddr = EmitMSVAListRef(E->getArg(1)); + + llvm::Type *BPP = Int8PtrPtrTy; + + DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), + DestAddr.getAlignment()); + SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), + SrcAddr.getAlignment()); + + Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); + return Builder.CreateStore(ArgPtr, DestAddr); + } + SmallVector<Value*, 4> Ops; // Find out if any arguments are required to be integer constant expressions. 
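The comment above about __builtin_ms_va_copy is easiest to see from the source side: on x86-64 Linux a single function can be declared ms_abi and therefore use the Win64 char*-style va_list even though the rest of the translation unit follows the System V ABI, so the va_list flavour has to be pinned down while the builtin is lowered rather than recovered later from types or calling conventions. A hedged sketch of that situation; the function names and argument values are invented, and it assumes an x86-64 target.

    #include <cstdio>

    // A System V translation unit containing one ms_abi variadic function.
    __attribute__((ms_abi)) int first_of(int n, ...) {
      __builtin_ms_va_list ap, aq;
      __builtin_ms_va_start(ap, n);
      __builtin_ms_va_copy(aq, ap);        // the case lowered just above
      int v = __builtin_va_arg(aq, int);   // read the first variadic argument
      __builtin_ms_va_end(aq);
      __builtin_ms_va_end(ap);
      return v;
    }

    int main() {
      printf("%d\n", first_of(3, 10, 20, 30));   // prints: 10
      return 0;
    }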
@@ -6167,7 +6232,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, ConstantInt::get(Int32Ty, 0) }; Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); - Value *Features = Builder.CreateLoad(CpuFeatures); + Value *Features = Builder.CreateAlignedLoad(CpuFeatures, + CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. Value *Bitset = Builder.CreateAnd( @@ -6175,13 +6241,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); } case X86::BI_mm_prefetch: { - Value *Address = EmitScalarExpr(E->getArg(0)); + Value *Address = Ops[0]; Value *RW = ConstantInt::get(Int32Ty, 0); - Value *Locality = EmitScalarExpr(E->getArg(1)); + Value *Locality = Ops[1]; Value *Data = ConstantInt::get(Int32Ty, 1); Value *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } + case X86::BI__builtin_ia32_undef128: + case X86::BI__builtin_ia32_undef256: + case X86::BI__builtin_ia32_undef512: + return UndefValue::get(ConvertType(E->getType())); case X86::BI__builtin_ia32_vec_init_v8qi: case X86::BI__builtin_ia32_vec_init_v4hi: case X86::BI__builtin_ia32_vec_init_v2si: @@ -6191,17 +6261,57 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractElement(Ops[0], llvm::ConstantInt::get(Ops[1]->getType(), 0)); case X86::BI__builtin_ia32_ldmxcsr: { - Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); + Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Builder.CreateStore(Ops[0], Tmp); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), - Builder.CreateBitCast(Tmp, Int8PtrTy)); + Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); } case X86::BI__builtin_ia32_stmxcsr: { - Value *Tmp = CreateMemTemp(E->getType()); + Address Tmp = CreateMemTemp(E->getType()); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), - Builder.CreateBitCast(Tmp, Int8PtrTy)); + Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); return Builder.CreateLoad(Tmp, "stmxcsr"); } + case X86::BI__builtin_ia32_xsave: + case X86::BI__builtin_ia32_xsave64: + case X86::BI__builtin_ia32_xrstor: + case X86::BI__builtin_ia32_xrstor64: + case X86::BI__builtin_ia32_xsaveopt: + case X86::BI__builtin_ia32_xsaveopt64: + case X86::BI__builtin_ia32_xrstors: + case X86::BI__builtin_ia32_xrstors64: + case X86::BI__builtin_ia32_xsavec: + case X86::BI__builtin_ia32_xsavec64: + case X86::BI__builtin_ia32_xsaves: + case X86::BI__builtin_ia32_xsaves64: { + Intrinsic::ID ID; +#define INTRINSIC_X86_XSAVE_ID(NAME) \ + case X86::BI__builtin_ia32_##NAME: \ + ID = Intrinsic::x86_##NAME; \ + break + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + INTRINSIC_X86_XSAVE_ID(xsave); + INTRINSIC_X86_XSAVE_ID(xsave64); + INTRINSIC_X86_XSAVE_ID(xrstor); + INTRINSIC_X86_XSAVE_ID(xrstor64); + INTRINSIC_X86_XSAVE_ID(xsaveopt); + INTRINSIC_X86_XSAVE_ID(xsaveopt64); + INTRINSIC_X86_XSAVE_ID(xrstors); + INTRINSIC_X86_XSAVE_ID(xrstors64); + INTRINSIC_X86_XSAVE_ID(xsavec); + INTRINSIC_X86_XSAVE_ID(xsavec64); + INTRINSIC_X86_XSAVE_ID(xsaves); + INTRINSIC_X86_XSAVE_ID(xsaves64); + } +#undef INTRINSIC_X86_XSAVE_ID + Value *Mhi = Builder.CreateTrunc( + Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); + Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); + Ops[1] = Mhi; + Ops.push_back(Mlo); + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } case 
X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -6217,7 +6327,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // cast pointer to i64 & store Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); - return Builder.CreateStore(Ops[1], Ops[0]); + return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: { @@ -6242,18 +6352,19 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } - SmallVector<llvm::Constant*, 32> Indices; + uint32_t Indices[32]; // 256-bit palignr operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { unsigned Idx = ShiftVal + i; if (Idx >= NumLaneElts) Idx += NumElts - NumLaneElts; // End of lane, switch operand. - Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); + Indices[l + i] = Idx + l; } } - Value* SV = llvm::ConstantVector::get(Indices); + Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), + makeArrayRef(Indices, NumElts)); return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); } case X86::BI__builtin_ia32_pslldqi256: { @@ -6264,13 +6375,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, if (shiftVal >= 16) return llvm::Constant::getNullValue(ConvertType(E->getType())); - SmallVector<llvm::Constant*, 32> Indices; + uint32_t Indices[32]; // 256-bit pslldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != 32; l += 16) { for (unsigned i = 0; i != 16; ++i) { unsigned Idx = 32 + i - shiftVal; if (Idx < 32) Idx -= 16; // end of lane, switch operand. - Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); + Indices[l + i] = Idx + l; } } @@ -6278,7 +6389,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); - Value *SV = llvm::ConstantVector::get(Indices); + Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); llvm::Type *ResultType = ConvertType(E->getType()); return Builder.CreateBitCast(SV, ResultType, "cast"); @@ -6291,13 +6402,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, if (shiftVal >= 16) return llvm::Constant::getNullValue(ConvertType(E->getType())); - SmallVector<llvm::Constant*, 32> Indices; + uint32_t Indices[32]; // 256-bit psrldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != 32; l += 16) { for (unsigned i = 0; i != 16; ++i) { unsigned Idx = i + shiftVal; if (Idx >= 16) Idx += 16; // end of lane, switch operand. 
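The palignr and pslldq/psrldq cases here all build a constant index vector for a shufflevector by walking each 128-bit lane separately; once an index runs past the end of its lane it is redirected into the second shuffle operand (the other source for palignr, an all-zero vector for the byte shifts). A standalone model of the psrldq index math, using 3 as an example shift count.

    #include <cstdio>

    // Plain C++ model of the index vector the 256-bit psrldq lowering builds:
    // byte i of each 128-bit lane comes from byte i+shift of that lane, or from
    // the zero operand once i+shift runs off the end of the lane.
    int main() {
      const unsigned shiftVal = 3;       // example shift count
      unsigned Indices[32];
      for (unsigned l = 0; l != 32; l += 16) {
        for (unsigned i = 0; i != 16; ++i) {
          unsigned Idx = i + shiftVal;
          if (Idx >= 16)
            Idx += 16;                   // off the end of the lane: take a zero byte
          Indices[l + i] = Idx + l;
        }
      }
      for (unsigned i = 0; i != 32; ++i)
        printf("%u%c", Indices[i], i == 31 ? '\n' : ' ');
      // Lane 0 prints 3..15 then 32 33 34; lane 1 prints 19..31 then 48 49 50.
      return 0;
    }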
- Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); + Indices[l + i] = Idx + l; } } @@ -6305,7 +6416,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); - Value *SV = llvm::ConstantVector::get(Indices); + Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); llvm::Type *ResultType = ConvertType(E->getType()); return Builder.CreateBitCast(SV, ResultType, "cast"); @@ -6325,7 +6436,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *BC = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()), "cast"); - StoreInst *SI = Builder.CreateStore(Ops[1], BC); + StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); // If the operand is an integer, we can't assume alignment. Otherwise, @@ -6377,7 +6488,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); - Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); + Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), + Ops[0]); return Builder.CreateExtractValue(Call, 1); } // SSE comparison intrisics @@ -6544,6 +6656,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: return nullptr; + // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we + // call __builtin_readcyclecounter. + case PPC::BI__builtin_ppc_get_timebase: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); + // vec_ld, vec_lvsl, vec_lvsr case PPC::BI__builtin_altivec_lvx: case PPC::BI__builtin_altivec_lvxl: @@ -6775,8 +6892,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // Translate from the intrinsics's struct return to the builtin's out // argument. 
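The new PPC case above simply aliases __builtin_ppc_get_timebase to the existing readcyclecounter lowering, so both spellings should compile to the same timebase read. A minimal check; it assumes a PowerPC target, and the build line is only an example.

    // Example build: clang++ --target=powerpc64le-linux-gnu -O2 tb.cpp
    #include <cstdio>

    int main() {
      unsigned long long a = __builtin_ppc_get_timebase();  // GCC 4.8+ spelling
      unsigned long long b = __builtin_readcyclecounter();  // generic clang builtin
      printf("%llu %llu\n", a, b);   // two nearby timebase samples
      return 0;
    }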
- std::pair<llvm::Value *, unsigned> FlagOutPtr - = EmitPointerWithAlignment(E->getArg(3)); + Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); llvm::Value *X = EmitScalarExpr(E->getArg(0)); llvm::Value *Y = EmitScalarExpr(E->getArg(1)); @@ -6791,11 +6907,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); llvm::Type *RealFlagType - = FlagOutPtr.first->getType()->getPointerElementType(); + = FlagOutPtr.getPointer()->getType()->getPointerElementType(); llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); - llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); - FlagStore->setAlignment(FlagOutPtr.second); + Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } case AMDGPU::BI__builtin_amdgpu_div_fmas: @@ -6846,7 +6961,7 @@ static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, SmallVector<Value *, 8> Args(NumArgs); for (unsigned I = 0; I < NumArgs; ++I) Args[I] = CGF.EmitScalarExpr(E->getArg(I)); - Value *CCPtr = CGF.EmitScalarExpr(E->getArg(NumArgs)); + Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); Value *F = CGF.CGM.getIntrinsic(IntrinsicID); Value *Call = CGF.Builder.CreateCall(F, Args); Value *CC = CGF.Builder.CreateExtractValue(Call, 1); @@ -7115,23 +7230,29 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_max_gen_i: case NVPTX::BI__nvvm_atom_max_gen_l: case NVPTX::BI__nvvm_atom_max_gen_ll: + return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); + case NVPTX::BI__nvvm_atom_max_gen_ui: case NVPTX::BI__nvvm_atom_max_gen_ul: case NVPTX::BI__nvvm_atom_max_gen_ull: - return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); + return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); case NVPTX::BI__nvvm_atom_min_gen_i: case NVPTX::BI__nvvm_atom_min_gen_l: case NVPTX::BI__nvvm_atom_min_gen_ll: + return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); + case NVPTX::BI__nvvm_atom_min_gen_ui: case NVPTX::BI__nvvm_atom_min_gen_ul: case NVPTX::BI__nvvm_atom_min_gen_ull: - return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); + return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); case NVPTX::BI__nvvm_atom_cas_gen_i: case NVPTX::BI__nvvm_atom_cas_gen_l: case NVPTX::BI__nvvm_atom_cas_gen_ll: - return MakeAtomicCmpXchgValue(*this, E, true); + // __nvvm_atom_cas_gen_* should return the old value rather than the + // success flag. + return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); case NVPTX::BI__nvvm_atom_add_gen_f: { Value *Ptr = EmitScalarExpr(E->getArg(0)); @@ -7147,3 +7268,22 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return nullptr; } } + +Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_memory_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + return Builder.CreateCall(Callee); + } + case WebAssembly::BI__builtin_wasm_grow_memory: { + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); + return Builder.CreateCall(Callee, X); + } + + default: + return nullptr; + } +} |