author    Dimitry Andric <dim@FreeBSD.org>    2021-06-13 19:31:46 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2021-06-13 19:37:19 +0000
commit    e8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch)
tree      94f04805f47bb7c59ae29690d8952b6074fff602 /contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
parent    bb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff)
parent    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp | 448
1 file changed, 284 insertions(+), 164 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
index 1e8fdb506619..23e7af6287b6 100644
--- a/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
@@ -68,6 +69,19 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
return true;
}
+// Upgrade the declaration of fp compare intrinsics that change return type
+// from scalar to vXi1 mask.
+static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check if the return type is a vector.
+ if (F->getReturnType()->isVectorTy())
+ return false;
+
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
+
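A minimal sketch of what this helper does to one legacy declaration; the driver code and the module M are illustrative, not part of the patch:

    // Assuming M declares the pre-7.0 form
    //   declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>,
    //                                               <2 x double>, i32, i8)
    if (Function *F = M.getFunction("llvm.x86.avx512.mask.cmp.pd.128")) {
      Function *NewFn = nullptr;
      if (UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                    NewFn)) {
        // F was renamed out of the way; NewFn now declares the vXi1 form:
        //   declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(
        //       <2 x double>, <2 x double>, i32, <2 x i1>)
      }
    }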
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsic matches below should be marked with which llvm
// version started autoupgrading them. At some point in the future we would
@@ -241,7 +255,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
- Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
+ Name.startswith("avx512.cmp.p") || // Added in 12.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
@@ -456,6 +470,24 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
+ if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
+ NewFn);
+ if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
+ NewFn);
+ if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
+ NewFn);
+ if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
+ NewFn);
+ if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
+ NewFn);
+ if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
+ return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
+ NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
@@ -601,6 +633,63 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return true;
}
}
+
+ // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and v16i8
+ // respectively
+ if ((Name.startswith("arm.neon.bfdot.") ||
+ Name.startswith("aarch64.neon.bfdot.")) &&
+ Name.endswith("i8")) {
+ Intrinsic::ID IID =
+ StringSwitch<Intrinsic::ID>(Name)
+ .Cases("arm.neon.bfdot.v2f32.v8i8",
+ "arm.neon.bfdot.v4f32.v16i8",
+ Intrinsic::arm_neon_bfdot)
+ .Cases("aarch64.neon.bfdot.v2f32.v8i8",
+ "aarch64.neon.bfdot.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfdot)
+ .Default(Intrinsic::not_intrinsic);
+ if (IID == Intrinsic::not_intrinsic)
+ break;
+
+ size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
+ assert((OperandWidth == 64 || OperandWidth == 128) &&
+ "Unexpected operand width");
+ LLVMContext &Ctx = F->getParent()->getContext();
+ std::array<Type *, 2> Tys {{
+ F->getReturnType(),
+ FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
+ }};
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
+ return true;
+ }
+
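A worked instance of the width arithmetic above, with illustrative values: a v2f32 return type has a primitive size of 64 bits, so the bfloat operand type gets 64 / 16 = 4 lanes; a v4f32 return yields 8 lanes:

    // Sketch for the v2f32 case, assuming an LLVMContext Ctx:
    unsigned OperandWidth = 64;                     // v2f32 return type
    Type *BF16VecTy = FixedVectorType::get(
        Type::getBFloatTy(Ctx), OperandWidth / 16); // <4 x bfloat>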
+ // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
+ // and accept v8bf16 instead of v16i8
+ if ((Name.startswith("arm.neon.bfm") ||
+ Name.startswith("aarch64.neon.bfm")) &&
+ Name.endswith(".v4f32.v16i8")) {
+ Intrinsic::ID IID =
+ StringSwitch<Intrinsic::ID>(Name)
+ .Case("arm.neon.bfmmla.v4f32.v16i8",
+ Intrinsic::arm_neon_bfmmla)
+ .Case("arm.neon.bfmlalb.v4f32.v16i8",
+ Intrinsic::arm_neon_bfmlalb)
+ .Case("arm.neon.bfmlalt.v4f32.v16i8",
+ Intrinsic::arm_neon_bfmlalt)
+ .Case("aarch64.neon.bfmmla.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfmmla)
+ .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfmlalb)
+ .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
+ Intrinsic::aarch64_neon_bfmlalt)
+ .Default(Intrinsic::not_intrinsic);
+ if (IID == Intrinsic::not_intrinsic)
+ break;
+
+ std::array<Type *, 0> Tys;
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
+ return true;
+ }
break;
}
@@ -629,18 +718,42 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
case 'e': {
SmallVector<StringRef, 2> Groups;
- static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+ static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups)) {
+ Intrinsic::ID ID;
+ ID = StringSwitch<Intrinsic::ID>(Groups[1])
+ .Case("add", Intrinsic::vector_reduce_add)
+ .Case("mul", Intrinsic::vector_reduce_mul)
+ .Case("and", Intrinsic::vector_reduce_and)
+ .Case("or", Intrinsic::vector_reduce_or)
+ .Case("xor", Intrinsic::vector_reduce_xor)
+ .Case("smax", Intrinsic::vector_reduce_smax)
+ .Case("smin", Intrinsic::vector_reduce_smin)
+ .Case("umax", Intrinsic::vector_reduce_umax)
+ .Case("umin", Intrinsic::vector_reduce_umin)
+ .Case("fmax", Intrinsic::vector_reduce_fmax)
+ .Case("fmin", Intrinsic::vector_reduce_fmin)
+ .Default(Intrinsic::not_intrinsic);
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ auto Args = F->getFunctionType()->params();
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
+ return true;
+ }
+ }
+ static const Regex R2(
+ "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
+ Groups.clear();
+ if (R2.match(Name, &Groups)) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (Groups[1] == "fadd")
- ID = Intrinsic::experimental_vector_reduce_v2_fadd;
+ ID = Intrinsic::vector_reduce_fadd;
if (Groups[1] == "fmul")
- ID = Intrinsic::experimental_vector_reduce_v2_fmul;
-
+ ID = Intrinsic::vector_reduce_fmul;
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
- Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
+ Type *Tys[] = {Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return true;
}
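For illustration, the net effect on one typical pre-12.0 declaration (the module contents here are hypothetical):

    // Old: declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
    // The renamed intrinsic is overloaded only on the vector argument type:
    auto Args = F->getFunctionType()->params();   // {<4 x i32>}
    Function *NewFn = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::vector_reduce_add, {Args[0]});
    // New: declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)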
@@ -900,7 +1013,7 @@ GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
- auto *ResultTy = cast<VectorType>(Op->getType());
+ auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
@@ -934,7 +1047,7 @@ static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
- auto *ResultTy = cast<VectorType>(Op->getType());
+ auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
@@ -966,19 +1079,19 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
+ assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
llvm::VectorType *MaskTy = FixedVectorType::get(
Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
- // If we have less than 8 elements, then the starting mask was an i8 and
- // we need to extract down to the right number of elements.
- if (NumElts < 8) {
+ // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
+ // i8 and we need to extract down to the right number of elements.
+ if (NumElts <= 4) {
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
- Mask = Builder.CreateShuffleVector(Mask, Mask,
- makeArrayRef(Indices, NumElts),
- "extract");
+ Mask = Builder.CreateShuffleVector(
+ Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
}
return Mask;
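Concretely, with illustrative widths: for a 4-element operation the incoming mask is an i8, so it is bitcast to <8 x i1> and the low four lanes are extracted; for 8 or more elements the bitcast alone already has the right width:

    // Sketch of the NumElts == 4 path for an i8 mask value MaskV:
    Value *V = Builder.CreateBitCast(
        MaskV, FixedVectorType::get(Builder.getInt1Ty(), 8)); // -> <8 x i1>
    V = Builder.CreateShuffleVector(V, V, {0, 1, 2, 3}, "extract");
    // -> <4 x i1>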
@@ -992,7 +1105,7 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
return Op0;
Mask = getX86MaskVec(Builder, Mask,
- cast<VectorType>(Op0->getType())->getNumElements());
+ cast<FixedVectorType>(Op0->getType())->getNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
@@ -1019,7 +1132,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
- unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
@@ -1120,15 +1233,11 @@ static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
-static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
- bool IsSigned, bool IsAddition) {
+static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+ Intrinsic::ID IID) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
Value *Op1 = CI.getOperand(1);
-
- Intrinsic::ID IID =
- IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
- : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
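The generalized helper is shared by the saturating add/sub and integer min/max upgrades below, keyed only on the generic intrinsic ID. For example (hypothetical call site), the old sse2.padds.b becomes a call to the target-independent saturating add:

    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
    // emits: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)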
@@ -1150,7 +1259,7 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
// Funnel shift amounts are treated as modulo and types are all power-of-2 so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
- unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
@@ -1220,7 +1329,7 @@ static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
// Funnel shift amounts are treated as modulo and types are all power-of-2 so
// we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
- unsigned NumElts = cast<VectorType>(Ty)->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
@@ -1257,7 +1366,7 @@ static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
return Builder.CreateAlignedStore(Data, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
- unsigned NumElts = cast<VectorType>(Data->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
@@ -1280,35 +1389,19 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
- unsigned NumElts = cast<VectorType>(Passthru->getType())->getNumElements();
+ unsigned NumElts =
+ cast<FixedVectorType>(Passthru->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
}
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
+ Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
- llvm::Type *Ty = Op0->getType();
- Value *Zero = llvm::Constant::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
- Value *Neg = Builder.CreateNeg(Op0);
- Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
-
+ Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
+ Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
if (CI.getNumArgOperands() == 3)
- Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
-
- return Res;
-}
-
-static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
- ICmpInst::Predicate Pred) {
- Value *Op0 = CI.getArgOperand(0);
- Value *Op1 = CI.getArgOperand(1);
- Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
- Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
-
- if (CI.getNumArgOperands() == 4)
- Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
-
+ Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
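The rewritten upgradeAbs leans on the generic llvm.abs intrinsic (new in 12.0); its i1 operand states whether an INT_MIN input is poison, and passing false preserves the old wrap-around behaviour. A sketch for a 128-bit pabs, with illustrative operand names:

    Function *AbsFn = Intrinsic::getDeclaration(CI.getModule(),
                                                Intrinsic::abs, CI.getType());
    Value *R = Builder.CreateCall(AbsFn, {Op0, Builder.getInt1(false)});
    // i.e. %r = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %x, i1 false)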
@@ -1344,7 +1437,7 @@ static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
// Apply a mask to a vector of i1's and make sure the result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
Value *Mask) {
- unsigned NumElts = cast<VectorType>(Vec->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (Mask) {
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
@@ -1367,7 +1460,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
- unsigned NumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
@@ -1422,7 +1515,7 @@ static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
- unsigned NumElts = cast<VectorType>(CI.getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
@@ -1676,7 +1769,6 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
(Pos = AsmStr->find("# marker")) != std::string::npos) {
AsmStr->replace(Pos, 1, ";");
}
- return;
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
@@ -1871,8 +1963,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateICmp(Pred, Rep, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
- unsigned NumElts =
- cast<VectorType>(CI->getArgOperand(1)->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
+ ->getNumElements();
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
@@ -2000,38 +2092,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
- Type *OpTy = CI->getArgOperand(0)->getType();
+ } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+ Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_cmp_ps_128;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_cmp_ps_256;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_cmp_ps_512;
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_cmp_pd_128;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_cmp_pd_256;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_cmp_pd_512;
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
- SmallVector<Value *, 4> Args;
- Args.push_back(CI->getArgOperand(0));
- Args.push_back(CI->getArgOperand(1));
- Args.push_back(CI->getArgOperand(2));
- if (CI->getNumArgOperands() == 5)
- Args.push_back(CI->getArgOperand(4));
+ Value *Mask = Constant::getAllOnesValue(CI->getType());
+ if (VecWidth == 512)
+ std::swap(Mask, Args.back());
+ Args.push_back(Mask);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Args);
- Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
- } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
- Name[16] != 'p') {
+ } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
// Integer compare intrinsics.
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
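One subtlety in the avx512.cmp.p rewrite above: the new mask intrinsics place the mask before the optional sae operand, while the old 512-bit names carried sae last, so the synthesized all-ones mask must be swapped into position. A sketch with illustrative comments:

    Value *Mask = Constant::getAllOnesValue(CI->getType()); // e.g. <16 x i1>
    if (VecWidth == 512)
      std::swap(Mask, Args.back()); // Args was {a, b, imm, sae}; Mask now
                                    // holds sae and Args ends with the mask
    Args.push_back(Mask);           // final order: {a, b, imm, mask, sae}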
@@ -2057,25 +2147,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name == "sse41.pmaxsd" ||
Name.startswith("avx2.pmaxs") ||
Name.startswith("avx512.mask.pmaxs"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
Name.startswith("avx2.pmaxu") ||
Name.startswith("avx512.mask.pmaxu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
Name.startswith("avx2.pmins") ||
Name.startswith("avx512.mask.pmins"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
Name == "avx2.pmulu.dq" ||
Name == "avx512.pmulu.dq.512" ||
@@ -2122,9 +2212,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name == "avx.cvt.ps2.pd.256" ||
Name == "avx512.mask.cvtps2pd.128" ||
Name == "avx512.mask.cvtps2pd.256")) {
- auto *DstTy = cast<VectorType>(CI->getType());
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
- auto *SrcTy = cast<VectorType>(Rep->getType());
+ auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
@@ -2154,9 +2244,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
Name.startswith("vcvtph2ps."))) {
- auto *DstTy = cast<VectorType>(CI->getType());
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
- auto *SrcTy = cast<VectorType>(Rep->getType());
+ auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts != SrcTy->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size");
@@ -2177,7 +2267,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
- auto *ResultTy = cast<VectorType>(CI->getType());
+ auto *ResultTy = cast<FixedVectorType>(CI->getType());
Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
@@ -2199,8 +2289,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
llvm::PointerType::getUnqual(PtrTy));
- Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
- ResultTy->getNumElements());
+ Value *MaskVec =
+ getX86MaskVec(Builder, CI->getArgOperand(2),
+ cast<FixedVectorType>(ResultTy)->getNumElements());
Function *CSt = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_compressstore,
@@ -2208,7 +2299,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
} else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
Name.startswith("avx512.mask.expand."))) {
- auto *ResultTy = cast<VectorType>(CI->getType());
+ auto *ResultTy = cast<FixedVectorType>(CI->getType());
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
ResultTy->getNumElements());
@@ -2288,7 +2379,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
Name.startswith("avx512.vbroadcast.s"))) {
// Replace broadcasts with a series of insertelements.
- auto *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecTy->getElementType();
unsigned EltNum = VecTy->getNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
@@ -2305,8 +2396,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx2.pmovzx") ||
Name.startswith("avx512.mask.pmovsx") ||
Name.startswith("avx512.mask.pmovzx"))) {
- VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
- VectorType *DstTy = cast<VectorType>(CI->getType());
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
@@ -2314,8 +2404,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;
- Value *SV = Builder.CreateShuffleVector(
- CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+ Value *SV =
+ Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
@@ -2342,12 +2432,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
PointerType::getUnqual(VT));
Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
if (NumSrcElts == 2)
- Rep = Builder.CreateShuffleVector(
- Load, UndefValue::get(Load->getType()), ArrayRef<int>{0, 1, 0, 1});
+ Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
else
- Rep =
- Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
+ Rep = Builder.CreateShuffleVector(
+ Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
} else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
Name.startswith("avx512.mask.shuf.f"))) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -2373,8 +2461,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
Name.startswith("avx512.mask.broadcasti"))) {
unsigned NumSrcElts =
- cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements();
- unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements();
+ cast<FixedVectorType>(CI->getArgOperand(0)->getType())
+ ->getNumElements();
+ unsigned NumDstElts =
+ cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
@@ -2393,30 +2483,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op = CI->getArgOperand(0);
ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
- Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
- Constant::getNullValue(MaskTy));
+ SmallVector<int, 8> M;
+ ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
+ Rep = Builder.CreateShuffleVector(Op, M);
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
- Name.startswith("sse2.psubs.") ||
Name.startswith("avx2.padds.") ||
- Name.startswith("avx2.psubs.") ||
Name.startswith("avx512.padds.") ||
+ Name.startswith("avx512.mask.padds."))) {
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
+ } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
+ Name.startswith("avx2.psubs.") ||
Name.startswith("avx512.psubs.") ||
- Name.startswith("avx512.mask.padds.") ||
Name.startswith("avx512.mask.psubs."))) {
- bool IsAdd = Name.contains(".padds");
- Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
- Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.paddus.") ||
+ Name.startswith("avx512.mask.paddus."))) {
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
+ } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.psubus.") ||
- Name.startswith("avx512.mask.paddus.") ||
Name.startswith("avx512.mask.psubus."))) {
- bool IsAdd = Name.contains(".paddus");
- Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
+ Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
@@ -2463,7 +2554,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
@@ -2477,21 +2568,22 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
- unsigned SrcNumElts = cast<VectorType>(Op1->getType())->getNumElements();
+ unsigned DstNumElts =
+ cast<FixedVectorType>(CI->getType())->getNumElements();
+ unsigned SrcNumElts =
+ cast<FixedVectorType>(Op1->getType())->getNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Extend the second operand into a vector the size of the destination.
- Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
- Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
+ Rep = Builder.CreateShuffleVector(Op1, Idxs);
// Insert the second operand into the first operand.
@@ -2521,8 +2613,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- unsigned DstNumElts = cast<VectorType>(CI->getType())->getNumElements();
- unsigned SrcNumElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned DstNumElts =
+ cast<FixedVectorType>(CI->getType())->getNumElements();
+ unsigned SrcNumElts =
+ cast<FixedVectorType>(Op0->getType())->getNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
@@ -2545,7 +2639,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 8> Idxs(NumElts);
@@ -2569,7 +2663,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned HalfSize = NumElts / 2;
SmallVector<int, 8> ShuffleMask(NumElts);
@@ -2599,7 +2693,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
+ auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
// Calculate the size of each index in the immediate.
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
@@ -2621,7 +2715,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
@@ -2640,7 +2734,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
@@ -2659,7 +2753,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;
@@ -2684,7 +2778,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
- unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
@@ -2706,7 +2800,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
- int NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
@@ -2722,7 +2816,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
- int NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
@@ -3290,7 +3384,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
} else {
- int NumElts = cast<VectorType>(CI->getType())->getNumElements();
+ int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
@@ -3547,28 +3641,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
DefaultCase();
return;
}
- case Intrinsic::experimental_vector_reduce_v2_fmul: {
- SmallVector<Value *, 2> Args;
- if (CI->isFast())
- Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
- else
- Args.push_back(CI->getOperand(0));
- Args.push_back(CI->getOperand(1));
- NewCall = Builder.CreateCall(NewFn, Args);
- cast<Instruction>(NewCall)->copyFastMathFlags(CI);
- break;
- }
- case Intrinsic::experimental_vector_reduce_v2_fadd: {
- SmallVector<Value *, 2> Args;
- if (CI->isFast())
- Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
- else
- Args.push_back(CI->getOperand(0));
- Args.push_back(CI->getOperand(1));
- NewCall = Builder.CreateCall(NewFn, Args);
- cast<Instruction>(NewCall)->copyFastMathFlags(CI);
- break;
- }
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
@@ -3589,6 +3661,30 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::arm_neon_bfdot:
+ case Intrinsic::arm_neon_bfmmla:
+ case Intrinsic::arm_neon_bfmlalb:
+ case Intrinsic::arm_neon_bfmlalt:
+ case Intrinsic::aarch64_neon_bfdot:
+ case Intrinsic::aarch64_neon_bfmmla:
+ case Intrinsic::aarch64_neon_bfmlalb:
+ case Intrinsic::aarch64_neon_bfmlalt: {
+ SmallVector<Value *, 3> Args;
+ assert(CI->getNumArgOperands() == 3 &&
+ "Mismatch between function args and call args");
+ size_t OperandWidth =
+ CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
+ assert((OperandWidth == 64 || OperandWidth == 128) &&
+ "Unexpected operand width");
+ Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
+ auto Iter = CI->arg_operands().begin();
+ Args.push_back(*Iter++);
+ Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
+ Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
+ }
+
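At the call sites the remaining work is pure retyping: the old calls passed v8i8/v16i8 operands, so each one is bitcast to the matching bfloat vector before the upgraded declaration is called. Illustrative values for the 64-bit case (OldA stands in for one original operand):

    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), 64 / 16);
    Value *A = Builder.CreateBitCast(OldA, NewTy); // <8 x i8> -> <4 x bfloat>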
case Intrinsic::bitreverse:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
@@ -3691,11 +3787,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Replace the original call result with the first result of the new call.
Value *TSC = Builder.CreateExtractValue(NewCall, 0);
- std::string Name = std::string(CI->getName());
- if (!Name.empty()) {
- CI->setName(Name + ".old");
- NewCall->setName(Name);
- }
+ NewCall->takeName(CI);
CI->replaceAllUsesWith(TSC);
CI->eraseFromParent();
return;
@@ -3718,6 +3810,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::x86_avx512_mask_cmp_pd_128:
+ case Intrinsic::x86_avx512_mask_cmp_pd_256:
+ case Intrinsic::x86_avx512_mask_cmp_pd_512:
+ case Intrinsic::x86_avx512_mask_cmp_ps_128:
+ case Intrinsic::x86_avx512_mask_cmp_ps_256:
+ case Intrinsic::x86_avx512_mask_cmp_ps_512: {
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+ unsigned NumElts =
+ cast<FixedVectorType>(Args[0]->getType())->getNumElements();
+ Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
+
+ NewCall = Builder.CreateCall(NewFn, Args);
+ Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
+
+ NewCall->takeName(CI);
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return;
+ }
+
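Putting the pieces together for one call, with illustrative values: an old i8-mask compare on <2 x double> operands becomes a <2 x i1> call whose result is widened back so existing users still see an i8:

    // Before: %r = call i8 @llvm.x86.avx512.mask.cmp.pd.128(
    //                  <2 x double> %a, <2 x double> %b, i32 2, i8 %m)
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements(); // 2
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts); // i8 -> <2 x i1>
    // The new call returns <2 x i1>; ApplyX86MaskOn1BitsVec pads that to
    // <8 x i1> and bitcasts to i8 before replacing all uses of %r.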
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
@@ -3766,11 +3879,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
assert(NewCall && "Should have either set this variable or returned through "
"the default case");
- std::string Name = std::string(CI->getName());
- if (!Name.empty()) {
- CI->setName(Name + ".old");
- NewCall->setName(Name);
- }
+ NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}
@@ -3784,8 +3893,8 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) {
if (UpgradeIntrinsicFunction(F, NewFn)) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
- for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
- if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ for (User *U : make_early_inc_range(F->users()))
+ if (CallInst *CI = dyn_cast<CallInst>(U))
UpgradeIntrinsicCall(CI, NewFn);
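make_early_inc_range (llvm/ADT/STLExtras.h) advances its iterator before handing each element to the loop body, so UpgradeIntrinsicCall may erase the visited call without invalidating the traversal; the hand-rolled pattern it replaces achieved the same with a post-increment:

    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE;) {
      User *U = *UI++;              // step past U before it may be erased
      if (auto *Call = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(Call, NewFn);
    }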
// Remove old function, no longer used, from the module.
@@ -3921,8 +4030,8 @@ void llvm::UpgradeARCRuntime(Module &M) {
Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
- for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
- CallInst *CI = dyn_cast<CallInst>(*I++);
+ for (User *U : make_early_inc_range(Fn->users())) {
+ CallInst *CI = dyn_cast<CallInst>(U);
if (!CI || CI->getCalledFunction() != Fn)
continue;
@@ -3963,7 +4072,7 @@ void llvm::UpgradeARCRuntime(Module &M) {
// Create a call instruction that calls the new function.
CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
- NewCall->setName(CI->getName());
+ NewCall->takeName(CI);
// Bitcast the return value back to the type of the old call.
Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
@@ -4202,6 +4311,13 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
StrictFPUpgradeVisitor SFPV;
SFPV.visit(F);
}
+
+ if (F.getCallingConv() == CallingConv::X86_INTR &&
+ !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
+ Type *ByValTy = cast<PointerType>(F.getArg(0)->getType())->getElementType();
+ Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
+ F.addParamAttr(0, NewAttr);
+ }
}
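A sketch of what the new attribute upgrade does to an old-style x86 interrupt handler, assuming typed (pre-opaque) pointers and a hypothetical %frame type:

    // Old IR: define x86_intrcc void @handler(%frame* %p) { ... }
    Type *ByValTy =
        cast<PointerType>(F.getArg(0)->getType())->getElementType(); // %frame
    F.addParamAttr(0, Attribute::getWithByValType(F.getContext(), ByValTy));
    // New IR: define x86_intrcc void @handler(%frame* byval(%frame) %p)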
static bool isOldLoopArgument(Metadata *MD) {
@@ -4267,11 +4383,17 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
}
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
- std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
+ Triple T(TT);
+ // For AMDGPU we upgrade older DataLayouts to include the default globals
+ // address space of 1.
+ if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
+ return DL.empty() ? std::string("G1") : (DL + "-G1").str();
+ }
+ std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
// If X86, and the datalayout matches the expected format, add pointer size
// address spaces to the datalayout.
- if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
+ if (!T.isX86() || DL.contains(AddrSpaces))
return std::string(DL);
SmallVector<StringRef, 4> Groups;
@@ -4279,9 +4401,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
if (!R.match(DL, &Groups))
return std::string(DL);
- SmallString<1024> Buf;
- std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
- return Res;
+ return (Groups[1] + AddrSpaces + Groups[3]).str();
}
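Worked examples for both branches; the input strings are illustrative, and the x86-64 result matches the layout clang emits once the p270/p271/p272 mixed-pointer-size address spaces exist:

    std::string A = UpgradeDataLayoutString("", "amdgcn-amd-amdhsa");
    // A == "G1"
    std::string B = UpgradeDataLayoutString("e-p:64:64", "amdgcn-amd-amdhsa");
    // B == "e-p:64:64-G1"
    std::string X = UpgradeDataLayoutString(
        "e-m:e-i64:64-f80:128-n8:16:32:64-S128", "x86_64-unknown-linux-gnu");
    // X == "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-"
    //      "n8:16:32:64-S128"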
void llvm::UpgradeAttributes(AttrBuilder &B) {