summary | refs | log | tree | commit | diff
path: root/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 135
1 files changed, 74 insertions, 61 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0a41f35f93208..5303d7a406ad4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4753,7 +4753,7 @@ static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
int NumElts = Mask.size();
- ScaledMask.assign(NumElts * Scale, -1);
+ ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
@@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::SCALAR_TO_VECTOR: {
- // Match against a scalar_to_vector of an extract from a similar vector.
+ // Match against a scalar_to_vector of an extract from a vector,
+ // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
SDValue N0 = N.getOperand(0);
- if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- N0.getOperand(0).getValueType() != VT ||
- !isa<ConstantSDNode>(N0.getOperand(1)) ||
- NumElts <= N0.getConstantOperandVal(1) ||
- !N->isOnlyUserOf(N0.getNode()))
+ SDValue SrcExtract;
+
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getValueType() == VT) {
+ SrcExtract = N0;
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
+ }
+
+ if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
+ NumElts <= SrcExtract.getConstantOperandVal(1))
return false;
- Ops.push_back(N0.getOperand(0));
- Mask.push_back(N0.getConstantOperandVal(1));
- Mask.append(NumElts - 1, SM_SentinelUndef);
+
+ SDValue SrcVec = SrcExtract.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+
+ Ops.push_back(SrcVec);
+ Mask.push_back(SrcExtract.getConstantOperandVal(1));
+ Mask.append(NumZeros, SM_SentinelZero);
+ Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
}
case X86ISD::PINSRB:
@@ -6542,12 +6564,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
- assert((ScalarSize == 32 || ScalarSize == 64) &&
- "Unsupported floating point scalar size");
- if (ScalarSize == 32)
- Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
- else
- Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
+ if (ScalarSize == 32) {
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+ } else {
+ assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+ }
} else
Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
ConstantVec.push_back(Const);
@@ -6633,11 +6655,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// AVX have support for 32 and 64 bit broadcast for floats only.
// No 64bit integer in 32bit subtarget.
MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
- Constant *C = SplatBitSize == 32
- ? ConstantFP::get(Type::getFloatTy(*Ctx),
- SplatValue.bitsToFloat())
- : ConstantFP::get(Type::getDoubleTy(*Ctx),
- SplatValue.bitsToDouble());
+ // Lower the splat via APFloat directly, to avoid any conversion.
+ Constant *C =
+ SplatBitSize == 32
+ ? ConstantFP::get(*Ctx,
+ APFloat(APFloat::IEEEsingle(), SplatValue))
+ : ConstantFP::get(*Ctx,
+ APFloat(APFloat::IEEEdouble(), SplatValue));
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
@@ -8003,7 +8027,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
- int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
@@ -16997,7 +17021,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
@@ -17024,18 +17048,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
// available.
SDValue Cmp;
- unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
+ unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
if (SSECC == 8) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
- if (SetCCOpcode == ISD::SETUEQ) {
+ if (Cond == ISD::SETUEQ) {
CC0 = 3; // UNORD
CC1 = 0; // EQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) :
static_cast<unsigned>(ISD::OR);
} else {
- assert(SetCCOpcode == ISD::SETONE);
+ assert(Cond == ISD::SETONE);
CC0 = 7; // ORD
CC1 = 4; // NEQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) :
@@ -17082,7 +17106,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// 2. The original operand type has been promoted to a 256-bit vector.
//
// Note that condition 2. only applies for AVX targets.
- SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode);
+ SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond);
return DAG.getZExtOrTrunc(NewOp, dl, VT);
}
@@ -17122,7 +17146,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
- switch (SetCCOpcode) {
+ switch (Cond) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
@@ -17137,60 +17161,49 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
// Are we comparing unsigned or signed integers?
- unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
- ? X86ISD::VPCOMU : X86ISD::VPCOM;
+ unsigned Opc =
+ ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CmpMode, dl, MVT::i8));
}
- // We are handling one of the integer comparisons here. Since SSE only has
+ // We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc;
- bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
- bool Subus = false;
-
- switch (SetCCOpcode) {
- default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
- case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
- case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
- case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETLE: Opc = X86ISD::PCMPGT;
- Invert = true; break;
- case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETUGT: Opc = X86ISD::PCMPGT;
- FlipSigns = true; break;
- case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETULE: Opc = X86ISD::PCMPGT;
- FlipSigns = true; Invert = true; break;
- }
+ unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
+ : X86ISD::PCMPGT;
+ bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
+ Cond == ISD::SETGE || Cond == ISD::SETUGE;
+ bool Invert = Cond == ISD::SETNE ||
+ (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
+ bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
- bool hasMinMax =
- (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
- || (Subtarget.hasSSE2() && (VET == MVT::i8));
-
- if (hasMinMax) {
- switch (SetCCOpcode) {
+ bool HasMinMax =
+ (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
+ (Subtarget.hasSSE2() && (VET == MVT::i8));
+ bool MinMax = false;
+ if (HasMinMax) {
+ switch (Cond) {
default: break;
case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
- if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
+ if (MinMax)
+ Swap = Invert = FlipSigns = false;
}
- bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
- if (!MinMax && hasSubus) {
+ bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+ bool Subus = false;
+ if (!MinMax && HasSubus) {
// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
// Op0 u<= Op1:
// t = psubus Op0, Op1
// pcmpeq t, <0..0>
- switch (SetCCOpcode) {
+ switch (Cond) {
default: break;
case ISD::SETULT: {
// If the comparison is against a constant we can turn this into a