summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp190
1 files changed, 165 insertions, 25 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 172eba0002d4f..f777e56289884 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1662,6 +1662,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
+
+ // TODO: These control memcmp expansion in CGP and are set low to prevent
+ // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder.
+ MaxLoadsPerMemcmp = 1;
+ MaxLoadsPerMemcmpOptSize = 1;
+
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
@@ -14272,9 +14278,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// If we are inserting a element, see if we can do this more efficiently with
// a blend shuffle with a rematerializable vector than a costly integer
// insertion.
- // TODO: pre-SSE41 targets will tend to use bit masking - this could still
- // be beneficial if we are inserting several zeros and can combine the masks.
- if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) {
+ if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
+ 16 <= EltVT.getSizeInBits()) {
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
BlendMask.push_back(i == IdxVal ? i + NumElts : i);
@@ -17621,23 +17626,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
-
SDValue CmpOp0 = Cmp.getOperand(0);
+
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
- (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
- SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
- DAG.getConstant(0, DL,
- CmpOp0.getValueType()),
- CmpOp0);
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- return Res;
- }
+ (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
+ SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ return Res;
+ }
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
@@ -18648,8 +18651,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
+ bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
- SplitStack;
+ SplitStack || EmitStackProbe;
SDLoc dl(Op);
// Get the inputs.
@@ -23705,6 +23709,57 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SDValue RetOps[] = {Exract, NewGather.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
+ if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) {
+ // There is a special case when the return type is v2i32 is illegal and
+ // the type legaizer extended it to v2i64. Without this conversion we end up
+ // with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD.
+ // In order to avoid this situation, we'll build an X86 specific Gather node
+ // with index v2i64 and value type v4i32.
+ assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
+ "Unexpected type in masked gather");
+ Src0 = DAG.getVectorShuffle(MVT::v4i32, dl,
+ DAG.getBitcast(MVT::v4i32, Src0),
+ DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
+ // The mask should match the destination type. Extending mask with zeroes
+ // is not necessary since instruction itself reads only two values from
+ // memory.
+ Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
+ N->getMemOperand());
+
+ SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
+ NewGather.getValue(0), DAG);
+ SDValue RetOps[] = { Sext, NewGather.getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+ if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) {
+ // This transformation is for optimization only.
+ // The type legalizer extended mask and index to 4 elements vector
+ // in order to match requirements of the common gather node - same
+ // vector width of index and value. X86 Gather node allows mismatch
+ // of vector width in order to select more optimal instruction at the
+ // end.
+ assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 &&
+ "Unexpected type in masked gather");
+ if (Mask.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
+ Index.getOpcode() == ISD::CONCAT_VECTORS &&
+ Index.getOperand(1).isUndef()) {
+ Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
+ Index = Index.getOperand(0);
+ } else
+ return Op;
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
+ N->getMemOperand());
+
+ SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+
+ }
return Op;
}
@@ -24508,6 +24563,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
+ case X86ISD::MGATHER: return "X86ISD::MGATHER";
}
return nullptr;
}
@@ -29868,7 +29924,7 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
if (N->getValueType(0) == MVT::i32)
Diff = (unsigned)Diff;
- bool isFastMultiplier = false;
+ bool IsFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default:
@@ -29880,12 +29936,12 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
- isFastMultiplier = true;
+ IsFastMultiplier = true;
break;
}
}
- if (isFastMultiplier) {
+ if (IsFastMultiplier) {
APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
@@ -34841,23 +34897,56 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
- // (cmp Z, 1) sets the carry flag if Z is 0.
SDValue Z = Cmp.getOperand(0);
- SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z,
- DAG.getConstant(1, DL, Z.getValueType()));
+ EVT ZVT = Z.getValueType();
+
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ if (auto *ConstantX = dyn_cast<ConstantSDNode>(X)) {
+ // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
+ // fake operands:
+ // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
+ // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
+ if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
+ (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
+ SDValue Zero = DAG.getConstant(0, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ }
+
+ // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
+ // with fake operands:
+ // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
+ // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
+ if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
+ (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1);
+ }
+ }
- SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ // (cmp Z, 1) sets the carry flag if Z is 0.
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
+
+ // Add the flags type for ADC/SBB nodes.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
// X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
// X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
- DAG.getConstant(-1ULL, DL, VT), NewCmp);
+ DAG.getConstant(-1ULL, DL, VT), Cmp1);
// X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
// X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
- DAG.getConstant(0, DL, VT), NewCmp);
+ DAG.getConstant(0, DL, VT), Cmp1);
}
static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
@@ -34976,6 +35065,32 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
}
+/// Convert vector increment or decrement to sub/add with an all-ones constant:
+/// add X, <1, 1...> --> sub X, <-1, -1...>
+/// sub X, <1, 1...> --> add X, <-1, -1...>
+/// The all-ones vector constant can be materialized using a pcmpeq instruction
+/// that is commonly recognized as an idiom (has no register dependency), so
+/// that's better/smaller than loading a splat 1 constant.
+static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB &&
+ "Unexpected opcode for increment/decrement transform");
+
+ // Pseudo-legality check: getOnesVector() expects one of these types, so bail
+ // out and wait for legalization if we have an unsupported vector length.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ SDNode *N1 = N->getOperand(1).getNode();
+ APInt SplatVal;
+ if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
+ return SDValue();
+
+ SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
+ unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
+ return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
+}
+
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const SDNodeFlags Flags = N->getFlags();
@@ -34995,6 +35110,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
+ if (SDValue V = combineIncDecVector(N, DAG))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -35028,6 +35146,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
+ if (SDValue V = combineIncDecVector(N, DAG))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -36335,3 +36456,22 @@ void X86TargetLowering::insertCopiesSplitCSR(
bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
+
+/// Returns the name of the symbol used to emit stack probes or the empty
+/// string if not applicable.
+StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
+ // If the function specifically requests stack probes, emit them.
+ if (MF.getFunction()->hasFnAttribute("probe-stack"))
+ return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString();
+
+ // Generally, if we aren't on Windows, the platform ABI does not include
+ // support for stack probes, so don't emit them.
+ if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO())
+ return "";
+
+ // We need a stack probe to conform to the Windows ABI. Choose the right
+ // symbol.
+ if (Subtarget.is64Bit())
+ return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
+ return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
+}