summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp266
1 files changed, 133 insertions, 133 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8d78308afe9d..0a41f35f9320 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,3 +1,4 @@
+
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
@@ -80,12 +81,6 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
-static cl::opt<bool> MulConstantOptimization(
- "mul-constant-optimization", cl::init(true),
- cl::desc("Replace 'mul x, Const' with more effective instructions like "
- "SHIFT, LEA, etc."),
- cl::Hidden);
-
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
@@ -317,16 +312,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UREM, VT, Expand);
}
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
- if (VT == MVT::i64 && !Subtarget.is64Bit())
- continue;
- // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
- setOperationAction(ISD::ADDC, VT, Custom);
- setOperationAction(ISD::ADDE, VT, Custom);
- setOperationAction(ISD::SUBC, VT, Custom);
- setOperationAction(ISD::SUBE, VT, Custom);
- }
-
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
@@ -428,7 +413,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
continue;
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SETCCE, VT, Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
@@ -1583,6 +1567,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
+ setOperationAction(ISD::SETCCCARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
@@ -16304,6 +16289,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
case ISD::SHL:
if (Op.getNode()->getFlags().hasNoSignedWrap())
break;
+ LLVM_FALLTHROUGH;
default:
NeedOF = true;
break;
@@ -17167,17 +17153,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE: Invert = true;
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
- case ISD::SETLT: Swap = true;
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
- case ISD::SETGE: Swap = true;
+ case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLE: Opc = X86ISD::PCMPGT;
Invert = true; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = X86ISD::PCMPGT;
FlipSigns = true; break;
- case ISD::SETUGE: Swap = true;
+ case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Opc = X86ISD::PCMPGT;
FlipSigns = true; Invert = true; break;
}
@@ -17398,19 +17384,24 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SetCC;
}
-SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
- assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
+ assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
- assert(Carry.getOpcode() != ISD::CARRY_FALSE);
+ // Recreate the carry if needed.
+ EVT CarryVT = Carry.getValueType();
+ APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
+ Carry, DAG.getConstant(NegOne, DL, CarryVT));
+
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
- SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry);
+ SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG);
if (Op.getSimpleValueType() == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
@@ -23269,32 +23260,6 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getNode()->getSimpleValueType(0);
-
- // Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
-
- unsigned Opc;
- bool ExtraOp = false;
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid code");
- case ISD::ADDC: Opc = X86ISD::ADD; break;
- case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
- case ISD::SUBC: Opc = X86ISD::SUB; break;
- case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
- }
-
- if (!ExtraOp)
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
- Op.getOperand(1));
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
- Op.getOperand(1), Op.getOperand(2));
-}
-
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
@@ -23785,7 +23750,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::SETCCE: return LowerSETCCE(Op, DAG);
+ case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@@ -23830,10 +23795,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::SUBC:
- case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
@@ -28946,12 +28907,118 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+// Try to match patterns such as
+// (i16 bitcast (v16i1 x))
+// ->
+// (i16 movmsk (16i8 sext (v16i1 x)))
+// before the illegal vector is scalarized on subtargets that don't have legal
+// vxi1 types.
+static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
+ const X86Subtarget &Subtarget) {
+ EVT VT = BitCast.getValueType();
+ SDValue N0 = BitCast.getOperand(0);
+ EVT VecVT = N0->getValueType(0);
+
+ if (!VT.isScalarInteger() || !VecVT.isSimple())
+ return SDValue();
+
+ // With AVX512 vxi1 types are legal and we prefer using k-regs.
+ // MOVMSK is supported in SSE2 or later.
+ if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ return SDValue();
+
+ // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
+ // v8f64. So all legal 128-bit and 256-bit vectors are covered except for
+ // v8i16 and v16i16.
+ // For these two cases, we can shuffle the upper element bytes to a
+ // consecutive sequence at the start of the vector and treat the results as
+ // v16i8 or v32i8, and for v61i8 this is the prefferable solution. However,
+ // for v16i16 this is not the case, because the shuffle is expensive, so we
+ // avoid sign-exteding to this type entirely.
+ // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
+ // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
+ MVT SExtVT;
+ MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ switch (VecVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i1:
+ SExtVT = MVT::v2i64;
+ FPCastVT = MVT::v2f64;
+ break;
+ case MVT::v4i1:
+ SExtVT = MVT::v4i32;
+ FPCastVT = MVT::v4f32;
+ // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
+ // sign-extend to a 256-bit operation to avoid truncation.
+ if (N0->getOpcode() == ISD::SETCC &&
+ N0->getOperand(0)->getValueType(0).is256BitVector() &&
+ Subtarget.hasInt256()) {
+ SExtVT = MVT::v4i64;
+ FPCastVT = MVT::v4f64;
+ }
+ break;
+ case MVT::v8i1:
+ SExtVT = MVT::v8i16;
+ // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)),
+ // sign-extend to a 256-bit operation to match the compare.
+ // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
+ // 256-bit because the shuffle is cheaper than sign extending the result of
+ // the compare.
+ if (N0->getOpcode() == ISD::SETCC &&
+ N0->getOperand(0)->getValueType(0).is256BitVector() &&
+ Subtarget.hasInt256()) {
+ SExtVT = MVT::v8i32;
+ FPCastVT = MVT::v8f32;
+ }
+ break;
+ case MVT::v16i1:
+ SExtVT = MVT::v16i8;
+ // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
+ // it is not profitable to sign-extend to 256-bit because this will
+ // require an extra cross-lane shuffle which is more exprensive than
+ // truncating the result of the compare to 128-bits.
+ break;
+ case MVT::v32i1:
+ // TODO: Handle pre-AVX2 cases by splitting to two v16i1's.
+ if (!Subtarget.hasInt256())
+ return SDValue();
+ SExtVT = MVT::v32i8;
+ break;
+ };
+
+ SDLoc DL(BitCast);
+ SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
+ if (SExtVT == MVT::v8i16) {
+ V = DAG.getBitcast(MVT::v16i8, V);
+ V = DAG.getVectorShuffle(
+ MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8),
+ {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
+ } else
+ assert(SExtVT.getScalarType() != MVT::i16 &&
+ "Vectors of i16 must be shuffled");
+ if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ V = DAG.getBitcast(FPCastVT, V);
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
+ return DAG.getZExtOrTrunc(V, DL, VT);
+}
+
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
+ // Try to match patterns such as
+ // (i16 bitcast (v16i1 x))
+ // ->
+ // (i16 movmsk (16i8 sext (v16i1 x)))
+ // before the setcc result is scalarized on subtargets that don't have legal
+ // vxi1 types.
+ if (DCI.isBeforeLegalize())
+ if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
+ return V;
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
@@ -29944,6 +30011,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
@@ -29974,6 +30042,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
@@ -30008,6 +30077,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
@@ -30036,6 +30106,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
@@ -30933,75 +31004,6 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
}
}
-static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
- EVT VT, SDLoc DL) {
-
- auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
- SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(Mult, DL, VT));
- Result = DAG.getNode(ISD::SHL, DL, VT, Result,
- DAG.getConstant(Shift, DL, MVT::i8));
- Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
- Result);
- return Result;
- };
-
- auto combineMulMulAddOrSub = [&](bool isAdd) {
- SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(9, DL, VT));
- Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
- Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
- Result);
- return Result;
- };
-
- switch (MulAmt) {
- default:
- break;
- case 11:
- // mul x, 11 => add ((shl (mul x, 5), 1), x)
- return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
- case 21:
- // mul x, 21 => add ((shl (mul x, 5), 2), x)
- return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
- case 22:
- // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
- return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
- case 19:
- // mul x, 19 => sub ((shl (mul x, 5), 2), x)
- return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
- case 13:
- // mul x, 13 => add ((shl (mul x, 3), 2), x)
- return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
- case 23:
- // mul x, 13 => sub ((shl (mul x, 3), 3), x)
- return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
- case 14:
- // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
- return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
- case 26:
- // mul x, 26 => sub ((mul (mul x, 9), 3), x)
- return combineMulMulAddOrSub(/*isAdd*/ false);
- case 28:
- // mul x, 28 => add ((mul (mul x, 9), 3), x)
- return combineMulMulAddOrSub(/*isAdd*/ true);
- case 29:
- // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
- return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- combineMulMulAddOrSub(/*isAdd*/ true));
- case 30:
- // mul x, 30 => sub (sub ((shl x, 5), x), x)
- return DAG.getNode(
- ISD::SUB, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(5, DL, MVT::i8))));
- }
- return SDValue();
-}
-
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -31011,8 +31013,6 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
- if (!MulConstantOptimization)
- return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
@@ -31068,8 +31068,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
- } else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
+ }
if (!NewMul) {
assert(MulAmt != 0 &&
@@ -34558,8 +34557,7 @@ static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG,
isOneConstant(Carry.getOperand(1))))
Carry = Carry.getOperand(0);
- if (Carry.getOpcode() == ISD::SETCC ||
- Carry.getOpcode() == X86ISD::SETCC ||
+ if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
if (Carry.getConstantOperandVal(0) == X86::COND_B)
return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1));
@@ -35126,7 +35124,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
- case ISD::BITCAST: return combineBitcast(N, DAG, Subtarget);
+ case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
@@ -35510,6 +35508,7 @@ TargetLowering::ConstraintWeight
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ LLVM_FALLTHROUGH;
case 'R':
case 'q':
case 'Q':
@@ -35861,6 +35860,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
+ LLVM_FALLTHROUGH;
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32 || VT == MVT::f32)