summaryrefslogtreecommitdiff
path: root/lib/Target/SystemZ/SystemZISelLowering.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-07-28 10:51:19 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-07-28 10:51:19 +0000
commiteb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/SystemZ/SystemZISelLowering.cpp
parentb8a2042aa938069e862750553db0e4d82d25822c (diff)
Notes
Diffstat (limited to 'lib/Target/SystemZ/SystemZISelLowering.cpp')
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp1149
1 files changed, 913 insertions, 236 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index adf368319dc3..302c7883f97b 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -31,17 +31,6 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-lower"
namespace {
-// Represents a sequence for extracting a 0/1 value from an IPM result:
-// (((X ^ XORValue) + AddValue) >> Bit)
-struct IPMConversion {
- IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
- : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
-
- int64_t XORValue;
- int64_t AddValue;
- unsigned Bit;
-};
-
// Represents information about a comparison.
struct Comparison {
Comparison(SDValue Op0In, SDValue Op1In)
@@ -87,7 +76,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
// Set up the register classes.
if (Subtarget.hasHighWord())
@@ -133,6 +122,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Instructions are strings of 2-byte aligned 2-byte values.
setMinFunctionAlignment(2);
+ // For performance reasons we prefer 16-byte alignment.
+ setPrefFunctionAlignment(4);
// Handle operations that are handled in a similar way for all types.
for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
@@ -173,6 +164,18 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Custom);
setOperationAction(ISD::UDIVREM, VT, Custom);
+ // Support addition/subtraction with overflow.
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+
+ // Support addition/subtraction with carry.
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+
+ // Support carry in as value rather than glue.
+ setOperationAction(ISD::ADDCARRY, VT, Custom);
+ setOperationAction(ISD::SUBCARRY, VT, Custom);
+
// Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
// stores, putting a serialization instruction after the stores.
setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
@@ -517,7 +520,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Codes for which we want to perform some z-specific combinations.
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
@@ -643,7 +648,8 @@ supportedAddressingMode(Instruction *I, bool HasVector) {
if (SingleUser->getParent() == I->getParent()) {
if (isa<ICmpInst>(SingleUser)) {
if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
- if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
+ if (C->getBitWidth() <= 64 &&
+ (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
// Comparison of memory with 16 bit signed / unsigned immediate
return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
} else if (isa<StoreInst>(SingleUser))
@@ -748,6 +754,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
case 'f': // Floating-point register
case 'h': // High-part register
case 'r': // General-purpose register
+ case 'v': // Vector register
return C_RegisterClass;
case 'Q': // Memory with base and unsigned 12-bit displacement
@@ -800,6 +807,12 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
weight = CW_Register;
break;
+ case 'v': // Vector register
+ if ((type->isVectorTy() || type->isFloatingPointTy()) &&
+ Subtarget.hasVector())
+ weight = CW_Register;
+ break;
+
case 'I': // Unsigned 8-bit constant
if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
if (isUInt<8>(C->getZExtValue()))
@@ -838,13 +851,13 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
- const unsigned *Map) {
+ const unsigned *Map, unsigned Size) {
assert(*(Constraint.end()-1) == '}' && "Missing '}'");
if (isdigit(Constraint[2])) {
unsigned Index;
bool Failed =
Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
- if (!Failed && Index < 16 && Map[Index])
+ if (!Failed && Index < Size && Map[Index])
return std::make_pair(Map[Index], RC);
}
return std::make_pair(0U, nullptr);
@@ -881,6 +894,16 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
else if (VT == MVT::f128)
return std::make_pair(0U, &SystemZ::FP128BitRegClass);
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+
+ case 'v': // Vector register
+ if (Subtarget.hasVector()) {
+ if (VT == MVT::f32)
+ return std::make_pair(0U, &SystemZ::VR32BitRegClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SystemZ::VR64BitRegClass);
+ return std::make_pair(0U, &SystemZ::VR128BitRegClass);
+ }
+ break;
}
}
if (Constraint.size() > 0 && Constraint[0] == '{') {
@@ -891,22 +914,32 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
if (Constraint[1] == 'r') {
if (VT == MVT::i32)
return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
- SystemZMC::GR32Regs);
+ SystemZMC::GR32Regs, 16);
if (VT == MVT::i128)
return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
- SystemZMC::GR128Regs);
+ SystemZMC::GR128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
- SystemZMC::GR64Regs);
+ SystemZMC::GR64Regs, 16);
}
if (Constraint[1] == 'f') {
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
- SystemZMC::FP32Regs);
+ SystemZMC::FP32Regs, 16);
if (VT == MVT::f128)
return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
- SystemZMC::FP128Regs);
+ SystemZMC::FP128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
- SystemZMC::FP64Regs);
+ SystemZMC::FP64Regs, 16);
+ }
+ if (Constraint[1] == 'v') {
+ if (VT == MVT::f32)
+ return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
+ SystemZMC::VR32Regs, 32);
+ if (VT == MVT::f64)
+ return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
+ SystemZMC::VR64Regs, 32);
+ return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
+ SystemZMC::VR128Regs, 32);
}
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -964,6 +997,13 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
#include "SystemZGenCallingConv.inc"
+const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
+ CallingConv::ID) const {
+ static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
+ SystemZ::R14D, 0 };
+ return ScratchRegs;
+}
+
bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
Type *ToType) const {
return isTruncateFree(FromType, ToType);
@@ -1634,9 +1674,9 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
}
}
-// Emit an intrinsic with chain with a glued value instead of its CC result.
-static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
- unsigned Opcode) {
+// Emit an intrinsic with chain and an explicit CC register result.
+static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
// Copy all operands except the intrinsic ID.
unsigned NumOps = Op.getNumOperands();
SmallVector<SDValue, 6> Ops;
@@ -1646,17 +1686,17 @@ static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
Ops.push_back(Op.getOperand(I));
assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
- SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
SDValue OldChain = SDValue(Op.getNode(), 1);
- SDValue NewChain = SDValue(Intr.getNode(), 0);
+ SDValue NewChain = SDValue(Intr.getNode(), 1);
DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
- return Intr;
+ return Intr.getNode();
}
-// Emit an intrinsic with a glued value instead of its CC result.
-static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
- unsigned Opcode) {
+// Emit an intrinsic with an explicit CC register result.
+static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
// Copy all operands except the intrinsic ID.
unsigned NumOps = Op.getNumOperands();
SmallVector<SDValue, 6> Ops;
@@ -1664,11 +1704,8 @@ static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
for (unsigned I = 1; I < NumOps; ++I)
Ops.push_back(Op.getOperand(I));
- if (Op->getNumValues() == 1)
- return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
- assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
- SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
- return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
+ return Intr.getNode();
}
// CC is a comparison that will be implemented using an integer or
@@ -1699,73 +1736,6 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#undef CONV
}
-// Return a sequence for getting a 1 from an IPM result when CC has a
-// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
-// The handling of CC values outside CCValid doesn't matter.
-static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
- // Deal with cases where the result can be taken directly from a bit
- // of the IPM result.
- if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
- return IPMConversion(0, 0, SystemZ::IPM_CC);
- if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
- return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
-
- // Deal with cases where we can add a value to force the sign bit
- // to contain the right value. Putting the bit in 31 means we can
- // use SRL rather than RISBG(L), and also makes it easier to get a
- // 0/-1 value, so it has priority over the other tests below.
- //
- // These sequences rely on the fact that the upper two bits of the
- // IPM result are zero.
- uint64_t TopBit = uint64_t(1) << 31;
- if (CCMask == (CCValid & SystemZ::CCMASK_0))
- return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
- return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & (SystemZ::CCMASK_0
- | SystemZ::CCMASK_1
- | SystemZ::CCMASK_2)))
- return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & SystemZ::CCMASK_3))
- return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & (SystemZ::CCMASK_1
- | SystemZ::CCMASK_2
- | SystemZ::CCMASK_3)))
- return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
-
- // Next try inverting the value and testing a bit. 0/1 could be
- // handled this way too, but we dealt with that case above.
- if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
- return IPMConversion(-1, 0, SystemZ::IPM_CC);
-
- // Handle cases where adding a value forces a non-sign bit to contain
- // the right value.
- if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
- return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
- if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
- return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
-
- // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are
- // can be done by inverting the low CC bit and applying one of the
- // sign-based extractions above.
- if (CCMask == (CCValid & SystemZ::CCMASK_1))
- return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & SystemZ::CCMASK_2))
- return IPMConversion(1 << SystemZ::IPM_CC,
- TopBit - (3 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & (SystemZ::CCMASK_0
- | SystemZ::CCMASK_1
- | SystemZ::CCMASK_3)))
- return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
- if (CCMask == (CCValid & (SystemZ::CCMASK_0
- | SystemZ::CCMASK_2
- | SystemZ::CCMASK_3)))
- return IPMConversion(1 << SystemZ::IPM_CC,
- TopBit - (1 << SystemZ::IPM_CC), 31);
-
- llvm_unreachable("Unexpected CC combination");
-}
-
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
@@ -2237,6 +2207,24 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
C.CCMask = NewCCMask;
}
+// See whether the comparison argument contains a redundant AND
+// and remove it if so. This sometimes happens due to the generic
+// BRCOND expansion.
+static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison &C) {
+ if (C.Op0.getOpcode() != ISD::AND)
+ return;
+ auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
+ if (!Mask)
+ return;
+ KnownBits Known;
+ DAG.computeKnownBits(C.Op0.getOperand(0), Known);
+ if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
+ return;
+
+ C.Op0 = C.Op0.getOperand(0);
+}
+
// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
@@ -2311,6 +2299,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
else
C.ICmpType = SystemZICMP::SignedOnly;
C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
+ adjustForRedundantAnd(DAG, DL, C);
adjustZeroCmp(DAG, DL, C);
adjustSubwordCmp(DAG, DL, C);
adjustForSubtraction(DAG, DL, C);
@@ -2330,29 +2319,28 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
if (!C.Op1.getNode()) {
- SDValue Op;
+ SDNode *Node;
switch (C.Op0.getOpcode()) {
case ISD::INTRINSIC_W_CHAIN:
- Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
- break;
+ Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
+ return SDValue(Node, 0);
case ISD::INTRINSIC_WO_CHAIN:
- Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
- break;
+ Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
+ return SDValue(Node, Node->getNumValues() - 1);
default:
llvm_unreachable("Invalid comparison operands");
}
- return SDValue(Op.getNode(), Op->getNumValues() - 1);
}
if (C.Opcode == SystemZISD::ICMP)
- return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
+ return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
DAG.getConstant(C.ICmpType, DL, MVT::i32));
if (C.Opcode == SystemZISD::TM) {
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
- return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
+ return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
DAG.getConstant(RegisterOnly, DL, MVT::i32));
}
- return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
+ return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}
// Implement a 32-bit *MUL_LOHI operation by extending both operands to
@@ -2383,29 +2371,16 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}
-// Return an i32 value that is 1 if the CC value produced by Glue is
+// Return an i32 value that is 1 if the CC value produced by CCReg is
// in the mask CCMask and 0 otherwise. CC is known to have a value
// in CCValid, so other values can be ignored.
-static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
+static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
unsigned CCValid, unsigned CCMask) {
- IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
- SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
-
- if (Conversion.XORValue)
- Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
-
- if (Conversion.AddValue)
- Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
-
- // The SHR/AND sequence should get optimized to an RISBG.
- Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.Bit, DL, MVT::i32));
- if (Conversion.Bit != 31)
- Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
- DAG.getConstant(1, DL, MVT::i32));
- return Result;
+ SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(CCValid, DL, MVT::i32),
+ DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
+ return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}
// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
@@ -2554,8 +2529,8 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
- SDValue Glue = emitCmp(DAG, DL, C);
- return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
+ SDValue CCReg = emitCmp(DAG, DL, C);
+ return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -2566,10 +2541,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
- SDValue Glue = emitCmp(DAG, DL, C);
+ SDValue CCReg = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
- DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}
// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
@@ -2619,36 +2594,11 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
}
- SDValue Glue = emitCmp(DAG, DL, C);
-
- // Special case for handling -1/0 results. The shifts we use here
- // should get optimized with the IPM conversion sequence.
- auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
- auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
- if (TrueC && FalseC) {
- int64_t TrueVal = TrueC->getSExtValue();
- int64_t FalseVal = FalseC->getSExtValue();
- if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
- // Invert the condition if we want -1 on false.
- if (TrueVal == 0)
- C.CCMask ^= C.CCValid;
- SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
- EVT VT = Op.getValueType();
- // Extend the result to VT. Upper bits are ignored.
- if (!is32Bit(VT))
- Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
- // Sign-extend from the low bit.
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
- return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
- }
- }
-
+ SDValue CCReg = emitCmp(DAG, DL, C);
SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
- DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
+ DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
+ return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
@@ -2757,7 +2707,7 @@ SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
- if (DAG.getTarget().Options.EmulatedTLS)
+ if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(Node, DAG);
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
@@ -3266,6 +3216,99 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+// Lower SADDO/SSUBO/UADDO/USUBO nodes.
+SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc DL(N);
+ unsigned BaseOp = 0;
+ unsigned CCValid = 0;
+ unsigned CCMask = 0;
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction!");
+ case ISD::SADDO:
+ BaseOp = SystemZISD::SADDO;
+ CCValid = SystemZ::CCMASK_ARITH;
+ CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
+ break;
+ case ISD::SSUBO:
+ BaseOp = SystemZISD::SSUBO;
+ CCValid = SystemZ::CCMASK_ARITH;
+ CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
+ break;
+ case ISD::UADDO:
+ BaseOp = SystemZISD::UADDO;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
+ break;
+ case ISD::USUBO:
+ BaseOp = SystemZISD::USUBO;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
+ break;
+ }
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+
+ SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
+ if (N->getValueType(1) == MVT::i1)
+ SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
+}
+
+// Lower ADDCARRY/SUBCARRY nodes.
+SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SDNode *N = Op.getNode();
+ MVT VT = N->getSimpleValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = Op.getOperand(2);
+ SDLoc DL(N);
+ unsigned BaseOp = 0;
+ unsigned CCValid = 0;
+ unsigned CCMask = 0;
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction!");
+ case ISD::ADDCARRY:
+ BaseOp = SystemZISD::ADDCARRY;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
+ break;
+ case ISD::SUBCARRY:
+ BaseOp = SystemZISD::SUBCARRY;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
+ break;
+ }
+
+ // Set the condition code from the carry flag.
+ Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
+ DAG.getConstant(CCValid, DL, MVT::i32),
+ DAG.getConstant(CCMask, DL, MVT::i32));
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
+
+ SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
+ if (N->getValueType(1) == MVT::i1)
+ SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
+}
+
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -3512,16 +3555,16 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
EVT NarrowVT = Node->getMemoryVT();
EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
if (NarrowVT == WideVT) {
- SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue);
+ SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
DL, Tys, Ops, NarrowVT, MMO);
- SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2),
+ SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
return SDValue();
}
@@ -3546,17 +3589,17 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
DAG.getConstant(0, DL, WideVT), BitShift);
// Construct the ATOMIC_CMP_SWAPW node.
- SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue);
+ SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
VTList, Ops, NarrowVT, MMO);
- SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2),
+ SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
return SDValue();
}
@@ -3613,12 +3656,10 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
Node->getMemoryVT(), Node->getMemOperand());
}
-// Return an i32 that contains the value of CC immediately after After,
-// whose final operand must be MVT::Glue.
-static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
- SDLoc DL(After);
- SDValue Glue = SDValue(After, After->getNumValues() - 1);
- SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+// Convert condition code in CCReg to an i32 value.
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
+ SDLoc DL(CCReg);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}
@@ -3629,8 +3670,8 @@ SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
unsigned Opcode, CCValid;
if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
- SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
- SDValue CC = getCCResult(DAG, Glued.getNode());
+ SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, SDValue(Node, 0));
DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
return SDValue();
}
@@ -3643,13 +3684,12 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opcode, CCValid;
if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
- SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
- SDValue CC = getCCResult(DAG, Glued.getNode());
+ SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
if (Op->getNumValues() == 1)
- return CC;
+ return getCCResult(DAG, SDValue(Node, 0));
assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
- CC);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
+ SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
}
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -3853,20 +3893,34 @@ static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
return nullptr;
}
-// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.
-static void getVPermMask(ShuffleVectorSDNode *VSN,
+static bool getVPermMask(SDValue ShuffleOp,
SmallVectorImpl<int> &Bytes) {
- EVT VT = VSN->getValueType(0);
+ EVT VT = ShuffleOp.getValueType();
unsigned NumElements = VT.getVectorNumElements();
unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
- Bytes.resize(NumElements * BytesPerElement, -1);
- for (unsigned I = 0; I < NumElements; ++I) {
- int Index = VSN->getMaskElt(I);
- if (Index >= 0)
+
+ if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Index = VSN->getMaskElt(I);
+ if (Index >= 0)
+ for (unsigned J = 0; J < BytesPerElement; ++J)
+ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ }
+ return true;
+ }
+ if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
+ isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
+ unsigned Index = ShuffleOp.getConstantOperandVal(1);
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I)
for (unsigned J = 0; J < BytesPerElement; ++J)
Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ return true;
}
+ return false;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
@@ -4035,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
// See whether the bytes we need come from a contiguous part of one
// operand.
SmallVector<int, SystemZ::VectorBytes> OpBytes;
- getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+ if (!getVPermMask(Op, OpBytes))
+ break;
int NewByte;
if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
break;
@@ -4217,9 +4272,9 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
if (!Op.isUndef()) {
uint64_t Value;
if (Op.getOpcode() == ISD::Constant)
- Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
+ Value = cast<ConstantSDNode>(Op)->getZExtValue();
else if (Op.getOpcode() == ISD::ConstantFP)
- Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
+ Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
.getZExtValue());
else
return false;
@@ -4245,12 +4300,15 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
const SDLoc &DL, EVT VT, uint64_t Value,
unsigned BitsPerElement) {
// Signed 16-bit values can be replicated using VREPI.
+ // Mark the constants as opaque or DAGCombiner will convert back to
+ // BUILD_VECTOR.
int64_t SignedValue = SignExtend64(Value, BitsPerElement);
if (isInt<16>(SignedValue)) {
MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
- DAG.getConstant(SignedValue, DL, MVT::i32));
+ SDValue Op = DAG.getNode(
+ SystemZISD::REPLICATE, DL, VecVT,
+ DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
// See whether rotating the constant left some N places gives a value that
@@ -4266,9 +4324,10 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
End -= 64 - BitsPerElement;
MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
- DAG.getConstant(Start, DL, MVT::i32),
- DAG.getConstant(End, DL, MVT::i32));
+ SDValue Op = DAG.getNode(
+ SystemZISD::ROTATE_MASK, DL, VecVT,
+ DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
+ DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
return SDValue();
@@ -4481,8 +4540,9 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
// priority over other methods below.
uint64_t Mask = 0;
if (tryBuildVectorByteMask(BVN, Mask)) {
- SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
- DAG.getConstant(Mask, DL, MVT::i32));
+ SDValue Op = DAG.getNode(
+ SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
@@ -4597,7 +4657,7 @@ SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Op1.getOpcode() != ISD::BITCAST &&
Op1.getOpcode() != ISD::ConstantFP &&
Op2.getOpcode() == ISD::Constant) {
- uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
+ uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
unsigned Mask = VT.getVectorNumElements() - 1;
if (Index <= Mask)
return Op;
@@ -4753,6 +4813,14 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSDIVREM(Op, DAG);
case ISD::UDIVREM:
return lowerUDIVREM(Op, DAG);
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ return lowerXALUO(Op, DAG);
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY:
+ return lowerADDSUBCARRY(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
case ISD::CTPOP:
@@ -4881,19 +4949,19 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
SDLoc DL(N);
- SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue);
+ SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
lowerI128ToGR128(DAG, N->getOperand(2)),
lowerI128ToGR128(DAG, N->getOperand(3)) };
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
DL, Tys, Ops, MVT::i128, MMO);
- SDValue Success = emitSETCC(DAG, DL, Res.getValue(2),
+ SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
Results.push_back(lowerGR128ToI128(DAG, Res));
Results.push_back(Success);
- Results.push_back(Res.getValue(1));
+ Results.push_back(Res.getValue(2));
break;
}
default:
@@ -4931,6 +4999,13 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(UMUL_LOHI);
OPCODE(SDIVREM);
OPCODE(UDIVREM);
+ OPCODE(SADDO);
+ OPCODE(SSUBO);
+ OPCODE(UADDO);
+ OPCODE(USUBO);
+ OPCODE(ADDCARRY);
+ OPCODE(SUBCARRY);
+ OPCODE(GET_CCMASK);
OPCODE(MVC);
OPCODE(MVC_LOOP);
OPCODE(NC);
@@ -5049,13 +5124,14 @@ SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
if (Opcode == ISD::BITCAST)
// Look through bitcasts.
Op = Op.getOperand(0);
- else if (Opcode == ISD::VECTOR_SHUFFLE &&
+ else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
canTreatAsByteVector(Op.getValueType())) {
// Get a VPERM-like permute mask and see whether the bytes covered
// by the extracted element are a contiguous sequence from one
// source operand.
SmallVector<int, SystemZ::VectorBytes> Bytes;
- getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+ if (!getVPermMask(Op, Bytes))
+ break;
int First;
if (!getShuffleInput(Bytes, Index * BytesPerElement,
BytesPerElement, First))
@@ -5174,6 +5250,54 @@ SDValue SystemZTargetLowering::combineTruncateExtract(
return SDValue();
}
+SDValue SystemZTargetLowering::combineZERO_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
+ auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (TrueOp && FalseOp) {
+ SDLoc DL(N0);
+ SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
+ DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
+ N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
+ SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
+ // If N0 has multiple uses, change other uses as well.
+ if (!N0.hasOneUse()) {
+ SDValue TruncSelect =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
+ DCI.CombineTo(N0.getNode(), TruncSelect);
+ }
+ return NewSelect;
+ }
+ }
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
+ // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
+ // into (select_cc LHS, RHS, -1, 0, COND)
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
+ N0 = N0.getOperand(0);
+ if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
+ SDLoc DL(N0);
+ SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineSIGN_EXTEND(
SDNode *N, DAGCombinerInfo &DCI) const {
// Convert (sext (ashr (shl X, C1), C2)) to
@@ -5249,7 +5373,7 @@ SDValue SystemZTargetLowering::combineSTORE(
// for the extraction to be done on a vMiN value, so that we can use VSTE.
// If X has wider elements then convert it to:
// (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
- if (MemVT.isInteger()) {
+ if (MemVT.isInteger() && SN->isTruncatingStore()) {
if (SDValue Value =
combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
DCI.AddToWorklist(Value.getNode());
@@ -5261,9 +5385,7 @@ SDValue SystemZTargetLowering::combineSTORE(
}
}
// Combine STORE (BSWAP) into STRVH/STRV/STRVG
- // See comment in combineBSWAP about volatile accesses.
if (!SN->isTruncatingStore() &&
- !SN->isVolatile() &&
Op1.getOpcode() == ISD::BSWAP &&
Op1.getNode()->hasOneUse() &&
(Op1.getValueType() == MVT::i16 ||
@@ -5364,13 +5486,10 @@ SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG
- // These loads are allowed to access memory multiple times, and so we must check
- // that the loads are not volatile before performing the combine.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
- N->getValueType(0) == MVT::i64) &&
- !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
+ N->getValueType(0) == MVT::i64)) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
@@ -5475,11 +5594,157 @@ SDValue SystemZTargetLowering::combineSHIFTROT(
return SDValue();
}
+static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
+ // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
+ // set by the CCReg instruction using the CCValid / CCMask masks.
+ // If the CCReg instruction is itself an (ICMP (SELECT_CCMASK)) testing
+ // the condition code set by some other instruction, see whether we
+ // can directly use that condition code.
+ bool Invert = false;
+
+ // Verify that we have an appropriate mask for an EQ or NE comparison.
+ if (CCValid != SystemZ::CCMASK_ICMP)
+ return false;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ Invert = !Invert;
+ else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+ return false;
+
+ // Verify that we have an ICMP that is the user of a SELECT_CCMASK.
+ SDNode *ICmp = CCReg.getNode();
+ if (ICmp->getOpcode() != SystemZISD::ICMP)
+ return false;
+ SDNode *Select = ICmp->getOperand(0).getNode();
+ if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+ return false;
+
+ // Verify that the ICMP compares against one of the select values.
+ auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
+ if (!CompareVal)
+ return false;
+ auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
+ if (!TrueVal)
+ return false;
+ auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
+ if (!FalseVal)
+ return false;
+ if (CompareVal->getZExtValue() == FalseVal->getZExtValue())
+ Invert = !Invert;
+ else if (CompareVal->getZExtValue() != TrueVal->getZExtValue())
+ return false;
+
+ // Compute the effective CC mask for the new branch or select.
+ auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
+ auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
+ if (!NewCCValid || !NewCCMask)
+ return false;
+ CCValid = NewCCValid->getZExtValue();
+ CCMask = NewCCMask->getZExtValue();
+ if (Invert)
+ CCMask ^= CCValid;
+
+ // Return the updated CCReg link.
+ CCReg = Select->getOperand(4);
+ return true;
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
+ auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!CCValid || !CCMask)
+ return SDValue();
+
+ int CCValidVal = CCValid->getZExtValue();
+ int CCMaskVal = CCMask->getZExtValue();
+ SDValue Chain = N->getOperand(0);
+ SDValue CCReg = N->getOperand(4);
+
+ if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+ return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
+ Chain,
+ DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
+ DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
+ N->getOperand(3), CCReg);
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSELECT_CCMASK(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
+ auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CCValid || !CCMask)
+ return SDValue();
+
+ int CCValidVal = CCValid->getZExtValue();
+ int CCMaskVal = CCMask->getZExtValue();
+ SDValue CCReg = N->getOperand(4);
+
+ if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+ return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
+ N->getOperand(0),
+ N->getOperand(1),
+ DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
+ DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
+ CCReg);
+ return SDValue();
+}
+
+
+SDValue SystemZTargetLowering::combineGET_CCMASK(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+
+ // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
+ auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!CCValid || !CCMask)
+ return SDValue();
+ int CCValidVal = CCValid->getZExtValue();
+ int CCMaskVal = CCMask->getZExtValue();
+
+ SDValue Select = N->getOperand(0);
+ if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+ return SDValue();
+
+ auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
+ auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
+ if (!SelectCCValid || !SelectCCMask)
+ return SDValue();
+ int SelectCCValidVal = SelectCCValid->getZExtValue();
+ int SelectCCMaskVal = SelectCCMask->getZExtValue();
+
+ auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
+ auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
+ if (!TrueVal || !FalseVal)
+ return SDValue();
+ if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
+ ;
+ else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
+ SelectCCMaskVal ^= SelectCCValidVal;
+ else
+ return SDValue();
+
+ if (SelectCCValidVal & ~CCValidVal)
+ return SDValue();
+ if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
+ return SDValue();
+
+ return Select->getOperand(4);
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
default: break;
+ case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
+ case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
case SystemZISD::MERGE_HIGH:
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
@@ -5491,11 +5756,303 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL: return combineSHIFTROT(N, DCI);
+ case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
+ case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
+ case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
}
return SDValue();
}
+// Return the demanded elements for the OpNo source operand of Op. DemandedElts
+// are for Op.
+static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
+ unsigned OpNo) {
+ EVT VT = Op.getValueType();
+ unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
+ APInt SrcDemE;
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpksh: // PACKS
+ case Intrinsic::s390_vpksf:
+ case Intrinsic::s390_vpksg:
+ case Intrinsic::s390_vpkshs: // PACKS_CC
+ case Intrinsic::s390_vpksfs:
+ case Intrinsic::s390_vpksgs:
+ case Intrinsic::s390_vpklsh: // PACKLS
+ case Intrinsic::s390_vpklsf:
+ case Intrinsic::s390_vpklsg:
+ case Intrinsic::s390_vpklshs: // PACKLS_CC
+ case Intrinsic::s390_vpklsfs:
+ case Intrinsic::s390_vpklsgs:
+ // VECTOR PACK truncates the elements of two source vectors into one.
+ SrcDemE = DemandedElts;
+ if (OpNo == 2)
+ SrcDemE.lshrInPlace(NumElts / 2);
+ SrcDemE = SrcDemE.trunc(NumElts / 2);
+ break;
+ // VECTOR UNPACK extends half the elements of the source vector.
+ case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
+ case Intrinsic::s390_vuphh:
+ case Intrinsic::s390_vuphf:
+ case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
+ case Intrinsic::s390_vuplhh:
+ case Intrinsic::s390_vuplhf:
+ SrcDemE = APInt(NumElts * 2, 0);
+ SrcDemE.insertBits(DemandedElts, 0);
+ break;
+ case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
+ case Intrinsic::s390_vuplhw:
+ case Intrinsic::s390_vuplf:
+ case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
+ case Intrinsic::s390_vupllh:
+ case Intrinsic::s390_vupllf:
+ SrcDemE = APInt(NumElts * 2, 0);
+ SrcDemE.insertBits(DemandedElts, NumElts);
+ break;
+ case Intrinsic::s390_vpdi: {
+ // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
+ SrcDemE = APInt(NumElts, 0);
+ if (!DemandedElts[OpNo - 1])
+ break;
+ unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
+ // Demand input element 0 or 1, given by the mask bit value.
+ SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
+ break;
+ }
+ case Intrinsic::s390_vsldb: {
+ // VECTOR SHIFT LEFT DOUBLE BY BYTE
+ assert(VT == MVT::v16i8 && "Unexpected type.");
+ unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
+ unsigned NumSrc0Els = 16 - FirstIdx;
+ SrcDemE = APInt(NumElts, 0);
+ if (OpNo == 1) {
+ APInt DemEls = DemandedElts.trunc(NumSrc0Els);
+ SrcDemE.insertBits(DemEls, FirstIdx);
+ } else {
+ APInt DemEls = DemandedElts.lshr(NumSrc0Els);
+ SrcDemE.insertBits(DemEls, 0);
+ }
+ break;
+ }
+ case Intrinsic::s390_vperm:
+ SrcDemE = APInt(NumElts, 1);
+ break;
+ default:
+ llvm_unreachable("Unhandled intrinsic.");
+ break;
+ }
+ } else {
+ switch (Opcode) {
+ case SystemZISD::JOIN_DWORDS:
+ // Scalar operand.
+ SrcDemE = APInt(1, 1);
+ break;
+ case SystemZISD::SELECT_CCMASK:
+ SrcDemE = DemandedElts;
+ break;
+ default:
+ llvm_unreachable("Unhandled opcode.");
+ break;
+ }
+ }
+ return SrcDemE;
+}
+
+static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth,
+ unsigned OpNo) {
+ APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
+ APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
+ unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
+ KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
+ DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
+ DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
+ Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
+ Known.One = LHSKnown.One & RHSKnown.One;
+}
+
+void
+SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ Known.resetAll();
+
+ // Intrinsic CC result is returned in the two low bits.
+ unsigned tmp0, tmp1; // not used
+ if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
+ Known.Zero.setBitsFrom(2);
+ return;
+ }
+ EVT VT = Op.getValueType();
+ if (Op.getResNo() != 0 || VT == MVT::Untyped)
+ return;
+ assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
+ "KnownBits does not match VT in bitwidth");
+ assert ((!VT.isVector() ||
+ (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
+ "DemandedElts does not match VT number of elements");
+ unsigned BitWidth = Known.getBitWidth();
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
+ bool IsLogical = false;
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpksh: // PACKS
+ case Intrinsic::s390_vpksf:
+ case Intrinsic::s390_vpksg:
+ case Intrinsic::s390_vpkshs: // PACKS_CC
+ case Intrinsic::s390_vpksfs:
+ case Intrinsic::s390_vpksgs:
+ case Intrinsic::s390_vpklsh: // PACKLS
+ case Intrinsic::s390_vpklsf:
+ case Intrinsic::s390_vpklsg:
+ case Intrinsic::s390_vpklshs: // PACKLS_CC
+ case Intrinsic::s390_vpklsfs:
+ case Intrinsic::s390_vpklsgs:
+ case Intrinsic::s390_vpdi:
+ case Intrinsic::s390_vsldb:
+ case Intrinsic::s390_vperm:
+ computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
+ break;
+ case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
+ case Intrinsic::s390_vuplhh:
+ case Intrinsic::s390_vuplhf:
+ case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
+ case Intrinsic::s390_vupllh:
+ case Intrinsic::s390_vupllf:
+ IsLogical = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
+ case Intrinsic::s390_vuphh:
+ case Intrinsic::s390_vuphf:
+ case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
+ case Intrinsic::s390_vuplhw:
+ case Intrinsic::s390_vuplf: {
+ SDValue SrcOp = Op.getOperand(1);
+ unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
+ Known = KnownBits(SrcBitWidth);
+ APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
+ DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
+ if (IsLogical) {
+ Known = Known.zext(BitWidth);
+ Known.Zero.setBitsFrom(SrcBitWidth);
+ } else
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ default:
+ break;
+ }
+ } else {
+ switch (Opcode) {
+ case SystemZISD::JOIN_DWORDS:
+ case SystemZISD::SELECT_CCMASK:
+ computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
+ break;
+ case SystemZISD::REPLICATE: {
+ SDValue SrcOp = Op.getOperand(0);
+ DAG.computeKnownBits(SrcOp, Known, Depth + 1);
+ if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
+ Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ // Known has the width of the source operand(s). Adjust if needed to match
+ // the passed bitwidth.
+ if (Known.getBitWidth() != BitWidth)
+ Known = Known.zextOrTrunc(BitWidth);
+}
+
+static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth,
+ unsigned OpNo) {
+ APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
+ unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
+ if (LHS == 1) return 1; // Early out.
+ APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
+ unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
+ if (RHS == 1) return 1; // Early out.
+ unsigned Common = std::min(LHS, RHS);
+ unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ if (SrcBitWidth > VTBits) { // PACK
+ unsigned SrcExtraBits = SrcBitWidth - VTBits;
+ if (Common > SrcExtraBits)
+ return (Common - SrcExtraBits);
+ return 1;
+ }
+ assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
+ return Common;
+}
+
+unsigned
+SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
+ if (Op.getResNo() != 0)
+ return 1;
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpksh: // PACKS
+ case Intrinsic::s390_vpksf:
+ case Intrinsic::s390_vpksg:
+ case Intrinsic::s390_vpkshs: // PACKS_CC
+ case Intrinsic::s390_vpksfs:
+ case Intrinsic::s390_vpksgs:
+ case Intrinsic::s390_vpklsh: // PACKLS
+ case Intrinsic::s390_vpklsf:
+ case Intrinsic::s390_vpklsg:
+ case Intrinsic::s390_vpklshs: // PACKLS_CC
+ case Intrinsic::s390_vpklsfs:
+ case Intrinsic::s390_vpklsgs:
+ case Intrinsic::s390_vpdi:
+ case Intrinsic::s390_vsldb:
+ case Intrinsic::s390_vperm:
+ return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
+ case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
+ case Intrinsic::s390_vuphh:
+ case Intrinsic::s390_vuphf:
+ case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
+ case Intrinsic::s390_vuplhw:
+ case Intrinsic::s390_vuplf: {
+ SDValue PackedOp = Op.getOperand(1);
+ APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
+ unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
+ return Tmp;
+ }
+ default:
+ break;
+ }
+ } else {
+ switch (Opcode) {
+ case SystemZISD::SELECT_CCMASK:
+ return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
+ default:
+ break;
+ }
+ }
+
+ return 1;
+}
+
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//
@@ -5546,34 +6103,141 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
return Reg;
}
+// The CC operand of MI might be missing a kill marker because there
+// were multiple uses of CC, and ISel didn't know which to mark.
+// Figure out whether MI should have had a kill marker.
+static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
+ // Scan forward through BB for a use/def of CC.
+ MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
+ for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(SystemZ::CC))
+ return false;
+ if (mi.definesRegister(SystemZ::CC))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether CC is live into a
+ // successor.
+ if (miI == MBB->end()) {
+ for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(SystemZ::CC))
+ return false;
+ }
+
+ return true;
+}
+
+// Return true if it is OK for this Select pseudo-opcode to be cascaded
+// together with other Select pseudo-opcodes into a single basic-block with
+// a conditional jump around it.
+static bool isSelectPseudo(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case SystemZ::Select32:
+ case SystemZ::Select64:
+ case SystemZ::SelectF32:
+ case SystemZ::SelectF64:
+ case SystemZ::SelectF128:
+ case SystemZ::SelectVR32:
+ case SystemZ::SelectVR64:
+ case SystemZ::SelectVR128:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Helper function, which inserts PHI functions into SinkMBB:
+// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
+// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects
+// in [MIItBegin, MIItEnd) range.
+static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
+ MachineBasicBlock::iterator MIItEnd,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB,
+ MachineBasicBlock *SinkMBB) {
+ MachineFunction *MF = TrueMBB->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ unsigned CCValid = MIItBegin->getOperand(3).getImm();
+ unsigned CCMask = MIItBegin->getOperand(4).getImm();
+ DebugLoc DL = MIItBegin->getDebugLoc();
+
+ MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
+
+ // As we are creating the PHIs, we have to be careful if there is more than
+ // one. Later Selects may reference the results of earlier Selects, but later
+ // PHIs have to reference the individual true/false inputs from earlier PHIs.
+ // That also means that PHI construction must work forward from earlier to
+ // later, and that the code must maintain a mapping from earlier PHI's
+ // destination registers, and the registers that went into the PHI.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
+
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+ unsigned DestReg = MIIt->getOperand(0).getReg();
+ unsigned TrueReg = MIIt->getOperand(1).getReg();
+ unsigned FalseReg = MIIt->getOperand(2).getReg();
+
+ // If this Select we are generating is the opposite condition from
+ // the jump we generated, then we have to swap the operands for the
+ // PHI that is going to be generated.
+ if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask))
+ std::swap(TrueReg, FalseReg);
+
+ if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
+ TrueReg = RegRewriteTable[TrueReg].first;
+
+ if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
+ FalseReg = RegRewriteTable[FalseReg].second;
+
+ BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
+ .addReg(TrueReg).addMBB(TrueMBB)
+ .addReg(FalseReg).addMBB(FalseMBB);
+
+ // Add this PHI to the rewrite table.
+ RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
+ }
+}
+
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
- MachineBasicBlock *MBB,
- unsigned LOCROpcode) const {
+ MachineBasicBlock *MBB) const {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- unsigned DestReg = MI.getOperand(0).getReg();
- unsigned TrueReg = MI.getOperand(1).getReg();
- unsigned FalseReg = MI.getOperand(2).getReg();
unsigned CCValid = MI.getOperand(3).getImm();
unsigned CCMask = MI.getOperand(4).getImm();
DebugLoc DL = MI.getDebugLoc();
- // Use LOCROpcode if possible.
- if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
- BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
- .addReg(FalseReg).addReg(TrueReg)
- .addImm(CCValid).addImm(CCMask);
- MI.eraseFromParent();
- return MBB;
- }
+ // If we have a sequence of Select* pseudo instructions using the
+ // same condition code value, we want to expand all of them into
+ // a single pair of basic blocks using the same condition.
+ MachineInstr *LastMI = &MI;
+ MachineBasicBlock::iterator NextMIIt =
+ std::next(MachineBasicBlock::iterator(MI));
+
+ if (isSelectPseudo(MI))
+ while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
+ NextMIIt->getOperand(3).getImm() == CCValid &&
+ (NextMIIt->getOperand(4).getImm() == CCMask ||
+ NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
+ LastMI = &*NextMIIt;
+ ++NextMIIt;
+ }
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ // Unless CC was killed in the last Select instruction, mark it as
+ // live-in to both FalseMBB and JoinMBB.
+ if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) {
+ FalseMBB->addLiveIn(SystemZ::CC);
+ JoinMBB->addLiveIn(SystemZ::CC);
+ }
+
// StartMBB:
// BRC CCMask, JoinMBB
// # fallthrough to FalseMBB
@@ -5592,11 +6256,12 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
// %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
// ...
MBB = JoinMBB;
- BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
- .addReg(TrueReg).addMBB(StartMBB)
- .addReg(FalseReg).addMBB(FalseMBB);
+ MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator MIItEnd =
+ std::next(MachineBasicBlock::iterator(LastMI));
+ createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);
- MI.eraseFromParent();
+ StartMBB->erase(MIItBegin, MIItEnd);
return JoinMBB;
}
@@ -5658,6 +6323,13 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ // Unless CC was killed in the CondStore instruction, mark it as
+ // live-in to both FalseMBB and JoinMBB.
+ if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
+ FalseMBB->addLiveIn(SystemZ::CC);
+ JoinMBB->addLiveIn(SystemZ::CC);
+ }
+
// StartMBB:
// BRC CCMask, JoinMBB
// # fallthrough to FalseMBB
@@ -6223,6 +6895,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
DestBase = MachineOperand::CreateReg(NextDestReg, false);
SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
Length &= 255;
+ if (EndMBB && !Length)
+ // If the loop handled the whole CLC range, DoneMBB will be empty with
+ // CC live-through into EndMBB, so add it as live-in.
+ DoneMBB->addLiveIn(SystemZ::CC);
MBB = DoneMBB;
}
// Handle any remaining bytes with straight-line code.
@@ -6415,18 +7091,15 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
- case SystemZ::Select32Mux:
- return emitSelect(MI, MBB,
- Subtarget.hasLoadStoreOnCond2()? SystemZ::LOCRMux : 0);
case SystemZ::Select32:
- return emitSelect(MI, MBB, SystemZ::LOCR);
case SystemZ::Select64:
- return emitSelect(MI, MBB, SystemZ::LOCGR);
case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
+ case SystemZ::SelectVR32:
+ case SystemZ::SelectVR64:
case SystemZ::SelectVR128:
- return emitSelect(MI, MBB, 0);
+ return emitSelect(MI, MBB);
case SystemZ::CondStore8Mux:
return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
@@ -6675,6 +7348,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::LTXBRCompare_VecPseudo:
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return emitPatchPoint(MI, MBB);
+
default:
llvm_unreachable("Unexpected instr type to insert");
}