Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZISelLowering.cpp')
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 655 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 522 insertions(+), 133 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c73905d3357a..eb1e51341ec4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -88,25 +88,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- if (Subtarget.hasVector()) {
- addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
- } else {
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
- }
- if (Subtarget.hasVectorEnhancements1())
- addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
- else
- addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (!useSoftFloat()) {
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
+ if (Subtarget.hasVectorEnhancements1())
+ addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
+ else
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
- if (Subtarget.hasVector()) {
- addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ }
}
// Compute derived properties from the register classes
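This guard is what implements soft-float at the ISel level: with no FP or vector register classes registered, floating-point types are never legal and operations on them are expanded to library calls. A hedged illustration (standard libgcc/compiler-rt helper name; the exact lowering depends on the rest of this patch series):

    // Built with, e.g., clang -march=z13 -msoft-float, the addition below
    // cannot use FP registers and is expected to become a call to the
    // soft-float runtime helper __adddf3.
    double add(double A, double B) { return A + B; }
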
@@ -639,12 +641,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
setTargetDAGCombine(ISD::SREM);
setTargetDAGCombine(ISD::UREM);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -666,6 +672,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
IsStrictFPEnabled = true;
}
+bool SystemZTargetLowering::useSoftFloat() const {
+ return Subtarget.hasSoftFloat();
+}
+
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &, EVT VT) const {
if (!VT.isVector())
@@ -816,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
+/// Returns true if stack probing through inline assembly is requested.
+bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
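The "probe-stack" string attribute tested in hasInlineStackProbe lives on the IR function. A minimal sketch of how a front end or pass might request inline probing (helper name is hypothetical; Function::addFnAttr is the real API):

    #include "llvm/IR/Function.h"

    // Mark F so that hasInlineStackProbe() above returns true for it.
    static void requestInlineStackProbes(llvm::Function &F) {
      F.addFnAttr("probe-stack", "inline-asm");
    }
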
@@ -1123,12 +1142,14 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
case 'f': // Floating-point register
- if (VT == MVT::f64)
- return std::make_pair(0U, &SystemZ::FP64BitRegClass);
- else if (VT == MVT::f128)
- return std::make_pair(0U, &SystemZ::FP128BitRegClass);
- return std::make_pair(0U, &SystemZ::FP32BitRegClass);
-
+ if (!useSoftFloat()) {
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SystemZ::FP64BitRegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &SystemZ::FP128BitRegClass);
+ return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+ }
+ break;
case 'v': // Vector register
if (Subtarget.hasVector()) {
if (VT == MVT::f32)
@@ -1156,6 +1177,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
SystemZMC::GR64Regs, 16);
}
if (Constraint[1] == 'f') {
+ if (useSoftFloat())
+ return std::make_pair(
+ 0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
SystemZMC::FP32Regs, 16);
@@ -1166,6 +1190,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
SystemZMC::FP64Regs, 16);
}
if (Constraint[1] == 'v') {
+ if (!Subtarget.hasVector())
+ return std::make_pair(
+ 0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
SystemZMC::VR32Regs, 32);
@@ -1179,6 +1206,19 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
+
+ Register Reg = StringSwitch<Register>(RegName)
+ .Case("r15", SystemZ::R15D)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
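getRegisterByName backs the named-register global-variable extension, and only the stack pointer is supported here. A hedged C example of what this enables (GNU extension; assumes clang accepts r15 as a named register on SystemZ):

    // Reads the stack pointer through a named register global variable.
    // getRegisterByName() maps "r15" to SystemZ::R15D for the
    // llvm.read_register intrinsic that the front end emits for this.
    register unsigned long StackPointer __asm__("r15");

    unsigned long currentSP(void) { return StackPointer; }
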
void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -1437,17 +1477,19 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
- int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
+ int64_t RegSaveOffset =
+ -SystemZMC::CallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
// Store the FPR varargs in the reserved frame slots. (We store the
// GPRs as part of the prologue.)
- if (NumFixedFPRs < SystemZ::NumArgFPRs) {
+ if (NumFixedFPRs < SystemZ::NumArgFPRs && !useSoftFloat()) {
SDValue MemOps[SystemZ::NumArgFPRs];
for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
- unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
- int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
+ unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ArgFPRs[I]);
+ int FI =
+ MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize + Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
&SystemZ::FP64BitRegClass);
@@ -1633,6 +1675,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall)
return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -2020,8 +2063,9 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
// We must have an 8- or 16-bit load.
auto *Load = cast<LoadSDNode>(C.Op0);
- unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
- if (NumBits != 8 && NumBits != 16)
+ unsigned NumBits = Load->getMemoryVT().getSizeInBits();
+ if ((NumBits != 8 && NumBits != 16) ||
+ NumBits != Load->getMemoryVT().getStoreSizeInBits())
return;
// The load must be an extending one and the constant must be within the
@@ -2161,15 +2205,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
return false;
}
-// Return a version of comparison CC mask CCMask in which the LT and GT
-// actions are swapped.
-static unsigned reverseCCMask(unsigned CCMask) {
- return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
- (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
- (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
- (CCMask & SystemZ::CCMASK_CMP_UO));
-}
-
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
@@ -2205,7 +2240,7 @@ static void adjustForFNeg(Comparison &C) {
SDNode *N = *I;
if (N->getOpcode() == ISD::FNEG) {
C.Op0 = SDValue(N, 0);
- C.CCMask = reverseCCMask(C.CCMask);
+ C.CCMask = SystemZ::reverseCCMask(C.CCMask);
return;
}
}
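The file-local reverseCCMask deleted above now lives in the shared SystemZ namespace so other files can use it. Its expected body matches the removed helper, a sketch:

    // Swap the LT and GT bits of a comparison CC mask, keeping EQ and UO.
    // Mirrors the static helper removed earlier in this patch.
    static unsigned reverseCCMaskSketch(unsigned CCMask) {
      return (CCMask & SystemZ::CCMASK_CMP_EQ) |
             (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
             (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
             (CCMask & SystemZ::CCMASK_CMP_UO);
    }

Because LE and GE are just EQ|LT and EQ|GT, this bit formulation also covers the LE/GE cases that the switch statement in combineCCMask (replaced later in this patch) spelled out explicitly.
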
@@ -2572,7 +2607,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
if (shouldSwapCmpOperands(C)) {
std::swap(C.Op0, C.Op1);
- C.CCMask = reverseCCMask(C.CCMask);
+ C.CCMask = SystemZ::reverseCCMask(C.CCMask);
}
adjustForTestUnderMask(DAG, DL, C);
@@ -3103,7 +3138,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3118,7 +3153,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3136,7 +3171,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
// Add the per-symbol offset.
CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
- SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
DTPOffset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), DTPOffset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3161,7 +3196,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3202,11 +3237,11 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue Result;
if (CP->isMachineConstantPoolEntry())
- Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ Result =
+ DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
else
- Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
+ CP->getOffset());
// Use LARL to load the address of the constant pool entry.
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -3214,6 +3249,8 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -3222,9 +3259,12 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ // Return null if the back chain is not present.
+ bool HasBackChain = MF.getFunction().hasFnAttribute("backchain");
+ if (TFL->usePackedStack(MF) && !HasBackChain)
+ return DAG.getConstant(0, DL, PtrVT);
+
// By definition, the frame address is the address of the back chain.
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
@@ -3355,9 +3395,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
- /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
- /*isTailCall*/false,
- MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+ Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
+ /*isTailCall*/ false, MachinePointerInfo(DstSV),
+ MachinePointerInfo(SrcSV));
}
SDValue SystemZTargetLowering::
@@ -3398,10 +3438,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
-
- // Copy the new stack pointer back.
- Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ SDValue NewSP;
+ if (hasInlineStackProbe(MF)) {
+ NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
+ DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
+ Chain = NewSP.getValue(1);
+ } else {
+ NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+ // Copy the new stack pointer back.
+ Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ }
// The allocated data lives above the 160 bytes allocated for the standard
// frame, plus any outgoing stack arguments. We don't know how much that
@@ -3995,7 +4042,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
}
MachineMemOperand::Flags
-SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
+SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Because of how we convert atomic_load and atomic_store to normal loads and
// stores in the DAG, we need to ensure that the MMOs are marked volatile
// since DAGCombine hasn't been updated to account for atomic, but non
@@ -4362,7 +4409,7 @@ static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
-// undefined bytes. Return true if it can be performed using VSLDI.
+// undefined bytes. Return true if it can be performed using VSLDB.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
@@ -4420,23 +4467,86 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
return Op;
}
+static bool isZeroVector(SDValue N) {
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+ if (N->getOpcode() == ISD::SPLAT_VECTOR)
+ if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
+ return Op->getZExtValue() == 0;
+ return ISD::isBuildVectorAllZeros(N.getNode());
+}
+
+// Return the index of the zero/undef vector, or UINT32_MAX if not found.
+static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
+ for (unsigned I = 0; I < Num ; I++)
+ if (isZeroVector(Ops[I]))
+ return I;
+ return UINT32_MAX;
+}
+
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
-// VSLDI or VPERM.
+// VSLDB or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
SDValue *Ops,
const SmallVectorImpl<int> &Bytes) {
for (unsigned I = 0; I < 2; ++I)
Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
- // First see whether VSLDI can be used.
+ // First see whether VSLDB can be used.
unsigned StartIndex, OpNo0, OpNo1;
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
Ops[OpNo1],
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
- // Fall back on VPERM. Construct an SDNode for the permute vector.
+ // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
+ // eliminate a zero vector by reusing any zero index in the permute vector.
+ unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
+ if (ZeroVecIdx != UINT32_MAX) {
+ bool MaskFirst = true;
+ int ZeroIdx = -1;
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx && I == 0) {
+ // If the first byte is zero, use mask as first operand.
+ ZeroIdx = 0;
+ break;
+ }
+ if (OpNo != ZeroVecIdx && Byte == 0) {
+ // If mask contains a zero, use it by placing that vector first.
+ ZeroIdx = I + SystemZ::VectorBytes;
+ MaskFirst = false;
+ break;
+ }
+ }
+ if (ZeroIdx != -1) {
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx)
+ IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
+ else {
+ unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
+ IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
+ }
+ } else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ }
+ SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
+ SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
+ if (MaskFirst)
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
+ Mask);
+ else
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
+ Mask);
+ }
+ }
+
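A worked instance of the zero-vector elimination above (sketch, using VPERM's byte numbering over the two concatenated operands): suppose Ops[1] is all zeroes and Bytes = {16, 0, 16, 1, ..., 16, 7}, i.e. each source byte is interleaved with a zero. Bytes[0] selects the zero vector, so ZeroIdx becomes 0 and MaskFirst stays true. Every zero position gets mask value 0, and because the mask vector itself then holds a 0 in byte 0, the mask doubles as the zero operand:

    // VPERM(Mask, Src, Mask): mask value 0 selects Mask[0], which holds
    // the constant 0, so no separate zero vector is materialized.
    // Source bytes are rebased to 16..31 since Src is the second operand.

This saves materializing an all-zeroes register just to feed the permute.
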
SDValue IndexNodes[SystemZ::VectorBytes];
for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
if (Bytes[I] >= 0)
@@ -4444,16 +4554,20 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
else
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
- return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
+ (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
- GeneralShuffle(EVT vt) : VT(vt) {}
+ GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
void addUndef();
bool add(SDValue, unsigned);
SDValue getNode(SelectionDAG &, const SDLoc &);
+ void tryPrepareForUnpack();
+ bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
+ SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
// The operands of the shuffle.
SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -4465,6 +4579,9 @@ struct GeneralShuffle {
// The type of the shuffle result.
EVT VT;
+
+ // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
+ unsigned UnpackFromEltSize;
};
}
@@ -4547,6 +4664,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
if (Ops.size() == 0)
return DAG.getUNDEF(VT);
+ // Use a single unpack if possible as the last operation.
+ tryPrepareForUnpack();
+
// Make sure that there are at least two shuffle operands.
if (Ops.size() == 1)
Ops.push_back(DAG.getUNDEF(MVT::v16i8));
@@ -4612,13 +4732,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
// to VPERM.
unsigned OpNo0, OpNo1;
SDValue Op;
- if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+ if (unpackWasPrepared() && Ops[1].isUndef())
+ Op = Ops[0];
+ else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
else
Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+
+ Op = insertUnpackIfPrepared(DAG, DL, Op);
+
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+#ifndef NDEBUG
+static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
+ dbgs() << Msg.c_str() << " { ";
+ for (unsigned i = 0; i < Bytes.size(); i++)
+ dbgs() << Bytes[i] << " ";
+ dbgs() << "}\n";
+}
+#endif
+
+// If the Bytes vector matches an unpack operation, prepare to do the unpack
+// after all else by removing the zero vector and the effect of the unpack on
+// Bytes.
+void GeneralShuffle::tryPrepareForUnpack() {
+ uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
+ if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
+ return;
+
+ // Only do this if removing the zero vector reduces the depth, otherwise
+ // the critical path will increase with the final unpack.
+ if (Ops.size() > 2 &&
+ Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
+ return;
+
+ // Find an unpack that would allow removing the zero vector from Ops.
+ UnpackFromEltSize = 1;
+ for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
+ bool MatchUnpack = true;
+ SmallVector<int, SystemZ::VectorBytes> SrcBytes;
+ for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
+ unsigned ToEltSize = UnpackFromEltSize * 2;
+ bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
+ if (!IsZextByte)
+ SrcBytes.push_back(Bytes[Elt]);
+ if (Bytes[Elt] != -1) {
+ unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
+ if (IsZextByte != (OpNo == ZeroVecOpNo)) {
+ MatchUnpack = false;
+ break;
+ }
+ }
+ }
+ if (MatchUnpack) {
+ if (Ops.size() == 2) {
+ // Don't use unpack if a single source operand needs rearrangement.
+ for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
+ if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
+ UnpackFromEltSize = UINT_MAX;
+ return;
+ }
+ }
+ break;
+ }
+ }
+ if (UnpackFromEltSize > 4)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
+ << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
+ << ".\n";
+ dumpBytes(Bytes, "Original Bytes vector:"););
+
+ // Apply the unpack in reverse to the Bytes array.
+ unsigned B = 0;
+ for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
+ Elt += UnpackFromEltSize;
+ for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
+ Bytes[B] = Bytes[Elt];
+ }
+ while (B < SystemZ::VectorBytes)
+ Bytes[B++] = -1;
+
+ // Remove the zero vector from Ops
+ Ops.erase(&Ops[ZeroVecOpNo]);
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ if (OpNo > ZeroVecOpNo)
+ Bytes[I] -= SystemZ::VectorBytes;
+ }
+
+ LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
+ dbgs() << "\n";);
+}
+
+SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
+ const SDLoc &DL,
+ SDValue Op) {
+ if (!unpackWasPrepared())
+ return Op;
+ unsigned InBits = UnpackFromEltSize * 8;
+ EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
+ SystemZ::VectorBits / InBits);
+ SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
+ unsigned OutBits = InBits * 2;
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
+ SystemZ::VectorBits / OutBits);
+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
+}
+
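Tracing the two helpers on the same interleave pattern (sketch): Bytes = {16, 0, 16, 1, ..., 16, 7} with Ops[1] all zeroes matches UnpackFromEltSize == 1, since within each 2-byte output element the first byte is a zero. Applying the unpack in reverse compacts the vector:

    // Bytes before: {16,0, 16,1, 16,2, 16,3, 16,4, 16,5, 16,6, 16,7}
    // Bytes after:  { 0, 1,  2, 3,  4, 5,  6, 7, -1,-1,-1,-1,-1,-1,-1,-1}

The zero operand is erased, the remaining shuffle collapses to Ops[0] itself, and insertUnpackIfPrepared emits a single v8i16 UNPACKL_HIGH of Ops[0] that recreates the zero bytes.
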
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
@@ -5013,9 +5237,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
-SDValue
-SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const {
+SDValue SystemZTargetLowering::
+lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
SDValue PackedOp = Op.getOperand(0);
EVT OutVT = Op.getValueType();
EVT InVT = PackedOp.getValueType();
@@ -5025,11 +5248,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
FromBits *= 2;
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
SystemZ::VectorBits / FromBits);
- PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
+ PackedOp =
+ DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
} while (FromBits != ToBits);
return PackedOp;
}
+// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
+SDValue SystemZTargetLowering::
+lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue PackedOp = Op.getOperand(0);
+ SDLoc DL(Op);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned OutNumElts = OutVT.getVectorNumElements();
+ unsigned NumInPerOut = InNumElts / OutNumElts;
+
+ SDValue ZeroVec =
+ DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
+
+ SmallVector<int, 16> Mask(InNumElts);
+ unsigned ZeroVecElt = InNumElts;
+ for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
+ unsigned MaskElt = PackedElt * NumInPerOut;
+ unsigned End = MaskElt + NumInPerOut - 1;
+ for (; MaskElt < End; MaskElt++)
+ Mask[MaskElt] = ZeroVecElt++;
+ Mask[MaskElt] = PackedElt;
+ }
+ SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
+}
+
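A worked instance of the new mask construction (derived from the loop above): lowering a v4i32 zero_extend_vector_inreg of a v16i8 operand gives NumInPerOut = 4, with zero-vector elements numbered from 16:

    // Mask = {16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3}
    // Three zero bytes then one source byte per i32, matching the
    // big-endian element order of the target.

The generic vector-shuffle lowering then picks the best sequence for this mask, e.g. the unpack preparation added earlier in this patch.
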
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
unsigned ByScalar) const {
// Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -5195,9 +5446,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+ return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::SHL:
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
case ISD::SRL:
@@ -5315,6 +5566,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
+ OPCODE(PROBED_ALLOCA);
OPCODE(POPCNT);
OPCODE(SMUL_LOHI);
OPCODE(UMUL_LOHI);
@@ -6056,6 +6308,32 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
return SDValue();
}
+SDValue SystemZTargetLowering::combineINT_TO_FP(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ if (DCI.Level != BeforeLegalizeTypes)
+ return SDValue();
+ unsigned Opcode = N->getOpcode();
+ EVT OutVT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OutScalarBits = OutVT.getScalarSizeInBits();
+ unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
+
+ // Insert an extension before type-legalization to avoid scalarization, e.g.:
+ // v2f64 = uint_to_fp v2i16
+ // =>
+ // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
+ if (OutVT.isVector() && OutScalarBits > InScalarBits) {
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()),
+ OutVT.getVectorNumElements());
+ unsigned ExtOpcode =
+ (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
+ return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
+ }
+ return SDValue();
+}
+
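A source-level example that can produce the narrow pattern (an assumption: whether the vectorizer actually forms v2f64 = uint_to_fp v2i16 depends on its cost decisions):

    // Converting unsigned short to double; when vectorized 2-wide this
    // can become v2f64 = uint_to_fp v2i16, which the combine widens to
    // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) before type
    // legalization would otherwise scalarize it.
    void convert(double *Out, const unsigned short *In, int N) {
      for (int I = 0; I < N; ++I)
        Out[I] = In[I];
    }
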
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6243,15 +6521,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
return false;
// Compute the effective CC mask for the new branch or select.
- switch (CCMask) {
- case SystemZ::CCMASK_CMP_EQ: break;
- case SystemZ::CCMASK_CMP_NE: break;
- case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
- case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
- case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
- case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
- default: return false;
- }
+ CCMask = SystemZ::reverseCCMask(CCMask);
// Return the updated CCReg link.
CCReg = IPM->getOperand(0);
@@ -6367,6 +6637,34 @@ SDValue SystemZTargetLowering::combineIntDIVREM(
return SDValue();
}
+SDValue SystemZTargetLowering::combineINTRINSIC(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (Id) {
+ // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
+ // or larger is simply a vector load.
+ case Intrinsic::s390_vll:
+ case Intrinsic::s390_vlrl:
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (C->getZExtValue() >= 15)
+ return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
+ N->getOperand(3), MachinePointerInfo());
+ break;
+ // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
+ case Intrinsic::s390_vstl:
+ case Intrinsic::s390_vstrl:
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (C->getZExtValue() >= 15)
+ return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
+ N->getOperand(4), MachinePointerInfo());
+ break;
+ }
+
+ return SDValue();
+}
+
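At the source level these intrinsics come from the z vector language extension; a hedged example (vec_load_len is the documented builtin behind VLL):

    #include <vecintrin.h>  // requires, e.g., -march=z13 -mzvector

    // The length operand is the highest byte index to load; 15 or more
    // covers all 16 bytes, so after this combine the call is expected to
    // become a plain vector load (VL) instead of VLL.
    vector unsigned char loadAll(const unsigned char *Ptr) {
      return vec_load_len(Ptr, 15);
    }
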
SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
return N->getOperand(0);
@@ -6391,6 +6689,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
@@ -6399,6 +6699,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: return combineIntDIVREM(N, DCI);
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
}
return SDValue();
@@ -6580,7 +6882,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
- Known = Known.zext(BitWidth, true);
+ Known = Known.zext(BitWidth);
} else
Known = Known.sext(BitWidth);
break;
@@ -6609,7 +6911,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// Known has the width of the source operand(s). Adjust if needed to match
// the passed bitwidth.
if (Known.getBitWidth() != BitWidth)
- Known = Known.zextOrTrunc(BitWidth, false);
+ Known = Known.anyextOrTrunc(BitWidth);
}
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
@@ -6690,38 +6992,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+unsigned
+SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
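The round-down step relies on StackAlign being a power of two; a small worked instance:

    // Sketch of the round-down step used above.
    unsigned roundDownToAlign(unsigned Size, unsigned Align) {
      return Size & ~(Align - 1); // e.g. roundDownToAlign(4100, 8) == 4096
    }

If the attribute value rounds down to zero, the function falls back to the stack alignment itself as the probe size.
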
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//
-// Create a new basic block after MBB.
-static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
- MachineFunction &MF = *MBB->getParent();
- MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
- MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
- return NewMBB;
-}
-
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
-// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
@@ -6859,8 +7152,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
for (MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
NextMIIt != MBB->end(); ++NextMIIt) {
- if (NextMIIt->definesRegister(SystemZ::CC))
- break;
if (isSelectPseudo(*NextMIIt)) {
assert(NextMIIt->getOperand(3).getImm() == CCValid &&
"Bad CCValid operands since CC was not redefined.");
@@ -6871,6 +7162,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
}
break;
}
+ if (NextMIIt->definesRegister(SystemZ::CC) ||
+ NextMIIt->usesCustomInsertionHook())
+ break;
bool User = false;
for (auto SelMI : Selects)
if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
@@ -6891,8 +7185,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
bool CCKilled =
(LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the last Select instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -6985,8 +7279,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the CondStore instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -7069,8 +7363,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// ...
@@ -7187,10 +7481,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
- MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
+ MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
// StartMBB:
// ...
@@ -7298,9 +7592,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
// StartMBB:
// ...
@@ -7460,7 +7754,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
- splitBlockAfter(MI, MBB) : nullptr);
+ SystemZ::splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
@@ -7484,9 +7778,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register NextCountReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB =
+ (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
// StartMBB:
// # fall through to LoopMBB
@@ -7602,7 +7897,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// If there's another CLC to go, branch to the end if a difference
// was found.
if (EndMBB && Length > 0) {
- MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
.addMBB(EndMBB);
@@ -7642,8 +7937,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// # fall through to LoopMBB
@@ -7754,6 +8049,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
return MBB;
}
+MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
+ MachineInstr &MI, MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI.getDebugLoc();
+ const unsigned ProbeSize = getStackProbeSize(MF);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SizeReg = MI.getOperand(2).getReg();
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
+ MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
+ MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
+
+ MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+
+ Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+
+ // LoopTestMBB
+ // BRC TailTestMBB
+ // # fallthrough to LoopBodyMBB
+ StartMBB->addSuccessor(LoopTestMBB);
+ MBB = LoopTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
+ .addReg(SizeReg)
+ .addMBB(StartMBB)
+ .addReg(IncReg)
+ .addMBB(LoopBodyMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
+ .addMBB(TailTestMBB);
+ MBB->addSuccessor(LoopBodyMBB);
+ MBB->addSuccessor(TailTestMBB);
+
+ // LoopBodyMBB: Allocate and probe by means of a volatile compare.
+ // J LoopTestMBB
+ MBB = LoopBodyMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
+ .setMemRefs(VolLdMMO);
+ BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
+ MBB->addSuccessor(LoopTestMBB);
+
+ // TailTestMBB
+ // BRC DoneMBB
+ // # fallthrough to TailMBB
+ MBB = TailTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(PHIReg)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+ .addMBB(DoneMBB);
+ MBB->addSuccessor(TailMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ // TailMBB
+ // # fallthrough to DoneMBB
+ MBB = TailMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addReg(PHIReg);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
+ .setMemRefs(VolLdMMO);
+ MBB->addSuccessor(DoneMBB);
+
+ // DoneMBB
+ MBB = DoneMBB;
+ BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(SystemZ::R15D);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
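A hedged end-to-end example of what exercises this path: a dynamic alloca in a function compiled with stack-clash protection (assuming the front end sets "probe-stack"="inline-asm", as clang's -fstack-clash-protection does):

    extern void use(char *);

    // The variable-length buffer becomes a dynamic stack allocation that
    // is lowered to PROBED_ALLOCA and expanded by emitProbedAlloca above,
    // touching the stack once per probe-size block as it grows.
    void makeBuffer(unsigned long N) {
      char Buf[N]; // GNU VLA extension in C++
      use(Buf);
    }
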
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
@@ -8014,6 +8400,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::LTXBRCompare_VecPseudo:
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+ case SystemZ::PROBED_ALLOCA:
+ return emitProbedAlloca(MI, MBB);
+
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, MBB);