summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp110
1 files changed, 40 insertions, 70 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 4a9ea69d101c..f38e93109967 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -17,32 +17,29 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-// Decide whether it is best to use a loop or straight-line code for
-// a block operation of Size bytes with source address Src and destination
-// address Dest. Sequence is the opcode to use for straight-line code
-// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
-// Return the chain for the completed operation.
-static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
- unsigned Loop, SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size) {
- EVT PtrVT = Src.getValueType();
- // The heuristic we use is to prefer loops for anything that would
- // require 7 or more MVCs. With these kinds of sizes there isn't
- // much to choose between straight-line code and looping code,
- // since the time will be dominated by the MVCs themselves.
- // However, the loop has 4 or 5 instructions (depending on whether
- // the base addresses can be proved equal), so there doesn't seem
- // much point using a loop for 5 * 256 bytes or fewer. Anything in
- // the range (5 * 256, 6 * 256) will need another instruction after
- // the loop, so it doesn't seem worth using a loop then either.
- // The next value up, 6 * 256, can be implemented in the same
- // number of straight-line MVCs as 6 * 256 - 1.
- if (Size > 6 * 256)
- return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, DL, PtrVT),
- DAG.getConstant(Size / 256, DL, PtrVT));
- return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, DL, PtrVT));
+static SDVTList getMemMemVTs(unsigned Op, SelectionDAG &DAG) {
+ return Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other)
+ : DAG.getVTList(MVT::Other);
+}
+
+// Emit a mem-mem operation after subtracting one from size, which will be
+// added back during pseudo expansion. As the Reg case emitted here may be
+// converted by DAGCombiner into having an Imm length, they are both emitted
+// the same way.
+static SDValue emitMemMemImm(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size) {
+ return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src,
+ DAG.getConstant(Size - 1, DL, Src.getValueType()));
+}
+
+static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size) {
+ SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
+ DAG.getZExtOrTrunc(Size, DL, MVT::i64),
+ DAG.getConstant(-1, DL, MVT::i64));
+ return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src, LenMinus1);
}
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -53,9 +50,10 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
- return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
- Chain, Dst, Src, CSize->getZExtValue());
- return SDValue();
+ return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, Dst, Src,
+ CSize->getZExtValue());
+
+ return emitMemMemReg(DAG, DL, SystemZISD::MVC, Chain, Dst, Src, Size);
}
// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
@@ -127,52 +125,23 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
// Handle the special case of a memset of 0, which can use XC.
if (CByte && CByte->getZExtValue() == 0)
- return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
- Chain, Dst, Dst, Bytes);
+ return emitMemMemImm(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Bytes);
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
- return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
- Chain, DstPlus1, Dst, Bytes - 1);
+ return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, DstPlus1, Dst,
+ Bytes - 1);
}
// Variable length
- if (CByte && CByte->getZExtValue() == 0) {
+ if (CByte && CByte->getZExtValue() == 0)
// Handle the special case of a variable length memset of 0 with XC.
- SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
- DAG.getZExtOrTrunc(Size, DL, MVT::i64),
- DAG.getConstant(-1, DL, MVT::i64));
- SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
- DAG.getConstant(8, DL, MVT::i64));
- return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
- LenMinus1, TripC);
- }
- return SDValue();
-}
+ return emitMemMemReg(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Size);
-// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
-// deciding whether to use a loop or straight-line code.
-static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
- SDValue Src1, SDValue Src2, uint64_t Size) {
- SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- EVT PtrVT = Src1.getValueType();
- // A two-CLC sequence is a clear win over a loop, not least because it
- // needs only one branch. A three-CLC sequence needs the same number
- // of branches as a loop (i.e. 2), but is shorter. That brings us to
- // lengths greater than 768 bytes. It seems relatively likely that
- // a difference will be found within the first 768 bytes, so we just
- // optimize for the smallest number of branch instructions, in order
- // to avoid polluting the prediction buffer too much. A loop only ever
- // needs 2 branches, whereas a straight-line sequence would need 3 or more.
- if (Size > 3 * 256)
- return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, DL, PtrVT),
- DAG.getConstant(Size / 256, DL, PtrVT));
- return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, DL, PtrVT));
+ return SDValue();
}
// Convert the current CC value into an integer that is 0 if CC == 0,
@@ -193,15 +162,16 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
+ SDValue CCReg;
+ // Swap operands to invert CC == 1 vs. CC == 2 cases.
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
- // Swap operands to invert CC == 1 vs. CC == 2 cases.
- SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
- Chain = CCReg.getValue(1);
- return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
- }
- return std::make_pair(SDValue(), SDValue());
+ CCReg = emitMemMemImm(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Bytes);
+ } else
+ CCReg = emitMemMemReg(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Size);
+ Chain = CCReg.getValue(1);
+ return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(