summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp120
1 files changed, 99 insertions, 21 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c3ac796a0a442..19fce064783d0 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -82,7 +82,7 @@ public:
void PostprocessISelDAG() override;
private:
- SDValue foldFrameIndex(SDValue N) const;
+ std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
@@ -116,9 +116,11 @@ private:
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
- bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ bool SelectMUBUFScratchOffen(SDNode *Root,
+ SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ bool SelectMUBUFScratchOffset(SDNode *Root,
+ SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
@@ -1074,13 +1076,33 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
-SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
- if (auto FI = dyn_cast<FrameIndexSDNode>(N))
- return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
- return N;
+static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
+ auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
+ return PSV && PSV->isStack();
+}
+
+std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
+ const MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
+ FI->getValueType(0));
+
+ // If we can resolve this to a frame index access, this is relative to the
+ // frame pointer SGPR.
+ return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
+ MVT::i32));
+ }
+
+ // If we don't know this private access is a local stack object, it needs to
+ // be relative to the entry point's scratch wave offset register.
+ return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
+ MVT::i32));
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
+ SDValue Addr, SDValue &Rsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const {
@@ -1089,7 +1111,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
- SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
unsigned Imm = CAddr->getZExtValue();
@@ -1100,6 +1121,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
+
+ // In a call sequence, stores to the argument stack area are relative to the
+ // stack pointer.
+ const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
+ unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
+ Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
+
+ SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
return true;
}
@@ -1113,19 +1142,20 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
- VAddr = foldFrameIndex(N0);
+ std::tie(VAddr, SOffset) = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
- VAddr = foldFrameIndex(Addr);
+ std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
+ SDValue Addr,
SDValue &SRsrc,
SDValue &SOffset,
SDValue &Offset) const {
@@ -1138,7 +1168,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
- SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
+
+ const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
+ unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
+ Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
+
+ // FIXME: Get from MachinePointerInfo? We should only be using the frame
+ // offset if we know this is in a call sequence.
+ SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
+
Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
return true;
}
@@ -1700,12 +1738,46 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
return true;
}
+static SDValue stripBitcast(SDValue Val) {
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+}
+
+// Figure out if this is really an extract of the high 16-bits of a dword.
+static bool isExtractHiElt(SDValue In, SDValue &Out) {
+ In = stripBitcast(In);
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue Srl = In.getOperand(0);
+ if (Srl.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ if (ShiftAmt->getZExtValue() == 16) {
+ Out = stripBitcast(Srl.getOperand(0));
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// Look through operations that obscure just looking at the low 16-bits of the
+// same register.
+static SDValue stripExtractLoElt(SDValue In) {
+ if (In.getOpcode() == ISD::TRUNCATE) {
+ SDValue Src = In.getOperand(0);
+ if (Src.getValueType().getSizeInBits() == 32)
+ return stripBitcast(Src);
+ }
+
+ return In;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
Src = In;
- // FIXME: Look for on separate components
if (Src.getOpcode() == ISD::FNEG) {
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = Src.getOperand(0);
@@ -1714,19 +1786,28 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
if (Src.getOpcode() == ISD::BUILD_VECTOR) {
unsigned VecMods = Mods;
- SDValue Lo = Src.getOperand(0);
- SDValue Hi = Src.getOperand(1);
+ SDValue Lo = stripBitcast(Src.getOperand(0));
+ SDValue Hi = stripBitcast(Src.getOperand(1));
if (Lo.getOpcode() == ISD::FNEG) {
- Lo = Lo.getOperand(0);
+ Lo = stripBitcast(Lo.getOperand(0));
Mods ^= SISrcMods::NEG;
}
if (Hi.getOpcode() == ISD::FNEG) {
- Hi = Hi.getOperand(0);
+ Hi = stripBitcast(Hi.getOperand(0));
Mods ^= SISrcMods::NEG_HI;
}
+ if (isExtractHiElt(Lo, Lo))
+ Mods |= SISrcMods::OP_SEL_0;
+
+ if (isExtractHiElt(Hi, Hi))
+ Mods |= SISrcMods::OP_SEL_1;
+
+ Lo = stripExtractLoElt(Lo);
+ Hi = stripExtractLoElt(Hi);
+
if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
// Really a scalar input. Just select from the low half of the register to
// avoid packing.
@@ -1740,9 +1821,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
}
// Packed instructions do not have abs modifiers.
-
- // FIXME: Handle abs/neg of individual components.
- // FIXME: Handle swizzling with op_sel
Mods |= SISrcMods::OP_SEL_1;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);