Vendor import of llvm trunk r321017: - src

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:10:56 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:10:56 +0000
commit	044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree	1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/AMDGPU/SIMachineFunctionInfo.h
parent	eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)

vendor/llvm/llvm-trunk-r321017

Notes

Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.h')

-rw-r--r--

lib/Target/AMDGPU/SIMachineFunctionInfo.h

227

1 files changed, 144 insertions, 83 deletions

diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a09a48..5dde72910ee3 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h

@@ -1,4 +1,4 @@

-//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//

+//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//

// The LLVM Compiler Infrastructure

@@ -14,23 +14,32 @@

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

+#include "AMDGPUArgumentUsageInfo.h"

#include "AMDGPUMachineFunction.h"

-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIRegisterInfo.h"

+#include "llvm/ADT/ArrayRef.h"

+#include "llvm/ADT/DenseMap.h"

+#include "llvm/ADT/Optional.h"

+#include "llvm/ADT/SmallVector.h"

#include "llvm/CodeGen/PseudoSourceValue.h"

+#include "llvm/CodeGen/TargetInstrInfo.h"

#include "llvm/MC/MCRegisterInfo.h"

#include "llvm/Support/ErrorHandling.h"

#include <array>

#include <cassert>

-#include <map>

#include <utility>

+#include <vector>

namespace llvm {

+class MachineFrameInfo;

+class MachineFunction;

+class TargetRegisterClass;

class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {

public:

- explicit AMDGPUImagePseudoSourceValue() :

- PseudoSourceValue(PseudoSourceValue::TargetCustom) { }

+ explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :

+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }

bool isConstant(const MachineFrameInfo *) const override {

// This should probably be true for most images, but we will start by being

@@ -44,7 +53,7 @@ public:

return false;

}

- bool mayAlias(const MachineFrameInfo*) const override {

+ bool mayAlias(const MachineFrameInfo *) const override {

// FIXME: If we ever change image intrinsics to accept fat pointers, then

// this could be true for some cases.

return false;

@@ -53,8 +62,8 @@ public:

class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {

public:

- explicit AMDGPUBufferPseudoSourceValue() :

- PseudoSourceValue(PseudoSourceValue::TargetCustom) { }

+ explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :

+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }

bool isConstant(const MachineFrameInfo *) const override {

// This should probably be true for most images, but we will start by being

@@ -68,7 +77,7 @@ public:

return false;

}

- bool mayAlias(const MachineFrameInfo*) const override {

+ bool mayAlias(const MachineFrameInfo *) const override {

// FIXME: If we ever change image intrinsics to accept fat pointers, then

// this could be true for some cases.

return false;

@@ -78,86 +87,68 @@ public:

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which

/// tells the hardware which interpolation parameters to load.

class SIMachineFunctionInfo final : public AMDGPUMachineFunction {

- // FIXME: This should be removed and getPreloadedValue moved here.

- friend class SIRegisterInfo;

- unsigned TIDReg;

+ unsigned TIDReg = AMDGPU::NoRegister;

// Registers that may be reserved for spilling purposes. These may be the same

// as the input registers.

- unsigned ScratchRSrcReg;

- unsigned ScratchWaveOffsetReg;

+ unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

+ unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

// This is the current function's incremented size from the kernel's scratch

// wave offset register. For an entry function, this is exactly the same as

// the ScratchWaveOffsetReg.

- unsigned FrameOffsetReg;

+ unsigned FrameOffsetReg = AMDGPU::FP_REG;

// Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.

- unsigned StackPtrOffsetReg;

- // Input registers for non-HSA ABI

- unsigned ImplicitBufferPtrUserSGPR;

- // Input registers setup for the HSA ABI.

- // User SGPRs in allocation order.

- unsigned PrivateSegmentBufferUserSGPR;

- unsigned DispatchPtrUserSGPR;

- unsigned QueuePtrUserSGPR;

- unsigned KernargSegmentPtrUserSGPR;

- unsigned DispatchIDUserSGPR;

- unsigned FlatScratchInitUserSGPR;

- unsigned PrivateSegmentSizeUserSGPR;

- unsigned GridWorkGroupCountXUserSGPR;

- unsigned GridWorkGroupCountYUserSGPR;

- unsigned GridWorkGroupCountZUserSGPR;

- // System SGPRs in allocation order.

- unsigned WorkGroupIDXSystemSGPR;

- unsigned WorkGroupIDYSystemSGPR;

- unsigned WorkGroupIDZSystemSGPR;

- unsigned WorkGroupInfoSystemSGPR;

- unsigned PrivateSegmentWaveByteOffsetSystemSGPR;

+ unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

- // VGPR inputs. These are always v0, v1 and v2 for entry functions.

- unsigned WorkItemIDXVGPR;

- unsigned WorkItemIDYVGPR;

- unsigned WorkItemIDZVGPR;

+ AMDGPUFunctionArgInfo ArgInfo;

// Graphics info.

- unsigned PSInputAddr;

- unsigned PSInputEnable;

+ unsigned PSInputAddr = 0;

+ unsigned PSInputEnable = 0;

- bool ReturnsVoid;

+ /// Number of bytes of arguments this function has on the stack. If the callee

+ /// is expected to restore the argument stack this should be a multiple of 16,

+ /// all usable during a tail call.

+ ///

+ /// The alternative would forbid tail call optimisation in some cases: if we

+ /// want to transfer control from a function with 8-bytes of stack-argument

+ /// space to a function with 16-bytes then misalignment of this value would

+ /// make a stack adjustment necessary, which could not be undone by the

+ /// callee.

+ unsigned BytesInStackArgArea = 0;

+ bool ReturnsVoid = true;

// A pair of default/requested minimum/maximum flat work group sizes.

// Minimum - first, maximum - second.

- std::pair<unsigned, unsigned> FlatWorkGroupSizes;

+ std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

// A pair of default/requested minimum/maximum number of waves per execution

// unit. Minimum - first, maximum - second.

- std::pair<unsigned, unsigned> WavesPerEU;

+ std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

// Stack object indices for work group IDs.

- std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;

+ std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};

// Stack object indices for work item IDs.

- std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;

+ std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};

AMDGPUBufferPseudoSourceValue BufferPSV;

AMDGPUImagePseudoSourceValue ImagePSV;

private:

- unsigned LDSWaveSpillSize;

- unsigned ScratchOffsetReg;

- unsigned NumUserSGPRs;

- unsigned NumSystemSGPRs;

+ unsigned LDSWaveSpillSize = 0;

+ unsigned NumUserSGPRs = 0;

+ unsigned NumSystemSGPRs = 0;

- bool HasSpilledSGPRs;

- bool HasSpilledVGPRs;

- bool HasNonSpillStackObjects;

+ bool HasSpilledSGPRs = false;

+ bool HasSpilledVGPRs = false;

+ bool HasNonSpillStackObjects = false;

- unsigned NumSpilledSGPRs;

- unsigned NumSpilledVGPRs;

+ unsigned NumSpilledSGPRs = 0;

+ unsigned NumSpilledVGPRs = 0;

// Feature bits required for inputs passed in user SGPRs.

bool PrivateSegmentBuffer : 1;

@@ -186,6 +177,15 @@ private:

// Other shaders indirect 64-bits at sgpr[0:1]

bool ImplicitBufferPtr : 1;

+ // Pointer to where the ABI inserts special kernel arguments separate from the

+ // user arguments. This is an offset from the KernargSegmentPtr.

+ bool ImplicitArgPtr : 1;

+ // The hard-wired high half of the address of the global information table

+ // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since

+ // current hardware only allows a 16 bit value.

+ unsigned GITPtrHigh;

MCPhysReg getNextUserSGPR() const {

assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");

return AMDGPU::SGPR0 + NumUserSGPRs;

@@ -201,24 +201,34 @@ public:

int Lane = -1;

SpilledReg() = default;

- SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }

+ SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}

bool hasLane() { return Lane != -1;}

bool hasReg() { return VGPR != AMDGPU::NoRegister;}

};

+ struct SGPRSpillVGPRCSR {

+ // VGPR used for SGPR spills

+ unsigned VGPR;

+ // If the VGPR is a CSR, the stack slot used to save/restore it in the

+ // prolog/epilog.

+ Optional<int> FI;

+ SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}

+ };

private:

// SGPR->VGPR spilling support.

- typedef std::pair<unsigned, unsigned> SpillRegMask;

+ using SpillRegMask = std::pair<unsigned, unsigned>;

// Track VGPR + wave index for each subregister of the SGPR spilled to

// frameindex key.

DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;

unsigned NumVGPRSpillLanes = 0;

- SmallVector<unsigned, 2> SpillVGPRs;

+ SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:

SIMachineFunctionInfo(const MachineFunction &MF);

ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {

@@ -227,13 +237,25 @@ public:

ArrayRef<SpilledReg>() : makeArrayRef(I->second);

}

+ ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {

+ return SpillVGPRs;

+ }

bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);

void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

- bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };

- unsigned getTIDReg() const { return TIDReg; };

+ bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }

+ unsigned getTIDReg() const { return TIDReg; }

void setTIDReg(unsigned Reg) { TIDReg = Reg; }

+ unsigned getBytesInStackArgArea() const {

+ return BytesInStackArgArea;

+ }

+ void setBytesInStackArgArea(unsigned Bytes) {

+ BytesInStackArgArea = Bytes;

+ }

// Add user SGPRs.

unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);

unsigned addDispatchPtr(const SIRegisterInfo &TRI);

@@ -245,37 +267,51 @@ public:

// Add system SGPRs.

unsigned addWorkGroupIDX() {

- WorkGroupIDXSystemSGPR = getNextSystemSGPR();

+ ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());

NumSystemSGPRs += 1;

- return WorkGroupIDXSystemSGPR;

+ return ArgInfo.WorkGroupIDX.getRegister();

}

unsigned addWorkGroupIDY() {

- WorkGroupIDYSystemSGPR = getNextSystemSGPR();

+ ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());

NumSystemSGPRs += 1;

- return WorkGroupIDYSystemSGPR;

+ return ArgInfo.WorkGroupIDY.getRegister();

}

unsigned addWorkGroupIDZ() {

- WorkGroupIDZSystemSGPR = getNextSystemSGPR();

+ ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());

NumSystemSGPRs += 1;

- return WorkGroupIDZSystemSGPR;

+ return ArgInfo.WorkGroupIDZ.getRegister();

}

unsigned addWorkGroupInfo() {

- WorkGroupInfoSystemSGPR = getNextSystemSGPR();

+ ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());

NumSystemSGPRs += 1;

- return WorkGroupInfoSystemSGPR;

+ return ArgInfo.WorkGroupInfo.getRegister();

+ }

+ // Add special VGPR inputs

+ void setWorkItemIDX(ArgDescriptor Arg) {

+ ArgInfo.WorkItemIDX = Arg;

+ }

+ void setWorkItemIDY(ArgDescriptor Arg) {

+ ArgInfo.WorkItemIDY = Arg;

+ }

+ void setWorkItemIDZ(ArgDescriptor Arg) {

+ ArgInfo.WorkItemIDZ = Arg;

}

unsigned addPrivateSegmentWaveByteOffset() {

- PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();

+ ArgInfo.PrivateSegmentWaveByteOffset

+ = ArgDescriptor::createRegister(getNextSystemSGPR());

NumSystemSGPRs += 1;

- return PrivateSegmentWaveByteOffsetSystemSGPR;

+ return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();

}

void setPrivateSegmentWaveByteOffset(unsigned Reg) {

- PrivateSegmentWaveByteOffsetSystemSGPR = Reg;

+ ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);

}

bool hasPrivateSegmentBuffer() const {

@@ -346,10 +382,35 @@ public:

return WorkItemIDZ;

}

+ bool hasImplicitArgPtr() const {

+ return ImplicitArgPtr;

+ }

bool hasImplicitBufferPtr() const {

return ImplicitBufferPtr;

}

+ AMDGPUFunctionArgInfo &getArgInfo() {

+ return ArgInfo;

+ }

+ const AMDGPUFunctionArgInfo &getArgInfo() const {

+ return ArgInfo;

+ }

+ std::pair<const ArgDescriptor *, const TargetRegisterClass *>

+ getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {

+ return ArgInfo.getPreloadedValue(Value);

+ }

+ unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {

+ return ArgInfo.getPreloadedValue(Value).first->getRegister();

+ }

+ unsigned getGITPtrHigh() const {

+ return GITPtrHigh;

+ }

unsigned getNumUserSGPRs() const {

return NumUserSGPRs;

}

@@ -359,7 +420,7 @@ public:

}

unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {

- return PrivateSegmentWaveByteOffsetSystemSGPR;

+ return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();

}

/// \brief Returns the physical register reserved for use as the resource

@@ -401,11 +462,11 @@ public:

}

unsigned getQueuePtrUserSGPR() const {

- return QueuePtrUserSGPR;

+ return ArgInfo.QueuePtr.getRegister();

}

unsigned getImplicitBufferPtrUserSGPR() const {

- return ImplicitBufferPtrUserSGPR;

+ return ArgInfo.ImplicitBufferPtr.getRegister();

}

bool hasSpilledSGPRs() const {

@@ -537,13 +598,13 @@ public:

switch (Dim) {

case 0:

assert(hasWorkGroupIDX());

- return WorkGroupIDXSystemSGPR;

+ return ArgInfo.WorkGroupIDX.getRegister();

case 1:

assert(hasWorkGroupIDY());

- return WorkGroupIDYSystemSGPR;

+ return ArgInfo.WorkGroupIDY.getRegister();

case 2:

assert(hasWorkGroupIDZ());

- return WorkGroupIDZSystemSGPR;

+ return ArgInfo.WorkGroupIDZ.getRegister();

}

llvm_unreachable("unexpected dimension");

}