diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 142 | 
1 files changed, 77 insertions, 65 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ef0186f7d57f..cf1629fda0af 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -236,23 +236,29 @@ template <> struct MappingTraits<SIArgumentInfo> {  struct SIMode {    bool IEEE = true;    bool DX10Clamp = true; -  bool FP32Denormals = true; -  bool FP64FP16Denormals = true; +  bool FP32InputDenormals = true; +  bool FP32OutputDenormals = true; +  bool FP64FP16InputDenormals = true; +  bool FP64FP16OutputDenormals = true;    SIMode() = default;    SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {      IEEE = Mode.IEEE;      DX10Clamp = Mode.DX10Clamp; -    FP32Denormals = Mode.FP32Denormals; -    FP64FP16Denormals = Mode.FP64FP16Denormals; +    FP32InputDenormals = Mode.FP32InputDenormals; +    FP32OutputDenormals = Mode.FP32OutputDenormals; +    FP64FP16InputDenormals = Mode.FP64FP16InputDenormals; +    FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;    }    bool operator ==(const SIMode Other) const {      return IEEE == Other.IEEE &&             DX10Clamp == Other.DX10Clamp && -           FP32Denormals == Other.FP32Denormals && -           FP64FP16Denormals == Other.FP64FP16Denormals; +           FP32InputDenormals == Other.FP32InputDenormals && +           FP32OutputDenormals == Other.FP32OutputDenormals && +           FP64FP16InputDenormals == Other.FP64FP16InputDenormals && +           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;    }  }; @@ -260,8 +266,10 @@ template <> struct MappingTraits<SIMode> {    static void mapping(IO &YamlIO, SIMode &Mode) {      YamlIO.mapOptional("ieee", Mode.IEEE, true);      YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true); -    YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true); -    YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true); +    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true); +    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true); +    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true); +    YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);    }  }; @@ -276,7 +284,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {    uint32_t HighBitsOf32BitAddress = 0;    StringValue ScratchRSrcReg = "$private_rsrc_reg"; -  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";    StringValue FrameOffsetReg = "$fp_reg";    StringValue StackPtrOffsetReg = "$sp_reg"; @@ -303,8 +310,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {      YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);      YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,                         StringValue("$private_rsrc_reg")); -    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg, -                       StringValue("$scratch_wave_offset_reg"));      YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,                         StringValue("$fp_reg"));      YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, @@ -323,20 +328,20 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {  class SIMachineFunctionInfo final : public AMDGPUMachineFunction {    friend class GCNTargetMachine; -  unsigned TIDReg = AMDGPU::NoRegister; +  Register TIDReg = AMDGPU::NoRegister;    // Registers that may be reserved for spilling purposes. These may be the same    // as the input registers. -  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; -  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; +  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; -  // This is the current function's incremented size from the kernel's scratch -  // wave offset register. For an entry function, this is exactly the same as -  // the ScratchWaveOffsetReg. -  unsigned FrameOffsetReg = AMDGPU::FP_REG; +  // This is the the unswizzled offset from the current dispatch's scratch wave +  // base to the beginning of the current function's frame. +  Register FrameOffsetReg = AMDGPU::FP_REG; -  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. -  unsigned StackPtrOffsetReg = AMDGPU::SP_REG; +  // This is an ABI register used in the non-entry calling convention to +  // communicate the unswizzled offset from the current dispatch's scratch wave +  // base to the beginning of the new function's frame. +  Register StackPtrOffsetReg = AMDGPU::SP_REG;    AMDGPUFunctionArgInfo ArgInfo; @@ -429,11 +434,11 @@ private:  public:    struct SpilledReg { -    unsigned VGPR = 0; +    Register VGPR;      int Lane = -1;      SpilledReg() = default; -    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} +    SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}      bool hasLane() { return Lane != -1;}      bool hasReg() { return VGPR != 0;} @@ -441,13 +446,13 @@ public:    struct SGPRSpillVGPRCSR {      // VGPR used for SGPR spills -    unsigned VGPR; +    Register VGPR;      // If the VGPR is a CSR, the stack slot used to save/restore it in the      // prolog/epilog.      Optional<int> FI; -    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {} +    SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}    };    struct VGPRSpillToAGPR { @@ -457,12 +462,9 @@ public:    SparseBitVector<> WWMReservedRegs; -  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); } +  void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }  private: -  // SGPR->VGPR spilling support. -  using SpillRegMask = std::pair<unsigned, unsigned>; -    // Track VGPR + wave index for each subregister of the SGPR spilled to    // frameindex key.    DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; @@ -480,9 +482,17 @@ private:  public: // FIXME    /// If this is set, an SGPR used for save/restore of the register used for the    /// frame pointer. -  unsigned SGPRForFPSaveRestoreCopy = 0; +  Register SGPRForFPSaveRestoreCopy;    Optional<int> FramePointerSaveIndex; +  /// If this is set, an SGPR used for save/restore of the register used for the +  /// base pointer. +  Register SGPRForBPSaveRestoreCopy; +  Optional<int> BasePointerSaveIndex; + +  Register VGPRReservedForSGPRSpill; +  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg); +  public:    SIMachineFunctionInfo(const MachineFunction &MF); @@ -498,6 +508,14 @@ public:      return SpillVGPRs;    } +  void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) { +    SpillVGPRs[Index].VGPR = NewVGPR; +    SpillVGPRs[Index].FI = newFI; +    VGPRReservedForSGPRSpill = NewVGPR; +  } + +  bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF); +    ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {      return SpillAGPR;    } @@ -515,12 +533,13 @@ public:    bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,                                   unsigned NumLane) const;    bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); +  bool reserveVGPRforSGPRSpills(MachineFunction &MF);    bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);    void removeDeadFrameIndices(MachineFrameInfo &MFI);    bool hasCalculatedTID() const { return TIDReg != 0; }; -  unsigned getTIDReg() const { return TIDReg; }; -  void setTIDReg(unsigned Reg) { TIDReg = Reg; } +  Register getTIDReg() const { return TIDReg; }; +  void setTIDReg(Register Reg) { TIDReg = Reg; }    unsigned getBytesInStackArgArea() const {      return BytesInStackArgArea; @@ -531,34 +550,34 @@ public:    }    // Add user SGPRs. -  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); -  unsigned addDispatchPtr(const SIRegisterInfo &TRI); -  unsigned addQueuePtr(const SIRegisterInfo &TRI); -  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); -  unsigned addDispatchID(const SIRegisterInfo &TRI); -  unsigned addFlatScratchInit(const SIRegisterInfo &TRI); -  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); +  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI); +  Register addDispatchPtr(const SIRegisterInfo &TRI); +  Register addQueuePtr(const SIRegisterInfo &TRI); +  Register addKernargSegmentPtr(const SIRegisterInfo &TRI); +  Register addDispatchID(const SIRegisterInfo &TRI); +  Register addFlatScratchInit(const SIRegisterInfo &TRI); +  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);    // Add system SGPRs. -  unsigned addWorkGroupIDX() { +  Register addWorkGroupIDX() {      ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());      NumSystemSGPRs += 1;      return ArgInfo.WorkGroupIDX.getRegister();    } -  unsigned addWorkGroupIDY() { +  Register addWorkGroupIDY() {      ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());      NumSystemSGPRs += 1;      return ArgInfo.WorkGroupIDY.getRegister();    } -  unsigned addWorkGroupIDZ() { +  Register addWorkGroupIDZ() {      ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());      NumSystemSGPRs += 1;      return ArgInfo.WorkGroupIDZ.getRegister();    } -  unsigned addWorkGroupInfo() { +  Register addWorkGroupInfo() {      ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());      NumSystemSGPRs += 1;      return ArgInfo.WorkGroupInfo.getRegister(); @@ -577,14 +596,14 @@ public:      ArgInfo.WorkItemIDZ = Arg;    } -  unsigned addPrivateSegmentWaveByteOffset() { +  Register addPrivateSegmentWaveByteOffset() {      ArgInfo.PrivateSegmentWaveByteOffset        = ArgDescriptor::createRegister(getNextSystemSGPR());      NumSystemSGPRs += 1;      return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();    } -  void setPrivateSegmentWaveByteOffset(unsigned Reg) { +  void setPrivateSegmentWaveByteOffset(Register Reg) {      ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);    } @@ -660,13 +679,13 @@ public:      return ArgInfo;    } -  std::pair<const ArgDescriptor *, const TargetRegisterClass *> +  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>    getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {      return ArgInfo.getPreloadedValue(Value);    }    Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { -    auto Arg = ArgInfo.getPreloadedValue(Value).first; +    auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));      return Arg ? Arg->getRegister() : Register();    } @@ -674,6 +693,8 @@ public:      return GITPtrHigh;    } +  Register getGITPtrLoReg(const MachineFunction &MF) const; +    uint32_t get32BitAddressHighBits() const {      return HighBitsOf32BitAddress;    } @@ -690,35 +711,31 @@ public:      return NumUserSGPRs + NumSystemSGPRs;    } -  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { +  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {      return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();    }    /// Returns the physical register reserved for use as the resource    /// descriptor for scratch accesses. -  unsigned getScratchRSrcReg() const { +  Register getScratchRSrcReg() const {      return ScratchRSrcReg;    } -  void setScratchRSrcReg(unsigned Reg) { +  void setScratchRSrcReg(Register Reg) {      assert(Reg != 0 && "Should never be unset");      ScratchRSrcReg = Reg;    } -  unsigned getScratchWaveOffsetReg() const { -    return ScratchWaveOffsetReg; -  } - -  unsigned getFrameOffsetReg() const { +  Register getFrameOffsetReg() const {      return FrameOffsetReg;    } -  void setFrameOffsetReg(unsigned Reg) { +  void setFrameOffsetReg(Register Reg) {      assert(Reg != 0 && "Should never be unset");      FrameOffsetReg = Reg;    } -  void setStackPtrOffsetReg(unsigned Reg) { +  void setStackPtrOffsetReg(Register Reg) {      assert(Reg != 0 && "Should never be unset");      StackPtrOffsetReg = Reg;    } @@ -727,20 +744,15 @@ public:    // NoRegister. This is mostly a workaround for MIR tests where state that    // can't be directly computed from the function is not preserved in serialized    // MIR. -  unsigned getStackPtrOffsetReg() const { +  Register getStackPtrOffsetReg() const {      return StackPtrOffsetReg;    } -  void setScratchWaveOffsetReg(unsigned Reg) { -    assert(Reg != 0 && "Should never be unset"); -    ScratchWaveOffsetReg = Reg; -  } - -  unsigned getQueuePtrUserSGPR() const { +  Register getQueuePtrUserSGPR() const {      return ArgInfo.QueuePtr.getRegister();    } -  unsigned getImplicitBufferPtrUserSGPR() const { +  Register getImplicitBufferPtrUserSGPR() const {      return ArgInfo.ImplicitBufferPtr.getRegister();    } @@ -853,7 +865,7 @@ public:    }    /// \returns SGPR used for \p Dim's work group ID. -  unsigned getWorkGroupIDSGPR(unsigned Dim) const { +  Register getWorkGroupIDSGPR(unsigned Dim) const {      switch (Dim) {      case 0:        assert(hasWorkGroupIDX());  | 
