diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td new file mode 100644 index 000000000000..0b6305f95a0a --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td @@ -0,0 +1,106 @@ +//===-- PPCRegisterInfoMMA.td - The PowerPC Register File --*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register info for registers related to MMA. These are the ACC and UACC +// registers. +// +//===----------------------------------------------------------------------===// + +let Namespace = "PPC" in { +def sub_pair0 : SubRegIndex<256>; +def sub_pair1 : SubRegIndex<256, 256>; +} + +// ACC - One of the 8 512-bit VSX accumulators. +class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// UACC - One of the 8 512-bit VSX accumulators prior to being primed. +// Without using this register class, the register allocator has no way to +// differentiate a primed accumulator from an unprimed accumulator. +// This may result in invalid copies between primed and unprimed accumulators. +class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// SPE Accumulator for multiply-accumulate SPE operations. Never directly +// accessed, so there's no real encoding for it. +def SPEACC: DwarfRegNum<[99, 111]>; + +let SubRegIndices = [sub_pair0, sub_pair1] in { + def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; +} +def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, + ACC4, ACC5, ACC6, ACC7)> { + // The AllocationPriority is in the range [0, 63]. Assigned the ACC registers + // the highest possible priority in this range to force the register allocator + // to assign these registers first. This is done because the ACC registers + // must represent 4 advacent vector registers. For example ACC1 must be + // VS4 - VS7. The value here must be at least 32 as we want to allocate + // these registers even before we allocate global ranges. + let AllocationPriority = 63; + let Size = 512; +} + +let SubRegIndices = [sub_pair0, sub_pair1] in { + def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; +} +def UACCRC : RegisterClass<"PPC", [v512i1], 128, + (add UACC0, UACC1, UACC2, UACC3, + UACC4, UACC5, UACC6, UACC7)> { + // The AllocationPriority for the UACC registers is still high and must be at + // least 32 as we want to allocate these registers before we allocate other + // global ranges. The value must be less than the AllocationPriority of the + // ACC registers. + let AllocationPriority = 36; + let Size = 512; +} + +// FIXME: This allocation order may increase stack frame size when allocating +// non-volatile registers. +// +// Placing Altivec registers first and allocate the rest as underlying VSX +// ones, to reduce interference with accumulator registers (lower 32 VSRs). +// This reduces copies when loading for accumulators, which is common use for +// paired VSX registers. +def VSRpRC : + RegisterClass<"PPC", [v256i1], 128, + (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21, + VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30, + VSRp29, VSRp28, VSRp27, VSRp26, + (sequence "VSRp%u", 0, 6), + (sequence "VSRp%u", 15, 7))> { + // Give the VSRp registers a non-zero AllocationPriority. The value is less + // than 32 as these registers should not always be allocated before global + // ranges and the value should be less than the AllocationPriority - 32 for + // the UACC registers. Even global VSRp registers should be allocated after + // the UACC registers have been chosen. + let AllocationPriority = 2; + let Size = 256; +} + + + + |
