diff options
Diffstat (limited to 'ELF/Arch/PPC64.cpp')
-rw-r--r-- | ELF/Arch/PPC64.cpp | 473 |
1 files changed, 436 insertions, 37 deletions
diff --git a/ELF/Arch/PPC64.cpp b/ELF/Arch/PPC64.cpp index fa3bf6c62a0d..8a320c9a4e9e 100644 --- a/ELF/Arch/PPC64.cpp +++ b/ELF/Arch/PPC64.cpp @@ -23,12 +23,49 @@ using namespace lld::elf; static uint64_t PPC64TocOffset = 0x8000; static uint64_t DynamicThreadPointerOffset = 0x8000; +// The instruction encoding of bits 21-30 from the ISA for the Xform and Dform +// instructions that can be used as part of the initial exec TLS sequence. +enum XFormOpcd { + LBZX = 87, + LHZX = 279, + LWZX = 23, + LDX = 21, + STBX = 215, + STHX = 407, + STWX = 151, + STDX = 149, + ADD = 266, +}; + +enum DFormOpcd { + LBZ = 34, + LBZU = 35, + LHZ = 40, + LHZU = 41, + LHAU = 43, + LWZ = 32, + LWZU = 33, + LFSU = 49, + LD = 58, + LFDU = 51, + STB = 38, + STBU = 39, + STH = 44, + STHU = 45, + STW = 36, + STWU = 37, + STFSU = 53, + STFDU = 55, + STD = 62, + ADDI = 14 +}; + uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a // .got when we see a relocation that uses it, so for us the start is always // the .got. - uint64_t TocVA = InX::Got->getVA(); + uint64_t TocVA = In.Got->getVA(); // Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000 // thus permitting a full 64 Kbytes segment. Note that the glibc startup @@ -37,6 +74,31 @@ uint64_t elf::getPPC64TocBase() { return TocVA + PPC64TocOffset; } +unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) { + // The offset is encoded into the 3 most significant bits of the st_other + // field, with some special values described in section 3.4.1 of the ABI: + // 0 --> Zero offset between the GEP and LEP, and the function does NOT use + // the TOC pointer (r2). r2 will hold the same value on returning from + // the function as it did on entering the function. + // 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a + // caller-saved register for all callers. + // 2-6 --> The binary logarithm of the offset eg: + // 2 --> 2^2 = 4 bytes --> 1 instruction. + // 6 --> 2^6 = 64 bytes --> 16 instructions. + // 7 --> Reserved. + uint8_t GepToLep = (StOther >> 5) & 7; + if (GepToLep < 2) + return 0; + + // The value encoded in the st_other bits is the + // log-base-2(offset). + if (GepToLep < 7) + return 1 << GepToLep; + + error("reserved value of 7 in the 3 most-significant-bits of st_other"); + return 0; +} + namespace { class PPC64 final : public TargetInfo { public: @@ -51,11 +113,16 @@ public: void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; + void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override; + + bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const override; }; } // namespace @@ -71,8 +138,64 @@ static uint16_t highera(uint64_t V) { return (V + 0x8000) >> 32; } static uint16_t highest(uint64_t V) { return V >> 48; } static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; } +// Extracts the 'PO' field of an instruction encoding. +static uint8_t getPrimaryOpCode(uint32_t Encoding) { return (Encoding >> 26); } + +static bool isDQFormInstruction(uint32_t Encoding) { + switch (getPrimaryOpCode(Encoding)) { + default: + return false; + case 56: + // The only instruction with a primary opcode of 56 is `lq`. + return true; + case 61: + // There are both DS and DQ instruction forms with this primary opcode. + // Namely `lxv` and `stxv` are the DQ-forms that use it. + // The DS 'XO' bits being set to 01 is restricted to DQ form. + return (Encoding & 3) == 0x1; + } +} + +static bool isInstructionUpdateForm(uint32_t Encoding) { + switch (getPrimaryOpCode(Encoding)) { + default: + return false; + case LBZU: + case LHAU: + case LHZU: + case LWZU: + case LFSU: + case LFDU: + case STBU: + case STHU: + case STWU: + case STFSU: + case STFDU: + return true; + // LWA has the same opcode as LD, and the DS bits is what differentiates + // between LD/LDU/LWA + case LD: + case STD: + return (Encoding & 3) == 1; + } +} + +// There are a number of places when we either want to read or write an +// instruction when handling a half16 relocation type. On big-endian the buffer +// pointer is pointing into the middle of the word we want to extract, and on +// little-endian it is pointing to the start of the word. These 2 helpers are to +// simplify reading and writing in that context. +static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) { + write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr); +} + +static uint32_t readInstrFromHalf16(const uint8_t *Loc) { + return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0)); +} + PPC64::PPC64() { GotRel = R_PPC64_GLOB_DAT; + NoneRel = R_PPC64_NONE; PltRel = R_PPC64_JMP_SLOT; RelativeRel = R_PPC64_RELATIVE; IRelativeRel = R_PPC64_IRELATIVE; @@ -85,14 +208,14 @@ PPC64::PPC64() { GotPltHeaderEntriesNum = 2; PltHeaderSize = 60; NeedsThunks = true; - TcbSize = 8; - TlsTpOffset = 0x7000; TlsModuleIndexRel = R_PPC64_DTPMOD64; TlsOffsetRel = R_PPC64_DTPREL64; TlsGotRel = R_PPC64_TPREL64; + NeedsMoreStackNonSplit = false; + // We need 64K pages (at least under glibc/Linux, the loader won't // set different permissions on a finer granularity than that). DefaultMaxPageSize = 65536; @@ -107,8 +230,7 @@ PPC64::PPC64() { // use 0x10000000 as the starting address. DefaultImageBase = 0x10000000; - TrapInstr = - (Config->IsLE == sys::IsLittleEndianHost) ? 0x7fe00008 : 0x0800e07f; + write32(TrapInstr.data(), 0x7fe00008); } static uint32_t getEFlags(InputFile *File) { @@ -146,27 +268,29 @@ void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, x@tprel@l - uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U; - switch (Type) { case R_PPC64_GOT_TLSGD16_HA: - write32(Loc - EndianOffset, 0x60000000); // nop + writeInstrFromHalf16(Loc, 0x60000000); // nop break; + case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: - write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13 + writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13 relocateOne(Loc, R_PPC64_TPREL16_HA, Val); break; case R_PPC64_TLSGD: write32(Loc, 0x60000000); // nop write32(Loc + 4, 0x38630000); // addi r3, r3 - relocateOne(Loc + 4 + EndianOffset, R_PPC64_TPREL16_LO, Val); + // Since we are relocating a half16 type relocation and Loc + 4 points to + // the start of an instruction we need to advance the buffer by an extra + // 2 bytes on BE. + relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, Val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } - void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement. // The local dynamic code sequence for a global `x` will look like: @@ -183,13 +307,12 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, 4096 - uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U; switch (Type) { case R_PPC64_GOT_TLSLD16_HA: - write32(Loc - EndianOffset, 0x60000000); // nop + writeInstrFromHalf16(Loc, 0x60000000); // nop break; case R_PPC64_GOT_TLSLD16_LO: - write32(Loc - EndianOffset, 0x3c6d0000); // addis r3, r13, 0 + writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0 break; case R_PPC64_TLSLD: write32(Loc, 0x60000000); // nop @@ -212,9 +335,90 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { } } +static unsigned getDFormOp(unsigned SecondaryOp) { + switch (SecondaryOp) { + case LBZX: + return LBZ; + case LHZX: + return LHZ; + case LWZX: + return LWZ; + case LDX: + return LD; + case STBX: + return STB; + case STHX: + return STH; + case STWX: + return STW; + case STDX: + return STD; + case ADD: + return ADDI; + default: + error("unrecognized instruction for IE to LE R_PPC64_TLS"); + return 0; + } +} + +void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { + // The initial exec code sequence for a global `x` will look like: + // Instruction Relocation Symbol + // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x + // ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x + // add r9, r9, x@tls R_PPC64_TLS x + + // Relaxing to local exec entails converting: + // addis r9, r2, x@got@tprel@ha into nop + // ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha + // add r9, r9, x@tls into addi r9, r9, x@tprel@l + + // x@tls R_PPC64_TLS is a relocation which does not compute anything, + // it is replaced with r13 (thread pointer). + + // The add instruction in the initial exec sequence has multiple variations + // that need to be handled. If we are building an address it will use an add + // instruction, if we are accessing memory it will use any of the X-form + // indexed load or store instructions. + + unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0; + switch (Type) { + case R_PPC64_GOT_TPREL16_HA: + write32(Loc - Offset, 0x60000000); // nop + break; + case R_PPC64_GOT_TPREL16_LO_DS: + case R_PPC64_GOT_TPREL16_DS: { + uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10 + write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13 + relocateOne(Loc, R_PPC64_TPREL16_HA, Val); + break; + } + case R_PPC64_TLS: { + uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc)); + if (PrimaryOp != 31) + error("unrecognized instruction for IE to LE R_PPC64_TLS"); + uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30 + uint32_t DFormOp = getDFormOp(SecondaryOp); + write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF))); + relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val); + break; + } + default: + llvm_unreachable("unknown relocation for IE to LE"); + break; + } +} + RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, const uint8_t *Loc) const { switch (Type) { + case R_PPC64_GOT16: + case R_PPC64_GOT16_DS: + case R_PPC64_GOT16_HA: + case R_PPC64_GOT16_HI: + case R_PPC64_GOT16_LO: + case R_PPC64_GOT16_LO_DS: + return R_GOT_OFF; case R_PPC64_TOC16: case R_PPC64_TOC16_DS: case R_PPC64_TOC16_HA: @@ -224,6 +428,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, return R_GOTREL; case R_PPC64_TOC: return R_PPC_TOC; + case R_PPC64_REL14: case R_PPC64_REL24: return R_PPC_CALL_PLT; case R_PPC64_REL16_LO: @@ -279,7 +484,7 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S, case R_PPC64_TLSLD: return R_TLSLD_HINT; case R_PPC64_TLS: - return R_HINT; + return R_TLSIE_HINT; default: return R_ABS; } @@ -308,16 +513,16 @@ void PPC64::writePltHeader(uint8_t *Buf) const { // The 'bcl' instruction will set the link register to the address of the // following instruction ('mflr r11'). Here we store the offset from that // instruction to the first entry in the GotPlt section. - int64_t GotPltOffset = InX::GotPlt->getVA() - (InX::Plt->getVA() + 8); + int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8); write64(Buf + 52, GotPltOffset); } void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { - int32_t Offset = PltHeaderSize + Index * PltEntrySize; - // bl __glink_PLTresolve - write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc)); + int32_t Offset = PltHeaderSize + Index * PltEntrySize; + // bl __glink_PLTresolve + write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc)); } static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) { @@ -328,30 +533,36 @@ static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) { switch (Type) { // TOC biased relocation. + case R_PPC64_GOT16: case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSLD16: case R_PPC64_TOC16: return {R_PPC64_ADDR16, TocBiasedVal}; + case R_PPC64_GOT16_DS: case R_PPC64_TOC16_DS: case R_PPC64_GOT_TPREL16_DS: case R_PPC64_GOT_DTPREL16_DS: return {R_PPC64_ADDR16_DS, TocBiasedVal}; + case R_PPC64_GOT16_HA: case R_PPC64_GOT_TLSGD16_HA: case R_PPC64_GOT_TLSLD16_HA: case R_PPC64_GOT_TPREL16_HA: case R_PPC64_GOT_DTPREL16_HA: case R_PPC64_TOC16_HA: return {R_PPC64_ADDR16_HA, TocBiasedVal}; + case R_PPC64_GOT16_HI: case R_PPC64_GOT_TLSGD16_HI: case R_PPC64_GOT_TLSLD16_HI: case R_PPC64_GOT_TPREL16_HI: case R_PPC64_GOT_DTPREL16_HI: case R_PPC64_TOC16_HI: return {R_PPC64_ADDR16_HI, TocBiasedVal}; + case R_PPC64_GOT16_LO: case R_PPC64_GOT_TLSGD16_LO: case R_PPC64_GOT_TLSLD16_LO: case R_PPC64_TOC16_LO: return {R_PPC64_ADDR16_LO, TocBiasedVal}; + case R_PPC64_GOT16_LO_DS: case R_PPC64_TOC16_LO_DS: case R_PPC64_GOT_TPREL16_LO_DS: case R_PPC64_GOT_DTPREL16_LO_DS: @@ -386,9 +597,27 @@ static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) { } } +static bool isTocOptType(RelType Type) { + switch (Type) { + case R_PPC64_GOT16_HA: + case R_PPC64_GOT16_LO_DS: + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO_DS: + case R_PPC64_TOC16_LO: + return true; + default: + return false; + } +} + void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { - // For a TOC-relative relocation, proceed in terms of the corresponding - // ADDR16 relocation type. + // We need to save the original relocation type to use in diagnostics, and + // use the original type to determine if we should toc-optimize the + // instructions being relocated. + RelType OriginalType = Type; + bool ShouldTocOptimize = isTocOptType(Type); + // For dynamic thread pointer relative, toc-relative, and got-indirect + // relocations, proceed in terms of the corresponding ADDR16 relocation type. std::tie(Type, Val) = toAddr16Rel(Type, Val); switch (Type) { @@ -401,18 +630,25 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { } case R_PPC64_ADDR16: case R_PPC64_TPREL16: - checkInt(Loc, Val, 16, Type); + checkInt(Loc, Val, 16, OriginalType); write16(Loc, Val); break; case R_PPC64_ADDR16_DS: - case R_PPC64_TPREL16_DS: - checkInt(Loc, Val, 16, Type); - write16(Loc, (read16(Loc) & 3) | (Val & ~3)); - break; + case R_PPC64_TPREL16_DS: { + checkInt(Loc, Val, 16, OriginalType); + // DQ-form instructions use bits 28-31 as part of the instruction encoding + // DS-form instructions only use bits 30-31. + uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3; + checkAlignment(Loc, lo(Val), Mask + 1, OriginalType); + write16(Loc, (read16(Loc) & Mask) | lo(Val)); + } break; case R_PPC64_ADDR16_HA: case R_PPC64_REL16_HA: case R_PPC64_TPREL16_HA: - write16(Loc, ha(Val)); + if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) + writeInstrFromHalf16(Loc, 0x60000000); + else + write16(Loc, ha(Val)); break; case R_PPC64_ADDR16_HI: case R_PPC64_REL16_HI: @@ -438,12 +674,40 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { case R_PPC64_ADDR16_LO: case R_PPC64_REL16_LO: case R_PPC64_TPREL16_LO: + // When the high-adjusted part of a toc relocation evalutes to 0, it is + // changed into a nop. The lo part then needs to be updated to use the + // toc-pointer register r2, as the base register. + if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) { + uint32_t Instr = readInstrFromHalf16(Loc); + if (isInstructionUpdateForm(Instr)) + error(getErrorLocation(Loc) + + "can't toc-optimize an update instruction: 0x" + + utohexstr(Instr)); + Instr = (Instr & 0xFFE00000) | 0x00020000; + writeInstrFromHalf16(Loc, Instr); + } write16(Loc, lo(Val)); break; case R_PPC64_ADDR16_LO_DS: - case R_PPC64_TPREL16_LO_DS: - write16(Loc, (read16(Loc) & 3) | (lo(Val) & ~3)); - break; + case R_PPC64_TPREL16_LO_DS: { + // DQ-form instructions use bits 28-31 as part of the instruction encoding + // DS-form instructions only use bits 30-31. + uint32_t Inst = readInstrFromHalf16(Loc); + uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3; + checkAlignment(Loc, lo(Val), Mask + 1, OriginalType); + if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) { + // When the high-adjusted part of a toc relocation evalutes to 0, it is + // changed into a nop. The lo part then needs to be updated to use the toc + // pointer register r2, as the base register. + if (isInstructionUpdateForm(Inst)) + error(getErrorLocation(Loc) + + "Can't toc-optimize an update instruction: 0x" + + Twine::utohexstr(Inst)); + Inst = (Inst & 0xFFE0000F) | 0x00020000; + writeInstrFromHalf16(Loc, Inst); + } + write16(Loc, (read16(Loc) & Mask) | lo(Val)); + } break; case R_PPC64_ADDR32: case R_PPC64_REL32: checkInt(Loc, Val, 32, Type); @@ -454,9 +718,17 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { case R_PPC64_TOC: write64(Loc, Val); break; + case R_PPC64_REL14: { + uint32_t Mask = 0x0000FFFC; + checkInt(Loc, Val, 16, Type); + checkAlignment(Loc, Val, 4, Type); + write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask)); + break; + } case R_PPC64_REL24: { uint32_t Mask = 0x03FFFFFC; - checkInt(Loc, Val, 24, Type); + checkInt(Loc, Val, 26, Type); + checkAlignment(Loc, Val, 4, Type); write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask)); break; } @@ -470,9 +742,30 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const { bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const { - // If a function is in the plt it needs to be called through - // a call stub. - return Type == R_PPC64_REL24 && S.isInPlt(); + if (Type != R_PPC64_REL14 && Type != R_PPC64_REL24) + return false; + + // If a function is in the Plt it needs to be called with a call-stub. + if (S.isInPlt()) + return true; + + // If a symbol is a weak undefined and we are compiling an executable + // it doesn't need a range-extending thunk since it can't be called. + if (S.isUndefWeak() && !Config->Shared) + return false; + + // If the offset exceeds the range of the branch type then it will need + // a range-extending thunk. + return !inBranchRange(Type, BranchAddr, S.getVA()); +} + +bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { + int64_t Offset = Dst - Src; + if (Type == R_PPC64_REL14) + return isInt<16>(Offset); + if (Type == R_PPC64_REL24) + return isInt<26>(Offset); + llvm_unreachable("unsupported relocation type used in branch"); } RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data, @@ -511,9 +804,8 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { case R_PPC64_GOT_TLSGD16_LO: { // Relax from addi r3, rA, sym@got@tlsgd@l to // ld r3, sym@got@tprel@l(rA) - uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U; - uint32_t InputRegister = (read32(Loc - EndianOffset) & (0x1f << 16)); - write32(Loc - EndianOffset, 0xE8600000 | InputRegister); + uint32_t InputRegister = (readInstrFromHalf16(Loc) & (0x1f << 16)); + writeInstrFromHalf16(Loc, 0xE8600000 | InputRegister); relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val); return; } @@ -526,6 +818,113 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { } } +// The prologue for a split-stack function is expected to look roughly +// like this: +// .Lglobal_entry_point: +// # TOC pointer initalization. +// ... +// .Llocal_entry_point: +// # load the __private_ss member of the threads tcbhead. +// ld r0,-0x7000-64(r13) +// # subtract the functions stack size from the stack pointer. +// addis r12, r1, ha(-stack-frame size) +// addi r12, r12, l(-stack-frame size) +// # compare needed to actual and branch to allocate_more_stack if more +// # space is needed, otherwise fallthrough to 'normal' function body. +// cmpld cr7,r12,r0 +// blt- cr7, .Lallocate_more_stack +// +// -) The allocate_more_stack block might be placed after the split-stack +// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body` +// instead. +// -) If either the addis or addi is not needed due to the stack size being +// smaller then 32K or a multiple of 64K they will be replaced with a nop, +// but there will always be 2 instructions the linker can overwrite for the +// adjusted stack size. +// +// The linkers job here is to increase the stack size used in the addis/addi +// pair by split-stack-size-adjust. +// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size) +// addi r12, r12, l(-stack-frame size - split-stack-adjust-size) +bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End, + uint8_t StOther) const { + // If the caller has a global entry point adjust the buffer past it. The start + // of the split-stack prologue will be at the local entry point. + Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther); + + // At the very least we expect to see a load of some split-stack data from the + // tcb, and 2 instructions that calculate the ending stack address this + // function will require. If there is not enough room for at least 3 + // instructions it can't be a split-stack prologue. + if (Loc + 12 >= End) + return false; + + // First instruction must be `ld r0, -0x7000-64(r13)` + if (read32(Loc) != 0xe80d8fc0) + return false; + + int16_t HiImm = 0; + int16_t LoImm = 0; + // First instruction can be either an addis if the frame size is larger then + // 32K, or an addi if the size is less then 32K. + int32_t FirstInstr = read32(Loc + 4); + if (getPrimaryOpCode(FirstInstr) == 15) { + HiImm = FirstInstr & 0xFFFF; + } else if (getPrimaryOpCode(FirstInstr) == 14) { + LoImm = FirstInstr & 0xFFFF; + } else { + return false; + } + + // Second instruction is either an addi or a nop. If the first instruction was + // an addi then LoImm is set and the second instruction must be a nop. + uint32_t SecondInstr = read32(Loc + 8); + if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) { + LoImm = SecondInstr & 0xFFFF; + } else if (SecondInstr != 0x60000000) { + return false; + } + + // The register operands of the first instruction should be the stack-pointer + // (r1) as the input (RA) and r12 as the output (RT). If the second + // instruction is not a nop, then it should use r12 as both input and output. + auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT, + uint8_t ExpectedRA) { + return ((Instr & 0x3E00000) >> 21 == ExpectedRT) && + ((Instr & 0x1F0000) >> 16 == ExpectedRA); + }; + if (!CheckRegOperands(FirstInstr, 12, 1)) + return false; + if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12)) + return false; + + int32_t StackFrameSize = (HiImm * 65536) + LoImm; + // Check that the adjusted size doesn't overflow what we can represent with 2 + // instructions. + if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) { + error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows"); + return false; + } + + int32_t AdjustedStackFrameSize = + StackFrameSize - Config->SplitStackAdjustSize; + + LoImm = AdjustedStackFrameSize & 0xFFFF; + HiImm = (AdjustedStackFrameSize + 0x8000) >> 16; + if (HiImm) { + write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm); + // If the low immediate is zero the second instruction will be a nop. + SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000; + write32(Loc + 8, SecondInstr); + } else { + // addi r12, r1, imm + write32(Loc + 4, (0x39810000) | (uint16_t)LoImm); + write32(Loc + 8, 0x60000000); + } + + return true; +} + TargetInfo *elf::getPPC64TargetInfo() { static PPC64 Target; return &Target; |