author     Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /lld/ELF/Arch
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'lld/ELF/Arch')
-rw-r--r--  lld/ELF/Arch/AArch64.cpp      | 145
-rw-r--r--  lld/ELF/Arch/AMDGPU.cpp       |  17
-rw-r--r--  lld/ELF/Arch/ARM.cpp          | 326
-rw-r--r--  lld/ELF/Arch/AVR.cpp          | 142
-rw-r--r--  lld/ELF/Arch/Hexagon.cpp      |  66
-rw-r--r--  lld/ELF/Arch/MSP430.cpp       |  28
-rw-r--r--  lld/ELF/Arch/Mips.cpp         |  84
-rw-r--r--  lld/ELF/Arch/MipsArchTree.cpp |  26
-rw-r--r--  lld/ELF/Arch/PPC.cpp          | 104
-rw-r--r--  lld/ELF/Arch/PPC64.cpp        | 266
-rw-r--r--  lld/ELF/Arch/RISCV.cpp        |  54
-rw-r--r--  lld/ELF/Arch/SPARCV9.cpp      |  83
-rw-r--r--  lld/ELF/Arch/X86.cpp          |  55
-rw-r--r--  lld/ELF/Arch/X86_64.cpp       | 386
14 files changed, 1353 insertions, 429 deletions
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index df41a12f7454..637046e90bbd 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -17,14 +17,13 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // Page(Expr) is the page address of the expression Expr, defined // as (Expr & ~0xFFF). (This applies even if the machine page size // supported by the platform has a different value.) -uint64_t getAArch64Page(uint64_t expr) { +uint64_t elf::getAArch64Page(uint64_t expr) { return expr & ~static_cast<uint64_t>(0xFFF); } @@ -45,12 +44,16 @@ public: uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; bool usesOnlyLowPageBits(RelType type) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -123,6 +126,7 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_CONDBR19: case R_AARCH64_JUMP26: case R_AARCH64_TSTBR14: + case R_AARCH64_PLT32: return R_PLT_PC; case R_AARCH64_PREL16: case R_AARCH64_PREL32: @@ -208,10 +212,10 @@ void AArch64::writePltHeader(uint8_t *buf) const { uint64_t got = in.gotPlt->getVA(); uint64_t plt = in.plt->getVA(); - relocateOne(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(got + 16) - getAArch64Page(plt + 4)); - relocateOne(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); - relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(got + 16) - getAArch64Page(plt + 4)); + relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); } void AArch64::writePlt(uint8_t *buf, const Symbol &sym, @@ -225,10 +229,10 @@ void AArch64::writePlt(uint8_t *buf, const Symbol &sym, memcpy(buf, inst, sizeof(inst)); uint64_t gotPltEntryAddr = sym.getGotPltVA(); - relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); - relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); - relocateOne(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); + relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); } bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, @@ -241,7 +245,8 @@ bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for 
R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) return false; uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a); return !inBranchRange(type, branchAddr, dst); @@ -255,11 +260,13 @@ uint32_t AArch64::getThunkSectionSpacing() const { } bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) return true; // The AArch64 call and unconditional branch instructions have a range of - // +/- 128 MiB. - uint64_t range = 128 * 1024 * 1024; + // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB. + uint64_t range = + type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024); if (dst > src) { // Immediate of branch is signed. range -= 4; @@ -309,16 +316,21 @@ static void writeSMovWImm(uint8_t *loc, uint32_t imm) { write32le(loc, inst | ((imm & 0xFFFF) << 5)); } -void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void AArch64::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_AARCH64_ABS16: case R_AARCH64_PREL16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_AARCH64_ABS32: case R_AARCH64_PREL32: - checkIntUInt(loc, val, 32, type); + checkIntUInt(loc, val, 32, rel); + write32le(loc, val); + break; + case R_AARCH64_PLT32: + checkInt(loc, val, 32, rel); write32le(loc, val); break; case R_AARCH64_ABS64: @@ -332,13 +344,13 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: - checkInt(loc, val, 33, type); + checkInt(loc, val, 33, rel); LLVM_FALLTHROUGH; case R_AARCH64_ADR_PREL_PG_HI21_NC: write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write32AArch64Addr(loc, val); break; case R_AARCH64_JUMP26: @@ -352,13 +364,13 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, 0x14000000); LLVM_FALLTHROUGH; case R_AARCH64_CALL26: - checkInt(loc, val, 28, type); + checkInt(loc, val, 28, rel); or32le(loc, (val & 0x0FFFFFFC) >> 2); break; case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 21, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 21, rel); or32le(loc, (val & 0x1FFFFC) << 3); break; case R_AARCH64_LDST8_ABS_LO12_NC: @@ -367,12 +379,12 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_AARCH64_LDST16_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: - checkAlignment(loc, val, 2, type); + checkAlignment(loc, val, 2, rel); or32AArch64Imm(loc, getBits(val, 1, 11)); break; case R_AARCH64_LDST32_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: - checkAlignment(loc, val, 4, type); + checkAlignment(loc, val, 4, rel); or32AArch64Imm(loc, getBits(val, 2, 11)); break; case R_AARCH64_LDST64_ABS_LO12_NC: @@ -380,28 +392,28 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: case R_AARCH64_TLSDESC_LD64_LO12: - 
checkAlignment(loc, val, 8, type); + checkAlignment(loc, val, 8, rel); or32AArch64Imm(loc, getBits(val, 3, 11)); break; case R_AARCH64_LDST128_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC: - checkAlignment(loc, val, 16, type); + checkAlignment(loc, val, 16, rel); or32AArch64Imm(loc, getBits(val, 4, 11)); break; case R_AARCH64_MOVW_UABS_G0: - checkUInt(loc, val, 16, type); + checkUInt(loc, val, 16, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G0_NC: or32le(loc, (val & 0xFFFF) << 5); break; case R_AARCH64_MOVW_UABS_G1: - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G1_NC: or32le(loc, (val & 0xFFFF0000) >> 11); break; case R_AARCH64_MOVW_UABS_G2: - checkUInt(loc, val, 48, type); + checkUInt(loc, val, 48, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G2_NC: or32le(loc, (val & 0xFFFF00000000) >> 27); @@ -412,7 +424,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G0: case R_AARCH64_MOVW_SABS_G0: case R_AARCH64_TLSLE_MOVW_TPREL_G0: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G0_NC: case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: @@ -421,7 +433,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G1: case R_AARCH64_MOVW_SABS_G1: case R_AARCH64_TLSLE_MOVW_TPREL_G1: - checkInt(loc, val, 33, type); + checkInt(loc, val, 33, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G1_NC: case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: @@ -430,7 +442,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G2: case R_AARCH64_MOVW_SABS_G2: case R_AARCH64_TLSLE_MOVW_TPREL_G2: - checkInt(loc, val, 49, type); + checkInt(loc, val, 49, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G2_NC: writeSMovWImm(loc, val >> 32); @@ -439,11 +451,11 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeSMovWImm(loc, val >> 48); break; case R_AARCH64_TSTBR14: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); or32le(loc, (val & 0xFFFC) << 3); break; case R_AARCH64_TLSLE_ADD_TPREL_HI12: - checkUInt(loc, val, 24, type); + checkUInt(loc, val, 24, rel); or32AArch64Imm(loc, val >> 12); break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: @@ -455,7 +467,8 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12] @@ -467,9 +480,9 @@ void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // movk x0, #0x10 // nop // nop - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); - switch (type) { + switch (rel.type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(loc, 0xd503201f); // nop @@ -485,7 +498,8 @@ void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void AArch64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { +void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v 
[R_AARCH64_TLSDESC_LD64_LO12] @@ -498,34 +512,35 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // nop // nop - switch (type) { + switch (rel.type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(loc, 0xd503201f); // nop break; case R_AARCH64_TLSDESC_ADR_PAGE21: write32le(loc, 0x90000000); // adrp - relocateOne(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val); + relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val); break; case R_AARCH64_TLSDESC_LD64_LO12: write32le(loc, 0xf9400000); // ldr - relocateOne(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); + relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } -void AArch64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { - checkUInt(loc, val, 32, type); +void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + checkUInt(loc, val, 32, rel); - if (type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { + if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { // Generate MOVZ. uint32_t regNo = read32le(loc) & 0x1f; write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5)); return; } - if (type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { + if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { // Generate MOVK. uint32_t regNo = read32le(loc) & 0x1f; write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5)); @@ -593,8 +608,10 @@ AArch64BtiPac::AArch64BtiPac() { // the function in an executable being taken by a shared library. // FIXME: There is a potential optimization to omit the BTI if we detect // that the address of the PLT entry isn't taken. + // The PAC PLT entries require dynamic loader support and this isn't known + // from properties in the objects, so we use the command line flag. btiEntry = btiHeader && !config->shared; - pacEntry = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_PAC); + pacEntry = config->zPacPlt; if (btiEntry || pacEntry) { pltEntrySize = 24; @@ -627,10 +644,10 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const { } memcpy(buf, pltData, sizeof(pltData)); - relocateOne(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(got + 16) - getAArch64Page(plt + 8)); - relocateOne(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); - relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(got + 16) - getAArch64Page(plt + 8)); + relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); if (!btiHeader) // We didn't add the BTI c instruction so round out size with NOP. 
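As an aside (a sketch, not part of the commit): the IE-to-LE relaxation above rewrites two fixed instruction templates, and the encodings are easier to follow in isolation. Helper names here are hypothetical.

  #include <cstdint>

  // Rewrite "adrp xN, :gottprel:v" as "movz xN, #hi16(tpoff), lsl #16":
  // keep the destination register of the old instruction and splice the
  // high 16 bits of the TP offset into the MOVZ immediate field.
  uint32_t relaxAdrpToMovz(uint32_t oldInsn, uint64_t tpOff) {
    uint32_t regNo = oldInsn & 0x1f;
    return (0xd2a00000 | regNo) | (((tpOff >> 16) & 0xffff) << 5);
  }

  // Rewrite "ldr xN, [xN, :gottprel_lo12:v]" as "movk xN, #lo16(tpoff)".
  uint32_t relaxLdrToMovk(uint32_t oldInsn, uint64_t tpOff) {
    uint32_t regNo = oldInsn & 0x1f;
    return (0xf2800000 | regNo) | ((tpOff & 0xffff) << 5);
  }

The checkUInt(loc, val, 32, rel) guard above exists because a MOVZ/MOVK pair like this can only materialize a 32-bit offset.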
memcpy(buf + sizeof(pltData), nopData, sizeof(nopData)); @@ -664,11 +681,10 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, uint64_t gotPltEntryAddr = sym.getGotPltVA(); memcpy(buf, addrInst, sizeof(addrInst)); - relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(gotPltEntryAddr) - - getAArch64Page(pltEntryAddr)); - relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); - relocateOne(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); + relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); if (pacEntry) memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); @@ -689,7 +705,4 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *getAArch64TargetInfo() { return getTargetInfo(); } - -} // namespace elf -} // namespace lld +TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); } diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index b42ca7746742..3610a38692d6 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -17,16 +17,16 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class AMDGPU final : public TargetInfo { public: AMDGPU(); uint32_t calcEFlags() const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; @@ -58,8 +58,8 @@ uint32_t AMDGPU::calcEFlags() const { return ret; } -void AMDGPU::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void AMDGPU::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_AMDGPU_ABS32: case R_AMDGPU_GOTPCREL: case R_AMDGPU_GOTPCREL32_LO: @@ -108,10 +108,7 @@ RelType AMDGPU::getDynRel(RelType type) const { return R_AMDGPU_NONE; } -TargetInfo *getAMDGPUTargetInfo() { +TargetInfo *elf::getAMDGPUTargetInfo() { static AMDGPU target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index de1023346aa5..fd90557cc4f6 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -18,9 +18,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class ARM final : public TargetInfo { @@ -43,7 +42,8 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -64,6 +64,7 @@ ARM::ARM() { ipltEntrySize = 16; trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; needsThunks = true; + defaultMaxPageSize = 65536; } uint32_t ARM::calcEFlags() const { @@ -120,6 +121,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, return R_TLSGD_PC; case R_ARM_TLS_LDM32: return R_TLSLD_PC; + case R_ARM_TLS_LDO32: + return R_DTPREL; case 
R_ARM_BASE_PREL: // B(S) + A - P // FIXME: currently B(S) assumed to be .got, this may not hold for all @@ -131,6 +134,19 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: return R_PC; + case R_ARM_ALU_PC_G0: + case R_ARM_LDR_PC_G0: + case R_ARM_THM_ALU_PREL_11_0: + case R_ARM_THM_PC8: + case R_ARM_THM_PC12: + return R_ARM_PCA; + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVW_BREL: + case R_ARM_MOVT_BREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVW_BREL: + case R_ARM_THM_MOVT_BREL: + return R_ARM_SBREL; case R_ARM_NONE: return R_NONE; case R_ARM_TLS_LE32: @@ -262,7 +278,8 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { + uint64_t branchAddr, const Symbol &s, + int64_t /*a*/) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) @@ -275,8 +292,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_PLT32: case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. - // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). - if (expr == R_PC && ((s.getVA() & 1) == 1)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; LLVM_FALLTHROUGH; case R_ARM_CALL: { @@ -286,8 +303,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. - // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || ((s.getVA() & 1) == 0)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). + if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) return true; LLVM_FALLTHROUGH; case R_ARM_THM_CALL: { @@ -375,8 +392,82 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { return distance <= range; } -void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +// Helper to produce message text when LLD detects that a CALL relocation to +// a non STT_FUNC symbol that may result in incorrect interworking between ARM +// or Thumb. +static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) { + assert(!s.isFunc()); + if (s.isSection()) { + // Section symbols must be defined and in a section. Users cannot change + // the type. Use the section name as getName() returns an empty string. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to STT_SECTION symbol " + + cast<Defined>(s).section->name + " ; interworking not performed"); + } else { + // Warn with hint on how to alter the symbol type. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to non STT_FUNC symbol: " + s.getName() + + " interworking not performed; consider using directive '.type " + + s.getName() + + ", %function' to give symbol type STT_FUNC if" + " interworking between ARM and Thumb is required"); + } +} + +// Utility functions taken from ARMAddressingModes.h, only changes are LLD +// coding style. + +// Rotate a 32-bit unsigned value right by a specified amt of bits. 
+static uint32_t rotr32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val >> amt) | (val << ((32 - amt) & 31)); +} + +// Rotate a 32-bit unsigned value left by a specified amt of bits. +static uint32_t rotl32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val << amt) | (val >> ((32 - amt) & 31)); +} + +// Try to encode a 32-bit unsigned immediate imm with an immediate shifter +// operand, this form is an 8-bit immediate rotated right by an even number of +// bits. We compute the rotate amount to use. If this immediate value cannot be +// handled with a single shifter-op, determine a good rotate amount that will +// take a maximal chunk of bits out of the immediate. +static uint32_t getSOImmValRotate(uint32_t imm) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((imm & ~255U) == 0) + return 0; + + // Use CTZ to compute the rotate amount. + unsigned tz = llvm::countTrailingZeros(imm); + + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, + // not 9. + unsigned rotAmt = tz & ~1; + + // If we can handle this spread, return it. + if ((rotr32(imm, rotAmt) & ~255U) == 0) + return (32 - rotAmt) & 31; // HW rotates right, not left. + + // For values like 0xF000000F, we should ignore the low 6 bits, then + // retry the hunt. + if (imm & 63U) { + unsigned tz2 = countTrailingZeros(imm & ~63U); + unsigned rotAmt2 = tz2 & ~1; + if ((rotr32(imm, rotAmt2) & ~255U) == 0) + return (32 - rotAmt2) & 31; // HW rotates right, not left. + } + + // Otherwise, we have no way to cover this span of bits with a single + // shifter_op immediate. Return a chunk of bits that will be useful to + // handle. + return (32 - rotAmt) & 31; // HW rotates right, not left. +} + +void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_ARM_ABS32: case R_ARM_BASE_PREL: case R_ARM_GOTOFF32: @@ -397,40 +488,49 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); break; case R_ARM_PREL31: - checkInt(loc, val, 31, type); + checkInt(loc, val, 31, rel); write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000)); break; - case R_ARM_CALL: - // R_ARM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if (val & 1) { - // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. + case R_ARM_CALL: { + // R_ARM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we don't need to interwork. + assert(rel.sym); // R_ARM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. + if (!rel.sym->isFunc() && isBlx != bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() ? 
bit0Thumb : isBlx) { // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, 0xfa000000 | // opcode ((val & 2) << 23) | // H ((val >> 2) & 0x00ffffff)); // imm24 break; } - if ((read32le(loc) & 0xfe000000) == 0xfa000000) - // BLX (always unconditional) instruction to an ARM Target, select an - // unconditional BL. - write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); + // BLX (always unconditional) instruction to an ARM Target, select an + // unconditional BL. + write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); // fall through as BL encoding is shared with B + } LLVM_FALLTHROUGH; case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, (read32le(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); break; case R_ARM_THM_JUMP11: - checkInt(loc, val, 12, type); + checkInt(loc, val, 12, rel); write16le(loc, (read32le(loc) & 0xf800) | ((val >> 1) & 0x07ff)); break; case R_ARM_THM_JUMP19: // Encoding T3: Val = S:J2:J1:imm6:imm11:0 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write16le(loc, (read16le(loc) & 0xfbc0) | // opcode cond ((val >> 10) & 0x0400) | // S @@ -441,20 +541,32 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 5) & 0x2000) | // J1 ((val >> 1) & 0x07ff)); // imm11 break; - case R_ARM_THM_CALL: - // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if ((val & 1) == 0) { - // Ensure BLX destination is 4-byte aligned. As BLX instruction may - // only be two byte aligned. This must be done before overflow check + case R_ARM_THM_CALL: { + // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we need to interwork. + assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read16le(loc + 2) & 0x1000) == 0; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { + // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As + // the BLX instruction may only be two byte aligned. This must be done + // before overflow check. val = alignTo(val, 4); + write16le(loc + 2, read16le(loc + 2) & ~0x1000); + } else { + write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12); } - // Bit 12 is 0 for BLX, 1 for BL - write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12); if (!config->armJ1J2BranchEncoding) { // Older Arm architectures do not support R_ARM_THM_JUMP24 and have // different encoding rules and range due to J1 and J2 always being 1. 
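As an aside (a sketch, not part of the commit): the BL/BLX selection that R_ARM_CALL and R_ARM_THM_CALL now share reduces to one predicate. Names are illustrative; the commit inlines this logic.

  #include <cstdint>

  // An A32 BLX (immediate) is unconditional; the hunk above recognizes it
  // by (insn & 0xfe000000) == 0xfa000000, which covers both H-bit values.
  bool isA32Blx(uint32_t insn) { return (insn & 0xfe000000) == 0xfa000000; }

  // For STT_FUNC symbols, bit 0 of the destination selects the target
  // state (1 = Thumb), so an ARM-state caller needs BLX. For non-function
  // symbols the original instruction is preserved, and a mismatch between
  // it and bit 0 only produces the interworking warning.
  bool shouldWriteBlx(uint32_t insn, uint64_t dest, bool symIsFunc) {
    bool bit0Thumb = dest & 1;
    return symIsFunc ? bit0Thumb : isA32Blx(insn);
  }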
- checkInt(loc, val, 23, type); + checkInt(loc, val, 23, rel); write16le(loc, 0xf000 | // opcode ((val >> 12) & 0x07ff)); // imm11 @@ -464,11 +576,12 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 1) & 0x07ff)); // imm11 break; } + } // Fall through as rest of encoding is the same as B.W LLVM_FALLTHROUGH; case R_ARM_THM_JUMP24: // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); write16le(loc, 0xf000 | // opcode ((val >> 14) & 0x0400) | // S @@ -481,16 +594,19 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_MOVW_ABS_NC: case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_BREL_NC: write32le(loc, (read32le(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | (val & 0x0fff)); break; case R_ARM_MOVT_ABS: case R_ARM_MOVT_PREL: + case R_ARM_MOVT_BREL: write32le(loc, (read32le(loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff)); break; case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVT_BREL: // Encoding T1: A = imm4:i:imm3:imm8 write16le(loc, 0xf2c0 | // opcode @@ -503,6 +619,7 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_BREL_NC: // Encoding T3: A = imm4:i:imm3:imm8 write16le(loc, 0xf240 | // opcode @@ -513,8 +630,92 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val << 4) & 0x7000) | // imm3 (val & 0x00ff)); // imm8 break; + case R_ARM_ALU_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // literal is a 12-bit modified immediate, made up of a 4-bit even rotate + // right and an 8-bit immediate. The code-sequence here is derived from + // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we + // want to give an error if we cannot encode the constant. + uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x00400000; + val = ~val + 1; + } + if ((val & ~255U) != 0) { + uint32_t rotAmt = getSOImmValRotate(val); + // Error if we cannot encode this with a single shift + if (rotr32(~255U, rotAmt) & val) + error(getErrorLocation(loc) + "unencodeable immediate " + + Twine(val).str() + " for relocation " + toString(rel.type)); + val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8); + } + write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val); + break; + } + case R_ARM_LDR_PC_G0: { + // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - P. 
+ if (rel.sym->isFunc()) + val &= ~0x1; + // LDR (literal) u = bit23 + int64_t imm = val; + uint32_t u = 0x00800000; + if (imm < 0) { + imm = -imm; + u = 0; + } + checkUInt(loc, imm, 12, rel); + write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm); + break; + } + case R_ARM_THM_ALU_PREL_11_0: { + // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + int64_t imm = val; + uint16_t sub = 0; + if (imm < 0) { + imm = -imm; + sub = 0x00a0; + } + checkUInt(loc, imm, 12, rel); + write16le(loc, (read16le(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); + write16le(loc + 2, + (read16le(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff)); + break; + } + case R_ARM_THM_PC8: + // ADR and LDR literal encoding T1 positive offset only imm8:00 + // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + checkUInt(loc, val, 10, rel); + checkAlignment(loc, val, 4, rel); + write16le(loc, (read16le(loc) & 0xff00) | (val & 0x3fc) >> 2); + break; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + // imm12 is unsigned + // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + int64_t imm12 = val; + uint16_t u = 0x0080; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + checkUInt(loc, imm12, 12, rel); + write16le(loc, read16le(loc) | u); + write16le(loc + 2, (read16le(loc + 2) & 0xf000) | imm12); + break; + } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } @@ -582,14 +783,18 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVT_PREL: { + case R_ARM_MOVT_PREL: + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVT_BREL: { uint64_t val = read32le(buf) & 0x000f0fff; return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff)); } case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVT_PREL: { + case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVT_BREL: { // Encoding T3: A = imm4:i:imm3:imm8 uint16_t hi = read16le(buf); uint16_t lo = read16le(buf + 2); @@ -598,13 +803,50 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { ((lo & 0x7000) >> 4) | // imm3 (lo & 0x00ff)); // imm8 } + case R_ARM_ALU_PC_G0: { + // 12-bit immediate is a modified immediate made up of a 4-bit even + // right rotation and 8-bit constant. After the rotation the value + // is zero-extended. When bit 23 is set the instruction is an add, when + // bit 22 is set it is a sub. + uint32_t instr = read32le(buf); + uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2); + return (instr & 0x00400000) ? -val : val; + } + case R_ARM_LDR_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // LDR (literal) u = bit23 unsigned imm12 + bool u = read32le(buf) & 0x00800000; + uint32_t imm12 = read32le(buf) & 0xfff; + return u ? imm12 : -imm12; + } + case R_ARM_THM_ALU_PREL_11_0: { + // Thumb2 ADR, which is an alias for a sub or add instruction with an + // unsigned immediate. 
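As an aside (a sketch, not part of the commit): the R_ARM_ALU_PC_G0 addend decode above is the inverse of getSOImmValRotate() from earlier in this file, and the round trip is worth checking once by hand.

  #include <cassert>
  #include <cstdint>

  static uint32_t rotr32(uint32_t val, uint32_t amt) {
    return (val >> amt) | (val << ((32 - amt) & 31));
  }

  // An A32 data-processing immediate is rot4:imm8, decoded as imm8
  // rotated right by 2 * rot4 and zero-extended, as in the hunk above.
  uint32_t decodeModImm(uint32_t imm12) {
    return rotr32(imm12 & 0xff, ((imm12 & 0xf00) >> 8) * 2);
  }

  int main() {
    assert(decodeModImm(0x0ff) == 0xffu);       // rot4 = 0: plain imm8
    assert(decodeModImm(0x4ff) == 0xff000000u); // rot4 = 4: rotate by 8
    return 0;
  }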
+ // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + uint16_t hi = read16le(buf); + uint16_t lo = read16le(buf + 2); + uint64_t imm = (hi & 0x0400) << 1 | // i + (lo & 0x7000) >> 4 | // imm3 + (lo & 0x00ff); // imm8 + // For sub, addend is negative, add is positive. + return (hi & 0x00f0) ? -imm : imm; + } + case R_ARM_THM_PC8: + // ADR and LDR (literal) encoding T1 + // From ELF for the ARM Architecture the initial signed addend is formed + // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4) + // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff + return ((((read16le(buf) & 0xff) << 2) + 4) & 0x3ff) - 4; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + bool u = read16le(buf) & 0x0080; + uint64_t imm12 = read16le(buf + 2) & 0x0fff; + return u ? imm12 : -imm12; + } } } -TargetInfo *getARMTargetInfo() { +TargetInfo *elf::getARMTargetInfo() { static ARM target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index cb33ff448ba4..4513a970b32d 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -36,9 +36,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class AVR final : public TargetInfo { @@ -46,7 +45,8 @@ public: AVR(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -54,11 +54,131 @@ AVR::AVR() { noneRel = R_AVR_NONE; } RelExpr AVR::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { - return R_ABS; + switch (type) { + case R_AVR_7_PCREL: + case R_AVR_13_PCREL: + return R_PC; + default: + return R_ABS; + } } -void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +static void writeLDI(uint8_t *loc, uint64_t val) { + write16le(loc, (read16le(loc) & 0xf0f0) | (val & 0xf0) << 4 | (val & 0x0f)); +} + +void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { + case R_AVR_8: + checkUInt(loc, val, 8, rel); + *loc = val; + break; + case R_AVR_16: + // Note: this relocation is often used between code and data space, which + // are 0x800000 apart in the output ELF file. The bitmask cuts off the high + // bit. 
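As an aside (a sketch, not part of the commit): writeLDI() above scatters an 8-bit constant into the two nibble fields of AVR's LDI encoding (1110 KKKK dddd KKKK), which every LDI-family relocation in this switch reuses.

  #include <cstdint>

  // Patch the constant K of an "ldi Rd, K" instruction, preserving the
  // opcode nibble and the destination-register nibble.
  uint16_t patchLdi(uint16_t insn, uint8_t k) {
    return (insn & 0xf0f0) | ((k & 0xf0) << 4) | (k & 0x0f);
  }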
+ write16le(loc, val & 0xffff); + break; + case R_AVR_16_PM: + checkAlignment(loc, val, 2, rel); + checkUInt(loc, val >> 1, 16, rel); + write16le(loc, val >> 1); + break; + case R_AVR_32: + checkUInt(loc, val, 32, rel); + write32le(loc, val); + break; + + case R_AVR_LDI: + checkUInt(loc, val, 8, rel); + writeLDI(loc, val & 0xff); + break; + + case R_AVR_LO8_LDI_NEG: + writeLDI(loc, -val & 0xff); + break; + case R_AVR_LO8_LDI: + writeLDI(loc, val & 0xff); + break; + case R_AVR_HI8_LDI_NEG: + writeLDI(loc, (-val >> 8) & 0xff); + break; + case R_AVR_HI8_LDI: + writeLDI(loc, (val >> 8) & 0xff); + break; + case R_AVR_HH8_LDI_NEG: + writeLDI(loc, (-val >> 16) & 0xff); + break; + case R_AVR_HH8_LDI: + writeLDI(loc, (val >> 16) & 0xff); + break; + case R_AVR_MS8_LDI_NEG: + writeLDI(loc, (-val >> 24) & 0xff); + break; + case R_AVR_MS8_LDI: + writeLDI(loc, (val >> 24) & 0xff); + break; + + case R_AVR_LO8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 17) & 0xff); + break; + + case R_AVR_LO8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 17) & 0xff); + break; + + case R_AVR_PORT5: + checkUInt(loc, val, 5, rel); + write16le(loc, (read16le(loc) & 0xff07) | (val << 3)); + break; + case R_AVR_PORT6: + checkUInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xf9f0) | (val & 0x30) << 5 | (val & 0x0f)); + break; + + // Since every jump destination is word aligned we gain an extra bit + case R_AVR_7_PCREL: { + checkInt(loc, val, 7, rel); + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xfc07) | ((target & 0x7f) << 3)); + break; + } + case R_AVR_13_PCREL: { + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xf000) | (target & 0xfff)); + break; + } + + case R_AVR_6: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xd3f8) | (val & 0x20) << 8 | + (val & 0x18) << 7 | (val & 0x07)); + break; + case R_AVR_6_ADIW: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xff30) | (val & 0x30) << 2 | (val & 0x0F)); + break; + case R_AVR_CALL: { uint16_t hi = val >> 17; uint16_t lo = val >> 1; @@ -67,14 +187,12 @@ void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } -TargetInfo *getAVRTargetInfo() { +TargetInfo *elf::getAVRTargetInfo() { static AVR target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 106bc9bab5bd..7740ce9a71e0 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -19,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class Hexagon final : public TargetInfo { @@ -31,7 +30,8 @@ public: RelExpr getRelExpr(RelType 
type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; @@ -55,6 +55,8 @@ Hexagon::Hexagon() { defaultMaxPageSize = 0x10000; noneRel = R_HEX_NONE; tlsGotRel = R_HEX_TPREL_32; + tlsModuleIndexRel = R_HEX_DTPMOD_32; + tlsOffsetRel = R_HEX_DTPREL_32; } uint32_t Hexagon::calcEFlags() const { @@ -102,6 +104,7 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_32_6_X: case R_HEX_HI16: case R_HEX_LO16: + case R_HEX_DTPREL_32: return R_ABS; case R_HEX_B9_PCREL: case R_HEX_B13_PCREL: @@ -115,12 +118,19 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_PLT_B22_PCREL: case R_HEX_B22_PCREL_X: case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: return R_PLT_PC; case R_HEX_IE_32_6_X: case R_HEX_IE_16_X: case R_HEX_IE_HI16: case R_HEX_IE_LO16: return R_GOT; + case R_HEX_GD_GOT_11_X: + case R_HEX_GD_GOT_16_X: + case R_HEX_GD_GOT_32_6_X: + return R_TLSGD_GOTPLT; case R_HEX_GOTREL_11_X: case R_HEX_GOTREL_16_X: case R_HEX_GOTREL_32_6_X: @@ -152,6 +162,13 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, } } +static bool isDuplex(uint32_t insn) { + // Duplex forms have a fixed mask and parse bits 15:14 are always + // zero. Non-duplex insns will always have at least one bit set in the + // parse field. + return (0xC000 & insn) == 0; +} + static uint32_t findMaskR6(uint32_t insn) { // There are (arguably too) many relocation masks for the DSP's // R_HEX_6_X type. The table below is used to select the correct mask @@ -176,10 +193,7 @@ static uint32_t findMaskR6(uint32_t insn) { {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0}, {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}}; - // Duplex forms have a fixed mask and parse bits 15:14 are always - // zero. Non-duplex insns will always have at least one bit set in the - // parse field. - if ((0xC000 & insn) == 0x0) + if (isDuplex(insn)) return 0x03f00000; for (InstructionMask i : r6) @@ -215,6 +229,9 @@ static uint32_t findMaskR16(uint32_t insn) { if ((0xff000000 & insn) == 0xb0000000) return 0x0fe03fe0; + if (isDuplex(insn)) + return 0x03f00000; + error("unrecognized instruction for R_HEX_16_X relocation: 0x" + utohexstr(insn)); return 0; @@ -222,8 +239,9 @@ static uint32_t findMaskR16(uint32_t insn) { static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); } -void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void Hexagon::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_HEX_NONE: break; case R_HEX_6_PCREL_X: @@ -240,6 +258,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x00203fe0, val & 0x3f)); break; case R_HEX_11_X: + case R_HEX_GD_GOT_11_X: case R_HEX_IE_GOT_11_X: case R_HEX_GOT_11_X: case R_HEX_GOTREL_11_X: @@ -252,6 +271,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_HEX_16_X: // These relocs only have 6 effective bits. 
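As an aside (a sketch, not part of the commit): most cases in this switch funnel through applyMask(mask, value), which deposits immediate bits, in order, into the instruction-bit positions the mask selects; the findMaskR6/findMaskR16 helpers above only pick which mask applies. Assumed shape of the deposit loop:

  #include <cstddef>
  #include <cstdint>

  uint32_t applyMask(uint32_t mask, uint32_t val) {
    uint32_t result = 0;
    for (size_t i = 0; i != 32; ++i) {
      if ((mask >> i) & 1) {      // this instruction bit holds an imm bit
        result |= (val & 1) << i; // deposit the next immediate bit
        val >>= 1;
      }
    }
    return result;
  }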
case R_HEX_IE_16_X: case R_HEX_IE_GOT_16_X: + case R_HEX_GD_GOT_16_X: case R_HEX_GOT_16_X: case R_HEX_GOTREL_16_X: case R_HEX_TPREL_16_X: @@ -262,9 +282,11 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_HEX_32: case R_HEX_32_PCREL: + case R_HEX_DTPREL_32: or32le(loc, val); break; case R_HEX_32_6_X: + case R_HEX_GD_GOT_32_6_X: case R_HEX_GOT_32_6_X: case R_HEX_GOTREL_32_6_X: case R_HEX_IE_GOT_32_6_X: @@ -273,32 +295,35 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_B9_PCREL: - checkInt(loc, val, 11, type); + checkInt(loc, val, 11, rel); or32le(loc, applyMask(0x003000fe, val >> 2)); break; case R_HEX_B9_PCREL_X: or32le(loc, applyMask(0x003000fe, val & 0x3f)); break; case R_HEX_B13_PCREL: - checkInt(loc, val, 15, type); + checkInt(loc, val, 15, rel); or32le(loc, applyMask(0x00202ffe, val >> 2)); break; case R_HEX_B15_PCREL: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); or32le(loc, applyMask(0x00df20fe, val >> 2)); break; case R_HEX_B15_PCREL_X: or32le(loc, applyMask(0x00df20fe, val & 0x3f)); break; case R_HEX_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL: case R_HEX_PLT_B22_PCREL: - checkInt(loc, val, 22, type); + checkInt(loc, val, 22, rel); or32le(loc, applyMask(0x1ff3ffe, val >> 2)); break; case R_HEX_B22_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL_X: or32le(loc, applyMask(0x1ff3ffe, val & 0x3f)); break; case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_GOTREL_HI16: @@ -335,8 +360,8 @@ void Hexagon::writePltHeader(uint8_t *buf) const { // Offset from PLT0 to the GOT. uint64_t off = in.gotPlt->getVA() - in.plt->getVA(); - relocateOne(buf, R_HEX_B32_PCREL_X, off); - relocateOne(buf + 4, R_HEX_6_PCREL_X, off); + relocateNoSym(buf, R_HEX_B32_PCREL_X, off); + relocateNoSym(buf + 4, R_HEX_6_PCREL_X, off); } void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, @@ -350,8 +375,8 @@ void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, memcpy(buf, inst, sizeof(inst)); uint64_t gotPltEntryAddr = sym.getGotPltVA(); - relocateOne(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); - relocateOne(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); } RelType Hexagon::getDynRel(RelType type) const { @@ -360,10 +385,7 @@ RelType Hexagon::getDynRel(RelType type) const { return R_HEX_NONE; } -TargetInfo *getHexagonTargetInfo() { +TargetInfo *elf::getHexagonTargetInfo() { static Hexagon target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/MSP430.cpp b/lld/ELF/Arch/MSP430.cpp index f03e8181923b..4af90b40a346 100644 --- a/lld/ELF/Arch/MSP430.cpp +++ b/lld/ELF/Arch/MSP430.cpp @@ -26,9 +26,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class MSP430 final : public TargetInfo { @@ -36,7 +35,8 @@ public: MSP430(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -60,38 +60,36 @@ RelExpr MSP430::getRelExpr(RelType type, 
const Symbol &s, } } -void MSP430::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void MSP430::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_MSP430_8: - checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_MSP430_16: case R_MSP430_16_PCREL: case R_MSP430_16_BYTE: case R_MSP430_16_PCREL_BYTE: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_MSP430_32: - checkIntUInt(loc, val, 32, type); + checkIntUInt(loc, val, 32, rel); write32le(loc, val); break; case R_MSP430_10_PCREL: { int16_t offset = ((int16_t)val >> 1) - 1; - checkInt(loc, offset, 10, type); + checkInt(loc, offset, 10, rel); write16le(loc, (read16le(loc) & 0xFC00) | (offset & 0x3FF)); break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } -TargetInfo *getMSP430TargetInfo() { +TargetInfo *elf::getMSP430TargetInfo() { static MSP430 target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index ed6f4ca24130..fd1c5f507734 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -18,9 +18,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { template <class ELFT> class MIPS final : public TargetInfo { public: @@ -37,7 +37,8 @@ public: bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool usesOnlyLowPageBits(RelType type) const override; }; } // namespace @@ -274,12 +275,12 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *buf) const { write16(buf + 18, 0x0f83); // move $28, $3 write16(buf + 20, 0x472b); // jalrc $25 write16(buf + 22, 0x0c00); // nop - relocateOne(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); + relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); } else { write16(buf + 18, 0x45f9); // jalrc $25 write16(buf + 20, 0x0f83); // move $28, $3 write16(buf + 22, 0x0c00); // nop - relocateOne(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); + relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); } return; } @@ -330,13 +331,13 @@ void MIPS<ELFT>::writePlt(uint8_t *buf, const Symbol &sym, write16(buf + 4, 0xff22); // lw $25, 0($2) write16(buf + 8, 0x0f02); // move $24, $2 write16(buf + 10, 0x4723); // jrc $25 / jr16 $25 - relocateOne(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); } else { write16(buf, 0x7900); // addiupc $2, (GOTPLT) - . 
write16(buf + 4, 0xff22); // lw $25, 0($2) write16(buf + 8, 0x4599); // jrc $25 / jr16 $25 write16(buf + 10, 0x0f02); // move $24, $2 - relocateOne(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); } return; } @@ -537,8 +538,10 @@ static uint64_t fixupCrossModeJump(uint8_t *loc, RelType type, uint64_t val) { } template <class ELFT> -void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +void MIPS<ELFT>::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { const endianness e = ELFT::TargetEndianness; + RelType type = rel.type; if (ELFT::Is64Bits || config->mipsN32Abi) std::tie(type, val) = calculateMipsRelChain(loc, type, val); @@ -577,7 +580,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { if (config->relocatable) { writeValue(loc, val + 0x8000, 16, 16); } else { - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeValue(loc, val, 16, 0); } break; @@ -585,7 +588,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { if (config->relocatable) { writeShuffleValue<e>(loc, val + 0x8000, 16, 16); } else { - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeShuffleValue<e>(loc, val, 16, 0); } break; @@ -596,7 +599,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_TLS_GD: case R_MIPS_TLS_GOTTPREL: case R_MIPS_TLS_LDM: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); LLVM_FALLTHROUGH; case R_MIPS_CALL_LO16: case R_MIPS_GOT_LO16: @@ -610,7 +613,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MICROMIPS_GPREL16: case R_MICROMIPS_TLS_GD: case R_MICROMIPS_TLS_LDM: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeShuffleValue<e>(loc, val, 16, 0); break; case R_MICROMIPS_CALL16: @@ -622,7 +625,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeShuffleValue<e>(loc, val, 16, 0); break; case R_MICROMIPS_GPREL7_S2: - checkInt(loc, val, 7, type); + checkInt(loc, val, 7, rel); writeShuffleValue<e>(loc, val, 7, 2); break; case R_MIPS_CALL_HI16: @@ -665,23 +668,23 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // Ignore this optimization relocation for now break; case R_MIPS_PC16: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 18, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 18, rel); writeValue(loc, val, 16, 2); break; case R_MIPS_PC19_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 21, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 21, rel); writeValue(loc, val, 19, 2); break; case R_MIPS_PC21_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 23, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 23, rel); writeValue(loc, val, 21, 2); break; case R_MIPS_PC26_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 28, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 28, rel); writeValue(loc, val, 26, 2); break; case R_MIPS_PC32: @@ -689,35 +692,35 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_MICROMIPS_26_S1: case R_MICROMIPS_PC26_S1: - checkInt(loc, val, 27, type); + checkInt(loc, val, 27, rel); writeShuffleValue<e>(loc, val, 26, 1); break; case R_MICROMIPS_PC7_S1: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); 
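As an aside (a sketch, not part of the commit): the writeValue(loc, val, bits, shift) calls paired with each checkInt/checkAlignment above all perform the same field patch; the assumed behavior is masking a bits-wide field out of val >> shift and ORing it into the instruction word.

  #include <cstdint>

  uint32_t patchField(uint32_t insn, uint64_t val, unsigned bits,
                      unsigned shift) {
    uint32_t mask = 0xffffffffu >> (32 - bits);
    return (insn & ~mask) | (uint32_t(val >> shift) & mask);
  }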
writeMicroRelocation16<e>(loc, val, 7, 1); break; case R_MICROMIPS_PC10_S1: - checkInt(loc, val, 11, type); + checkInt(loc, val, 11, rel); writeMicroRelocation16<e>(loc, val, 10, 1); break; case R_MICROMIPS_PC16_S1: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); writeShuffleValue<e>(loc, val, 16, 1); break; case R_MICROMIPS_PC18_S3: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); writeShuffleValue<e>(loc, val, 18, 3); break; case R_MICROMIPS_PC19_S2: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); writeShuffleValue<e>(loc, val, 19, 2); break; case R_MICROMIPS_PC21_S1: - checkInt(loc, val, 22, type); + checkInt(loc, val, 22, rel); writeShuffleValue<e>(loc, val, 21, 1); break; case R_MICROMIPS_PC23_S2: - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); writeShuffleValue<e>(loc, val, 23, 2); break; default: @@ -731,7 +734,7 @@ template <class ELFT> bool MIPS<ELFT>::usesOnlyLowPageBits(RelType type) const { } // Return true if the symbol is a PIC function. -template <class ELFT> bool isMipsPIC(const Defined *sym) { +template <class ELFT> bool elf::isMipsPIC(const Defined *sym) { if (!sym->isFunc()) return false; @@ -749,20 +752,17 @@ template <class ELFT> bool isMipsPIC(const Defined *sym) { return file->getObj().getHeader()->e_flags & EF_MIPS_PIC; } -template <class ELFT> TargetInfo *getMipsTargetInfo() { +template <class ELFT> TargetInfo *elf::getMipsTargetInfo() { static MIPS<ELFT> target; return ⌖ } -template TargetInfo *getMipsTargetInfo<ELF32LE>(); -template TargetInfo *getMipsTargetInfo<ELF32BE>(); -template TargetInfo *getMipsTargetInfo<ELF64LE>(); -template TargetInfo *getMipsTargetInfo<ELF64BE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF32LE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF32BE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF64LE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF64BE>(); -template bool isMipsPIC<ELF32LE>(const Defined *); -template bool isMipsPIC<ELF32BE>(const Defined *); -template bool isMipsPIC<ELF64LE>(const Defined *); -template bool isMipsPIC<ELF64BE>(const Defined *); - -} // namespace elf -} // namespace lld +template bool elf::isMipsPIC<ELF32LE>(const Defined *); +template bool elf::isMipsPIC<ELF32BE>(const Defined *); +template bool elf::isMipsPIC<ELF64LE>(const Defined *); +template bool elf::isMipsPIC<ELF64BE>(const Defined *); diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp index 923458afae0d..85329c3bef53 100644 --- a/lld/ELF/Arch/MipsArchTree.cpp +++ b/lld/ELF/Arch/MipsArchTree.cpp @@ -23,8 +23,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { struct ArchTreeEdge { @@ -294,7 +294,7 @@ static uint32_t getArchFlags(ArrayRef<FileFlags> files) { return ret; } -template <class ELFT> uint32_t calcMipsEFlags() { +template <class ELFT> uint32_t elf::calcMipsEFlags() { std::vector<FileFlags> v; for (InputFile *f : objectFiles) v.push_back({f, cast<ObjFile<ELFT>>(f)->getObj().getHeader()->e_flags}); @@ -350,7 +350,8 @@ static StringRef getMipsFpAbiName(uint8_t fpAbi) { } } -uint8_t getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, StringRef fileName) { +uint8_t elf::getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, + StringRef fileName) { if (compareMipsFpAbi(newFlag, oldFlag) >= 0) return newFlag; if (compareMipsFpAbi(oldFlag, newFlag) < 0) @@ -366,7 +367,7 @@ template <class ELFT> static bool isN32Abi(const 
InputFile *f) { return false; } -bool isMipsN32Abi(const InputFile *f) { +bool elf::isMipsN32Abi(const InputFile *f) { switch (config->ekind) { case ELF32LEKind: return isN32Abi<ELF32LE>(f); @@ -381,17 +382,14 @@ bool isMipsN32Abi(const InputFile *f) { } } -bool isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } +bool elf::isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } -bool isMipsR6() { +bool elf::isMipsR6() { uint32_t arch = config->eflags & EF_MIPS_ARCH; return arch == EF_MIPS_ARCH_32R6 || arch == EF_MIPS_ARCH_64R6; } -template uint32_t calcMipsEFlags<ELF32LE>(); -template uint32_t calcMipsEFlags<ELF32BE>(); -template uint32_t calcMipsEFlags<ELF64LE>(); -template uint32_t calcMipsEFlags<ELF64BE>(); - -} // namespace elf -} // namespace lld +template uint32_t elf::calcMipsEFlags<ELF32LE>(); +template uint32_t elf::calcMipsEFlags<ELF32BE>(); +template uint32_t elf::calcMipsEFlags<ELF64LE>(); +template uint32_t elf::calcMipsEFlags<ELF64BE>(); diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 3c0b0c290b58..a004cf74ddd8 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -17,9 +17,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class PPC final : public TargetInfo { @@ -44,14 +43,19 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; int getTlsGdRelaxSkip(RelType type) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -66,7 +70,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? loc : loc - 2, insn); } -void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { +void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { + // Create canonical PLT entries for non-PIE code. Compilers don't generate + // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. + uint32_t glink = in.plt->getVA(); // VA of .glink + if (!config->isPic) { + for (const Symbol *sym : cast<PPC32GlinkSection>(in.plt)->canonical_plts) { + writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); + buf += 16; + glink += 16; + } + } + // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. 
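As a concrete illustration of the stub shape just described (a sketch only: it reuses the write32/ha/lo helpers defined in this file, the function name is invented for the example, and the PIC variants address the .plt slot relative to the GOT pointer or the call site instead of with an absolute hi/lo pair):

static void writePltCallStubSketch(uint8_t *buf, uint32_t gotPltSlotVA) {
  write32(buf + 0, 0x3d600000 | ha(gotPltSlotVA)); // lis r11, slot@ha
  write32(buf + 4, 0x816b0000 | lo(gotPltSlotVA)); // lwz r11, slot@l(r11)
  write32(buf + 8, 0x7d6903a6);                    // mtctr r11
  write32(buf + 12, 0x4e800420);                   // bctr
}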
@@ -85,15 +100,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // computes the PLT index (by computing the distance from the landing b to // itself) and calls _dl_runtime_resolve() (in glibc). uint32_t got = in.got->getVA(); - uint32_t glink = in.plt->getVA(); // VA of .glink const uint8_t *end = buf + 64; if (config->isPic) { - uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12; + uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12; uint32_t gotBcl = got + 4 - (glink + afterBcl); write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha write32(buf + 4, 0x7c0802a6); // mflr r0 write32(buf + 8, 0x429f0005); // bcl 20,30,.+4 - write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l + write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l write32(buf + 16, 0x7d8802a6); // mflr r12 write32(buf + 20, 0x7c0803a6); // mtlr r0 write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12 @@ -113,16 +127,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { buf += 56; } else { write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha - write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-Glink@ha + write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-glink@ha if (ha(got + 4) == ha(got + 8)) write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12) else write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12) - write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-Glink@l + write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-glink@l write32(buf + 16, 0x7c0903a6); // mtctr r0 write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11 if (ha(got + 4) == ha(got + 8)) - write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12) + write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12) else write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12) write32(buf + 28, 0x7d605a14); // add r11,r0,r11 @@ -136,6 +150,7 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { } PPC::PPC() { + copyRel = R_PPC_COPY; gotRel = R_PPC_GLOB_DAT; noneRel = R_PPC_NONE; pltRel = R_PPC_JMP_SLOT; @@ -145,7 +160,7 @@ PPC::PPC() { gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 3; gotPltHeaderEntriesNum = 0; - pltHeaderSize = 64; // size of PLTresolve in .glink + pltHeaderSize = 0; pltEntrySize = 4; ipltEntrySize = 16; @@ -177,25 +192,25 @@ void PPC::writeGotHeader(uint8_t *buf) const { void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { // Address of the symbol resolver stub in .glink . 
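// (Illustrative note, an assumption rather than patch content: the canonical
// PLT entries added above sit at the front of .glink, so the per-symbol
// resolver stubs no longer start at the section base; that appears to be why
// the slot value written below gains an in.plt->headerSize term.)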
- write32(buf, in.plt->getVA() + 4 * s.pltIndex); + write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex); } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { - if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) + uint64_t branchAddr, const Symbol &s, int64_t a) const { + if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) return true; if (s.isUndefWeak()) return false; - return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA())); + return !PPC::inBranchRange(type, branchAddr, s.getVA(a)); } uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; } bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { uint64_t offset = dst - src; - if (type == R_PPC_REL24 || type == R_PPC_PLTREL24) + if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -218,13 +233,13 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s, return R_DTPREL; case R_PPC_REL14: case R_PPC_REL32: - case R_PPC_LOCAL24PC: case R_PPC_REL16_LO: case R_PPC_REL16_HI: case R_PPC_REL16_HA: return R_PC; case R_PPC_GOT16: return R_GOT_OFF; + case R_PPC_LOCAL24PC: case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: @@ -277,12 +292,12 @@ static std::pair<RelType, uint64_t> fromDTPREL(RelType type, uint64_t val) { } } -void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { RelType newType; - std::tie(newType, val) = fromDTPREL(type, val); + std::tie(newType, val) = fromDTPREL(rel.type, val); switch (newType) { case R_PPC_ADDR16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC_GOT16: @@ -290,7 +305,7 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_GOT_TLSLD16: case R_PPC_GOT_TPREL16: case R_PPC_TPREL16: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC_ADDR16_HA: @@ -326,8 +341,8 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_PPC_REL14: { uint32_t mask = 0x0000FFFC; - checkInt(loc, val, 16, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 16, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } @@ -335,8 +350,8 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_LOCAL24PC: case R_PPC_PLTREL24: { uint32_t mask = 0x03FFFFFC; - checkInt(loc, val, 26, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 26, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } @@ -368,13 +383,14 @@ int PPC::getTlsGdRelaxSkip(RelType type) const { return 1; } -void PPC::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSGD16: { // addi rT, rA, x@got@tlsgd --> lwz rT, x@got@tprel(rA) uint32_t insn = readFromHalf16(loc); writeFromHalf16(loc, 0x80000000 | (insn & 0x03ff0000)); - relocateOne(loc, R_PPC_GOT_TPREL16, val); + relocateNoSym(loc, R_PPC_GOT_TPREL16, val); break; } case R_PPC_TLSGD: @@ -386,8 +402,9 @@ void PPC::relaxTlsGdToIe(uint8_t *loc, 
RelType type, uint64_t val) const { } } -void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSGD16: // addi r3, r31, x@got@tlsgd --> addis r3, r2, x@tprel@ha writeFromHalf16(loc, 0x3c620000 | ha(val)); @@ -401,8 +418,9 @@ void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSLD16: // addi r3, rA, x@got@tlsgd --> addis r3, r2, 0 writeFromHalf16(loc, 0x3c620000); @@ -417,15 +435,16 @@ void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: - relocateOne(loc, type, val); + relocate(loc, rel, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } -void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TPREL16: { // lwz rT, x@got@tprel(rA) --> addis rT, r2, x@tprel@ha uint32_t rt = readFromHalf16(loc) & 0x03e00000; @@ -448,10 +467,7 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -TargetInfo *getPPCTargetInfo() { +TargetInfo *elf::getPPCTargetInfo() { static PPC target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index da77a4ddaddf..71c568088fb9 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -6,20 +6,21 @@ // //===----------------------------------------------------------------------===// +#include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; static uint64_t ppc64TocOffset = 0x8000; static uint64_t dynamicThreadPointerOffset = 0x8000; @@ -61,7 +62,7 @@ enum DFormOpcd { ADDI = 14 }; -uint64_t getPPC64TocBase() { +uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a // .got when we see a relocation that uses it, so for us the start is always @@ -75,7 +76,7 @@ return tocVA + ppc64TocOffset; } -unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { +unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { // The offset is encoded into the 3 most significant bits of the st_other // field, with some special values described in section 3.4.1 of the ABI: // 0 --> Zero offset between the GEP and LEP, and the function does NOT use @@ -100,11 +101,89 @@ return 0; } -bool isPPC64SmallCodeModelTocReloc(RelType type) { +bool elf::isPPC64SmallCodeModelTocReloc(RelType type) { // The only small code model relocations that access the .toc section.
return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS; } +static bool addOptional(StringRef name, uint64_t value, + std::vector<Defined *> &defined) { + Symbol *sym = symtab->find(name); + if (!sym || sym->isDefined()) + return false; + sym->resolve(Defined{/*file=*/nullptr, saver.save(name), STB_GLOBAL, + STV_HIDDEN, STT_FUNC, value, + /*size=*/0, /*section=*/nullptr}); + defined.push_back(cast<Defined>(sym)); + return true; +} + +// If from is 14, write ${prefix}14: firstInsn; ${prefix}15: +// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)*0x200008; $tail +// The labels are defined only if they exist in the symbol table. +static void writeSequence(MutableArrayRef<uint32_t> buf, const char *prefix, + int from, uint32_t firstInsn, + ArrayRef<uint32_t> tail) { + std::vector<Defined *> defined; + char name[16]; + int first; + uint32_t *ptr = buf.data(); + for (int r = from; r < 32; ++r) { + format("%s%d", prefix, r).snprint(name, sizeof(name)); + if (addOptional(name, 4 * (r - from), defined) && defined.size() == 1) + first = r - from; + write32(ptr++, firstInsn + 0x200008 * (r - from)); + } + for (uint32_t insn : tail) + write32(ptr++, insn); + assert(ptr == &*buf.end()); + + if (defined.empty()) + return; + // The full section content has the extent of [begin, end). We drop unused + // instructions and write [first,end). + auto *sec = make<InputSection>( + nullptr, SHF_ALLOC, SHT_PROGBITS, 4, + makeArrayRef(reinterpret_cast<uint8_t *>(buf.data() + first), + 4 * (buf.size() - first)), + ".text"); + inputSections.push_back(sec); + for (Defined *sym : defined) { + sym->section = sec; + sym->value -= 4 * first; + } +} + +// Implements some save and restore functions as described by ELF V2 ABI to be +// compatible with GCC. With GCC -Os, when the number of call-saved registers +// exceeds a certain threshold, GCC generates _savegpr0_* _restgpr0_* calls and +// expects the linker to define them. See +// https://sourceware.org/pipermail/binutils/2002-February/017444.html and +// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is +// weird because libgcc.a would be the natural place. The linker generation +// approach has the advantage that the linker can generate multiple copies to +// avoid long branch thunks. However, we don't consider the advantage +// significant enough to complicate our trunk implementation, so we take the +// simple approach and synthesize .text sections providing the implementation. +void elf::addPPC64SaveRestore() { + static uint32_t savegpr0[20], restgpr0[21], savegpr1[19], restgpr1[19]; + constexpr uint32_t blr = 0x4e800020, mtlr_0 = 0x7c0803a6; + + // _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ... + // Tail: ld 0, 16(1); mtlr 0; blr + writeSequence(restgpr0, "_restgpr0_", 14, 0xe9c1ff70, + {0xe8010010, mtlr_0, blr}); + // _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ... + // Tail: blr + writeSequence(restgpr1, "_restgpr1_", 14, 0xe9ccff70, {blr}); + // _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ... + // Tail: std 0, 16(1); blr + writeSequence(savegpr0, "_savegpr0_", 14, 0xf9c1ff70, {0xf8010010, blr}); + // _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ... + // Tail: blr + writeSequence(savegpr1, "_savegpr1_", 14, 0xf9ccff70, {blr}); +} + // Find the R_PPC64_ADDR64 in .rela.toc with matching offset. 
template <typename ELFT> static std::pair<Defined *, int64_t> @@ -137,7 +216,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // When accessing a symbol defined in another translation unit, compilers // reserve a .toc entry, allocate a local label and generate toc-indirect -// instuctions: +// instructions: // // addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA // ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry @@ -155,8 +234,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // ld/lwa 3, 0(3) # load the value from the address // // Returns true if the relaxation is performed. -bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, - uint8_t *bufLoc) { +bool elf::tryRelaxPPC64TocIndirection(const Relocation &rel, uint8_t *bufLoc) { assert(config->tocOptimize); if (rel.addend < 0) return false; @@ -186,8 +264,8 @@ bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, if (!isInt<32>(tocRelative)) return false; - // Add PPC64TocOffset that will be subtracted by relocateOne(). - target->relaxGot(bufLoc, type, tocRelative + ppc64TocOffset); + // Add PPC64TocOffset that will be subtracted by PPC64::relocate(). + target->relaxGot(bufLoc, rel, tocRelative + ppc64TocOffset); return true; } @@ -205,7 +283,8 @@ public: uint64_t pltEntryAddr) const override; void writeIplt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, @@ -214,11 +293,16 @@ public: bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; @@ -292,7 +376,22 @@ static uint32_t readFromHalf16(const uint8_t *loc) { return read32(config->isLE ? loc : loc - 2); } +// The prefixed instruction is always a 4 byte prefix followed by a 4 byte +// instruction. Therefore, the prefix is always in lower memory than the +// instruction (regardless of endianness). +// As a result, we need to shift the pieces around on little endian machines. +static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) { + insn = config->isLE ? 
insn << 32 | insn >> 32 : insn; + write64(loc, insn); +} + +static uint64_t readPrefixedInstruction(const uint8_t *loc) { + uint64_t fullInstr = read64(loc); + return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr; +} + PPC64::PPC64() { + copyRel = R_PPC64_COPY; gotRel = R_PPC64_GLOB_DAT; noneRel = R_PPC64_NONE; pltRel = R_PPC64_JMP_SLOT; @@ -364,11 +463,11 @@ uint32_t PPC64::calcEFlags() const { return 2; } -void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_PPC64_TOC16_HA: // Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop". - relocateOne(loc, type, val); + relocate(loc, rel, val); break; case R_PPC64_TOC16_LO_DS: { // Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or @@ -377,7 +476,7 @@ void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { if (getPrimaryOpCode(insn) != LD) error("expected a 'ld' for got-indirect to toc-relative relaxing"); writeFromHalf16(loc, (insn & 0x03ffffff) | 0x38000000); - relocateOne(loc, R_PPC64_TOC16_LO, val); + relocateNoSym(loc, R_PPC64_TOC16_LO, val); break; } default: @@ -385,7 +484,8 @@ void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { } } -void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol @@ -401,14 +501,14 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, x@tprel@l - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 - relocateOne(loc, R_PPC64_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; case R_PPC64_TLSGD: write32(loc, 0x60000000); // nop @@ -416,15 +516,16 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // Since we are relocating a half16 type relocation and Loc + 4 points to // the start of an instruction we need to advance the buffer by an extra // 2 bytes on BE. - relocateOne(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), - R_PPC64_TPREL16_LO, val); + relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } -void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement. 
// The local dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol @@ -440,7 +541,7 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, 4096 - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TLSLD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; @@ -457,14 +558,14 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_DTPREL16_DS: case R_PPC64_DTPREL16_LO: case R_PPC64_DTPREL16_LO_DS: - relocateOne(loc, type, val); + relocate(loc, rel, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } -unsigned getPPCDFormOp(unsigned secondaryOp) { +unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: return LBZ; @@ -489,7 +590,8 @@ unsigned getPPCDFormOp(unsigned secondaryOp) { } } -void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // The initial exec code sequence for a global `x` will look like: // Instruction Relocation Symbol // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x @@ -510,7 +612,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { // indexed load or store instructions. unsigned offset = (config->ekind == ELF64BEKind) ? 2 : 0; - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TPREL16_HA: write32(loc - offset, 0x60000000); // nop break; @@ -518,7 +620,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_GOT_TPREL16_DS: { uint32_t regNo = read32(loc - offset) & 0x03E00000; // bits 6-10 write32(loc - offset, 0x3C0D0000 | regNo); // addis RegNo, r13 - relocateOne(loc, R_PPC64_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; } case R_PPC64_TLS: { @@ -530,7 +632,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { if (dFormOp == 0) error("unrecognized instruction for IE to LE R_PPC64_TLS"); write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); - relocateOne(loc + offset, R_PPC64_TPREL16_LO, val); + relocateNoSym(loc + offset, R_PPC64_TPREL16_LO, val); break; } default: @@ -569,6 +671,8 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: return R_GOTREL; + case R_PPC64_GOT_PCREL34: + return R_GOT_PC; case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: return config->tocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; @@ -577,11 +681,14 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_REL14: case R_PPC64_REL24: return R_PPC64_CALL_PLT; + case R_PPC64_REL24_NOTOC: + return R_PLT_PC; case R_PPC64_REL16_LO: case R_PPC64_REL16_HA: case R_PPC64_REL16_HI: case R_PPC64_REL32: case R_PPC64_REL64: + case R_PPC64_PCREL34: return R_PC; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_HA: @@ -769,11 +876,8 @@ static bool isTocOptType(RelType type) { } } -void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - // We need to save the original relocation type to use in diagnostics, and - // use the original type to determine if we should toc-optimize the - // instructions being relocated. 
- RelType originalType = type; +void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + RelType type = rel.type; bool shouldTocOptimize = isTocOptType(type); // For dynamic thread pointer relative, toc-relative, and got-indirect // relocations, proceed in terms of the corresponding ADDR16 relocation type. @@ -781,27 +885,27 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC64_ADDR14: { - checkAlignment(loc, val, 4, type); + checkAlignment(loc, val, 4, rel); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = loc[3]; write16(loc + 2, (aalk & 3) | (val & 0xfffc)); break; } case R_PPC64_ADDR16: - checkIntUInt(loc, val, 16, originalType); + checkIntUInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC64_ADDR32: - checkIntUInt(loc, val, 32, originalType); + checkIntUInt(loc, val, 32, rel); write32(loc, val); break; case R_PPC64_ADDR16_DS: case R_PPC64_TPREL16_DS: { - checkInt(loc, val, 16, originalType); + checkInt(loc, val, 16, rel); // DQ-form instructions use bits 28-31 as part of the instruction encoding // DS-form instructions only use bits 30-31. uint16_t mask = isDQFormInstruction(readFromHalf16(loc)) ? 0xf : 0x3; - checkAlignment(loc, lo(val), mask + 1, originalType); + checkAlignment(loc, lo(val), mask + 1, rel); write16(loc, (read16(loc) & mask) | lo(val)); } break; case R_PPC64_ADDR16_HA: @@ -856,7 +960,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // DS-form instructions only use bits 30-31. uint32_t insn = readFromHalf16(loc); uint16_t mask = isDQFormInstruction(insn) ? 0xf : 0x3; - checkAlignment(loc, lo(val), mask + 1, originalType); + checkAlignment(loc, lo(val), mask + 1, rel); if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { // When the high-adjusted part of a toc relocation evaluates to 0, it is // changed into a nop. 
The lo part then needs to be updated to use the toc @@ -872,11 +976,11 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } break; case R_PPC64_TPREL16: - checkInt(loc, val, 16, originalType); + checkInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC64_REL32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32(loc, val); break; case R_PPC64_ADDR64: @@ -886,21 +990,44 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_PPC64_REL14: { uint32_t mask = 0x0000FFFC; - checkInt(loc, val, 16, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 16, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } - case R_PPC64_REL24: { + case R_PPC64_REL24: + case R_PPC64_REL24_NOTOC: { uint32_t mask = 0x03FFFFFC; - checkInt(loc, val, 26, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 26, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC64_DTPREL64: write64(loc, val - dynamicThreadPointerOffset); break; + case R_PPC64_PCREL34: { + const uint64_t si0Mask = 0x00000003ffff0000; + const uint64_t si1Mask = 0x000000000000ffff; + const uint64_t fullMask = 0x0003ffff0000ffff; + checkInt(loc, val, 34, rel); + + uint64_t instr = readPrefixedInstruction(loc) & ~fullMask; + writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) | + (val & si1Mask)); + break; + } + case R_PPC64_GOT_PCREL34: { + const uint64_t si0Mask = 0x00000003ffff0000; + const uint64_t si1Mask = 0x000000000000ffff; + const uint64_t fullMask = 0x0003ffff0000ffff; + checkInt(loc, val, 34, rel); + + uint64_t instr = readPrefixedInstruction(loc) & ~fullMask; + writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) | + (val & si1Mask)); + break; + } default: llvm_unreachable("unknown relocation"); } @@ -908,13 +1035,30 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const { - if (type != R_PPC64_REL14 && type != R_PPC64_REL24) + if (type != R_PPC64_REL14 && type != R_PPC64_REL24 && + type != R_PPC64_REL24_NOTOC) return false; + // FIXME: Remove the fatal error once the call protocol is implemented. + if (type == R_PPC64_REL24_NOTOC && s.isInPlt()) + fatal("unimplemented feature: external function call with the reltype" + " R_PPC64_REL24_NOTOC"); + // If a function is in the Plt it needs to be called with a call-stub. if (s.isInPlt()) return true; + // FIXME: Remove the fatal error once the call protocol is implemented. + if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1) + fatal("unimplemented feature: local function call with the reltype" + " R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup"); + + // This check looks at the st_other bits of the callee with relocation + // R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee + // clobbers the TOC and we need an R2 save stub. + if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1) + return true; + // If a symbol is a weak undefined and we are compiling an executable // it doesn't need a range-extending thunk since it can't be called. 
if (s.isUndefWeak() && !config->shared) @@ -940,7 +1084,7 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { int64_t offset = dst - src; if (type == R_PPC64_REL14) return isInt<16>(offset); - if (type == R_PPC64_REL24) + if (type == R_PPC64_REL24 || type == R_PPC64_REL24_NOTOC) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -971,12 +1115,13 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, // thread pointer. // Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is // used as the relaxation hint for both steps 2 and 3. -void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to // addis rT, r2, sym@got@tprel@ha. - relocateOne(loc, R_PPC64_GOT_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_GOT_TPREL16_HA, val); return; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: { @@ -984,7 +1129,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // ld r3, sym@got@tprel@l(rA) uint32_t ra = (readFromHalf16(loc) & (0x1f << 16)); writeFromHalf16(loc, 0xe8600000 | ra); - relocateOne(loc, R_PPC64_GOT_TPREL16_LO_DS, val); + relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } case R_PPC64_TLSGD: @@ -1103,10 +1248,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, return true; } -TargetInfo *getPPC64TargetInfo() { +TargetInfo *elf::getPPC64TargetInfo() { static PPC64 target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 42db8e08162d..b340fd00deee 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -15,9 +15,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { @@ -33,7 +32,8 @@ public: RelType getDynRel(RelType type) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // end anonymous namespace @@ -76,6 +76,7 @@ RISCV::RISCV() { noneRel = R_RISCV_NONE; pltRel = R_RISCV_JUMP_SLOT; relativeRel = R_RISCV_RELATIVE; + iRelativeRel = R_RISCV_IRELATIVE; if (config->is64) { symbolicRel = R_RISCV_64; tlsModuleIndexRel = R_RISCV_TLS_DTPMOD64; @@ -236,9 +237,15 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_LO12_S: return R_TLS; case R_RISCV_RELAX: - case R_RISCV_ALIGN: case R_RISCV_TPREL_ADD: return R_NONE; + case R_RISCV_ALIGN: + // Not just a hint; always padded to the worst-case number of NOPs, so may + // not currently be aligned, and without linker relaxation support we can't + // delete NOPs to realign.
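// (Illustrative example, not patch content: a `.balign 8` in a relaxable code
// region is padded with worst-case NOPs, up to 6 bytes when the compressed
// extension is enabled, and tagged with R_RISCV_ALIGN. A relaxing linker would
// delete just enough padding to restore alignment once earlier code shrinks;
// since we cannot do that yet, the error below is reported instead.)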
+ errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires " + "unimplemented linker relaxation; recompile with -mno-relax"); + return R_NONE; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -251,11 +258,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { return (v & ((1ULL << (begin + 1)) - 1)) >> end; } -void RISCV::relocateOne(uint8_t *loc, const RelType type, - const uint64_t val) const { +void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { const unsigned bits = config->wordsize * 8; - switch (type) { + switch (rel.type) { case R_RISCV_32: write32le(loc, val); return; @@ -264,8 +270,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, return; case R_RISCV_RVC_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 8, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 8, rel); + checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE383; uint16_t imm8 = extractBits(val, 8, 8) << 12; uint16_t imm4_3 = extractBits(val, 4, 3) << 10; @@ -279,8 +285,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_RVC_JUMP: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 11, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 11, rel); + checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE003; uint16_t imm11 = extractBits(val, 11, 11) << 12; uint16_t imm4 = extractBits(val, 4, 4) << 11; @@ -298,7 +304,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_RVC_LUI: { int64_t imm = SignExtend64(val + 0x800, bits) >> 12; - checkInt(loc, imm, 6, type); + checkInt(loc, imm, 6, rel); if (imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0` write16le(loc, (read16le(loc) & 0x0F83) | 0x4000); } else { @@ -310,8 +316,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_JAL: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 20, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 20, rel); + checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0xFFF; uint32_t imm20 = extractBits(val, 20, 20) << 31; @@ -325,8 +331,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 12, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 12, rel); + checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0x1FFF07F; uint32_t imm12 = extractBits(val, 12, 12) << 31; @@ -343,10 +349,10 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_CALL: case R_RISCV_CALL_PLT: { int64_t hi = SignExtend64(val + 0x800, bits) >> 12; - checkInt(loc, hi, 20, type); + checkInt(loc, hi, 20, rel); if (isInt<20>(hi)) { - relocateOne(loc, R_RISCV_PCREL_HI20, val); - relocateOne(loc + 4, R_RISCV_PCREL_LO12_I, val); + relocateNoSym(loc, R_RISCV_PCREL_HI20, val); + relocateNoSym(loc + 4, R_RISCV_PCREL_LO12_I, val); } return; } @@ -358,7 +364,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_TPREL_HI20: case R_RISCV_HI20: { uint64_t hi = val + 0x800; - checkInt(loc, SignExtend64(hi, bits) >> 12, 20, type); + checkInt(loc, SignExtend64(hi, bits) >> 12, 20, rel); write32le(loc, (read32le(loc) & 0xFFF) | (hi & 0xFFFFF000)); return; } @@ -431,7 +437,6 @@ void RISCV::relocateOne(uint8_t *loc, const 
RelType type, write64le(loc, val - dtpOffset); break; - case R_RISCV_ALIGN: case R_RISCV_RELAX: return; // Ignored (for now) @@ -440,10 +445,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } } -TargetInfo *getRISCVTargetInfo() { +TargetInfo *elf::getRISCVTargetInfo() { static RISCV target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp index 08ef52099de9..f137c21fc898 100644 --- a/lld/ELF/Arch/SPARCV9.cpp +++ b/lld/ELF/Arch/SPARCV9.cpp @@ -16,9 +16,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class SPARCV9 final : public TargetInfo { @@ -28,7 +27,8 @@ public: const uint8_t *loc) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -54,6 +54,14 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, case R_SPARC_UA32: case R_SPARC_64: case R_SPARC_UA64: + case R_SPARC_H44: + case R_SPARC_M44: + case R_SPARC_L44: + case R_SPARC_HH22: + case R_SPARC_HM10: + case R_SPARC_LM22: + case R_SPARC_HI22: + case R_SPARC_LO10: return R_ABS; case R_SPARC_PC10: case R_SPARC_PC22: @@ -68,6 +76,9 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, return R_PLT_PC; case R_SPARC_NONE: return R_NONE; + case R_SPARC_TLS_LE_HIX22: + case R_SPARC_TLS_LE_LOX10: + return R_TLS; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -75,38 +86,45 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, } } -void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void SPARCV9::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_SPARC_32: case R_SPARC_UA32: // V-word32 - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); write32be(loc, val); break; case R_SPARC_DISP32: // V-disp32 - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32be(loc, val); break; case R_SPARC_WDISP30: case R_SPARC_WPLT30: // V-disp30 - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32be(loc, (read32be(loc) & ~0x3fffffff) | ((val >> 2) & 0x3fffffff)); break; case R_SPARC_22: // V-imm22 - checkUInt(loc, val, 22, type); + checkUInt(loc, val, 22, rel); write32be(loc, (read32be(loc) & ~0x003fffff) | (val & 0x003fffff)); break; case R_SPARC_GOT22: case R_SPARC_PC22: + case R_SPARC_LM22: // T-imm22 write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); break; + case R_SPARC_HI22: + // V-imm22 + checkUInt(loc, val >> 10, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); + break; case R_SPARC_WDISP19: // V-disp19 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write32be(loc, (read32be(loc) & ~0x0007ffff) | ((val >> 2) & 0x0007ffff)); break; case R_SPARC_GOT10: @@ -114,11 +132,45 @@ void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // T-simm10 write32be(loc, (read32be(loc) & ~0x000003ff) | (val & 0x000003ff)); break; + case R_SPARC_LO10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff)); + break; case R_SPARC_64: case R_SPARC_UA64: // V-xword64
write64be(loc, val); break; + case R_SPARC_HH22: + // V-imm22 + checkUInt(loc, val >> 42, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 42) & 0x003fffff)); + break; + case R_SPARC_HM10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | ((val >> 32) & 0x000003ff)); + break; + case R_SPARC_H44: + // V-imm22 + checkUInt(loc, val >> 22, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 22) & 0x003fffff)); + break; + case R_SPARC_M44: + // T-imm10 + write32be(loc, (read32be(loc) & ~0x000003ff) | ((val >> 12) & 0x000003ff)); + break; + case R_SPARC_L44: + // T-imm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x00000fff)); + break; + case R_SPARC_TLS_LE_HIX22: + // T-imm22 + write32be(loc, (read32be(loc) & ~0x003fffff) | ((~val >> 10) & 0x003fffff)); + break; + case R_SPARC_TLS_LE_LOX10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff) | 0x1C00); + break; default: llvm_unreachable("unknown relocation"); } @@ -139,14 +191,11 @@ void SPARCV9::writePlt(uint8_t *buf, const Symbol & /*sym*/, memcpy(buf, pltData, sizeof(pltData)); uint64_t off = pltEntryAddr - in.plt->getVA(); - relocateOne(buf, R_SPARC_22, off); - relocateOne(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); + relocateNoSym(buf, R_SPARC_22, off); + relocateNoSym(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); } -TargetInfo *getSPARCV9TargetInfo() { +TargetInfo *elf::getSPARCV9TargetInfo() { static SPARCV9 target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp index b4daedc0f5dc..8c8824d53cce 100644 --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -16,9 +16,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86 : public TargetInfo { @@ -35,14 +34,19 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -262,21 +266,21 @@ int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { } } -void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_386_8: // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are // being used for some 16-bit programs such as boot loaders, so // we want to support them.
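// (Clarifying note, an editorial addition: checkIntUInt accepts either a
// signed or an unsigned interpretation, so for R_386_8 any val in
// [-0x80, 0xff] passes, whereas checkInt for R_386_PC8 below narrows the
// range to [-0x80, 0x7f].)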
- checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_386_PC8: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); *loc = val; break; case R_386_16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_386_PC16: @@ -290,7 +294,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // current location subtracted from it. // We just check that Val fits in 17 bits. This misses some cases, but // should have no false positives. - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); write16le(loc, val); break; case R_386_32: @@ -312,7 +316,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_386_TLS_LE_32: case R_386_TLS_TPOFF: case R_386_TLS_TPOFF32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32le(loc, val); break; default: @@ -320,7 +324,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt @@ -335,7 +339,7 @@ void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc + 5, val); } -void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt @@ -352,14 +356,15 @@ void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // In some conditions, relocations can be optimized to avoid using GOT. // This function does that for Initial Exec to Local Exec case. -void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Ulrich's document section 6.2 says that @gotntpoff can // be used with MOVL or ADDL instructions. // @indntpoff is similar to @gotntpoff, but for use in // position dependent code. 
uint8_t reg = (loc[-1] >> 3) & 7; - if (type == R_386_TLS_IE) { + if (rel.type == R_386_TLS_IE) { if (loc[-1] == 0xa1) { // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" // This case is different from the generic case below because @@ -375,7 +380,7 @@ void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { loc[-1] = 0xc0 | reg; } } else { - assert(type == R_386_TLS_GOTIE); + assert(rel.type == R_386_TLS_GOTIE); if (loc[-2] == 0x8b) { // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" loc[-2] = 0xc7; @@ -389,8 +394,9 @@ void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); } -void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_386_TLS_LDO_32) { +void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_386_TLS_LDO_32) { write32le(loc, val); return; } @@ -608,7 +614,7 @@ void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, write32le(buf + 22, -off - 26); } -TargetInfo *getX86TargetInfo() { +TargetInfo *elf::getX86TargetInfo() { if (config->zRetpolineplt) { if (config->isPic) { static RetpolinePic t; @@ -626,6 +632,3 @@ TargetInfo *getX86TargetInfo() { static X86 t; return &t; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 74b72eb91293..24711ec210a4 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -18,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86_64 : public TargetInfo { @@ -35,20 +35,44 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void applyJumpInstrMod(uint8_t *loc, JumpModType type, + unsigned size) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; + bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const override; }; } // namespace +// This is a vector of NOP
instructions of sizes from 1 to 9 bytes. The + appropriately sized instructions are used to fill the gaps between sections + which are executed during fall through. +static const std::vector<std::vector<uint8_t>> nopInstructions = { + {0x90}, + {0x66, 0x90}, + {0x0f, 0x1f, 0x00}, + {0x0f, 0x1f, 0x40, 0x00}, + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}}; + X86_64::X86_64() { copyRel = R_X86_64_COPY; gotRel = R_X86_64_GLOB_DAT; @@ -65,6 +89,7 @@ X86_64::X86_64() { pltEntrySize = 16; ipltEntrySize = 16; trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 + nopInstrs = nopInstructions; // Align to the large page size (known as a superpage or huge page). // FreeBSD automatically promotes large, superpage-aligned allocations. @@ -73,6 +98,216 @@ int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +// Opcodes for the different X86_64 jmp instructions. +enum JmpInsnOpcode : uint32_t { + J_JMP_32, + J_JNE_32, + J_JE_32, + J_JG_32, + J_JGE_32, + J_JB_32, + J_JBE_32, + J_JL_32, + J_JLE_32, + J_JA_32, + J_JAE_32, + J_UNKNOWN, +}; + +// Given the first (optional) and second byte of the insn's opcode, this +// returns the corresponding enum value. +static JmpInsnOpcode getJmpInsnType(const uint8_t *first, + const uint8_t *second) { + if (*second == 0xe9) + return J_JMP_32; + + if (first == nullptr) + return J_UNKNOWN; + + if (*first == 0x0f) { + switch (*second) { + case 0x84: + return J_JE_32; + case 0x85: + return J_JNE_32; + case 0x8f: + return J_JG_32; + case 0x8d: + return J_JGE_32; + case 0x82: + return J_JB_32; + case 0x86: + return J_JBE_32; + case 0x8c: + return J_JL_32; + case 0x8e: + return J_JLE_32; + case 0x87: + return J_JA_32; + case 0x83: + return J_JAE_32; + } + } + return J_UNKNOWN; +} + +// Return the relocation index for input section IS with a specific Offset. +// Returns the maximum size of the vector if no such relocation is found. +static unsigned getRelocationWithOffset(const InputSection &is, + uint64_t offset) { + unsigned size = is.relocations.size(); + for (unsigned i = size - 1; i + 1 > 0; --i) { + if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE) + return i; + } + return size; +} + +// Returns true if R corresponds to a relocation used for a jump instruction. +// TODO: Once special relocations for relaxable jump instructions are available, +// this should be modified to use those relocations. +static bool isRelocationForJmpInsn(Relocation &R) { + return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 || + R.type == R_X86_64_PC8; +} + +// Return true if Relocation R points to the first instruction in the +// next section. +// TODO: Delete this once psABI reserves a new relocation type for fall thru +// jumps. +static bool isFallThruRelocation(InputSection &is, InputFile *file, + InputSection *nextIS, Relocation &r) { + if (!isRelocationForJmpInsn(r)) + return false; + + uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset; + uint64_t targetOffset = InputSectionBase::getRelocTargetVA( + file, r.type, r.addend, addrLoc, *r.sym, r.expr); + + // If this jmp is a fall thru, the target offset is the beginning of the + // next section.
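// (Editorial aside, assuming the usual -4 addend on a direct jmp's
// PC-relative relocation: targetOffset evaluates to S + A - addrLoc, so
// addrLoc + 4 + targetOffset reduces to the target address S, and the
// comparison below asks whether the jump lands exactly on nextIS's start.)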
+ uint64_t nextSectionOffset = + nextIS->getOutputSection()->addr + nextIS->outSecOff; + return (addrLoc + 4 + targetOffset) == nextSectionOffset; +} + +// Return the jmp instruction opcode that is the inverse of the given +// opcode. For example, JE inverted is JNE. +static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) { + switch (opcode) { + case J_JE_32: + return J_JNE_32; + case J_JNE_32: + return J_JE_32; + case J_JG_32: + return J_JLE_32; + case J_JGE_32: + return J_JL_32; + case J_JB_32: + return J_JAE_32; + case J_JBE_32: + return J_JA_32; + case J_JL_32: + return J_JGE_32; + case J_JLE_32: + return J_JG_32; + case J_JA_32: + return J_JBE_32; + case J_JAE_32: + return J_JB_32; + default: + return J_UNKNOWN; + } +} + +// Deletes a direct jump instruction in input sections that jumps to the +// following section, as it is not required. If there are two consecutive jump +// instructions, it checks if they can be flipped and one can be deleted. +// For example: +// .section .text +// a.BB.foo: +// ... +// 10: jne aa.BB.foo +// 16: jmp bar +// aa.BB.foo: +// ... +// +// can be converted to: +// a.BB.foo: +// ... +// 10: je bar #jne flipped to je and the jmp is deleted. +// aa.BB.foo: +// ... +bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const { + const unsigned sizeOfDirectJmpInsn = 5; + + if (nextIS == nullptr) + return false; + + if (is.getSize() < sizeOfDirectJmpInsn) + return false; + + // If this jmp insn can be removed, it is the last insn and the + // relocation is 4 bytes before the end. + unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4); + if (rIndex == is.relocations.size()) + return false; + + Relocation &r = is.relocations[rIndex]; + + // Check if the relocation corresponds to a direct jmp. + const uint8_t *secContents = is.data().data(); + // If it is not a direct jmp instruction, there is nothing to do here. + if (*(secContents + r.offset - 1) != 0xe9) + return false; + + if (isFallThruRelocation(is, file, nextIS, r)) { + // This is a fall thru and can be deleted. + r.expr = R_NONE; + r.offset = 0; + is.drop_back(sizeOfDirectJmpInsn); + is.nopFiller = true; + return true; + } + + // Now, check if flip and delete is possible. + const unsigned sizeOfJmpCCInsn = 6; + // To flip, there must be at least one JmpCC and one direct jmp. + if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn) + return false; + + unsigned rbIndex = + getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4)); + if (rbIndex == is.relocations.size()) + return false; + + Relocation &rB = is.relocations[rbIndex]; + + const uint8_t *jmpInsnB = secContents + rB.offset - 1; + JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB); + if (jmpOpcodeB == J_UNKNOWN) + return false; + + if (!isFallThruRelocation(is, file, nextIS, rB)) + return false; + + // jmpCC jumps to the fall thru block; the branch can be flipped and the + // jmp can be deleted. + JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB); + if (jInvert == J_UNKNOWN) + return false; + is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4}); + // Move R's values to rB except the offset.
+  rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
+  // Cancel r.
+  r.expr = R_NONE;
+  r.offset = 0;
+  is.drop_back(sizeOfDirectJmpInsn);
+  is.nopFiller = true;
+  return true;
+}
+
 RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
   if (type == R_X86_64_GOTTPOFF)
@@ -177,8 +412,9 @@ RelType X86_64::getDynRel(RelType type) const {
   return R_X86_64_NONE;
 }
 
-void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
-  if (type == R_X86_64_TLSGD) {
+void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
+                            uint64_t val) const {
+  if (rel.type == R_X86_64_TLSGD) {
     // Convert
     //   .byte 0x66
     //   leaq x@tlsgd(%rip), %rdi
@@ -201,7 +437,7 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
     //   lea x@tlsgd(%rip), %rax
     //   call *(%rax)
     // to the following two instructions.
-    assert(type == R_X86_64_GOTPC32_TLSDESC);
+    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
     if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
       error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
             "in callq *x@tlsdesc(%rip), %rax");
@@ -217,8 +453,9 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
   }
 }
 
-void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
-  if (type == R_X86_64_TLSGD) {
+void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
+                            uint64_t val) const {
+  if (rel.type == R_X86_64_TLSGD) {
     // Convert
     //   .byte 0x66
     //   leaq x@tlsgd(%rip), %rdi
@@ -241,7 +478,7 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
     //   lea x@tlsgd(%rip), %rax
     //   call *(%rax)
     // to the following two instructions.
-    assert(type == R_X86_64_GOTPC32_TLSDESC);
+    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
     if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
       error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
            "in callq *x@tlsdesc(%rip), %rax");
@@ -258,7 +495,8 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
 
 // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
 // R_X86_64_TPOFF32 so that it does not use GOT.
-void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
+void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &,
+                            uint64_t val) const {
   uint8_t *inst = loc - 3;
   uint8_t reg = loc[-1] >> 3;
   uint8_t *regSlot = loc - 1;
@@ -299,12 +537,13 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
   write32le(loc, val + 4);
 }
 
-void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
-  if (type == R_X86_64_DTPOFF64) {
+void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
+                            uint64_t val) const {
+  if (rel.type == R_X86_64_DTPOFF64) {
     write64le(loc, val);
     return;
   }
-  if (type == R_X86_64_DTPOFF32) {
+  if (rel.type == R_X86_64_DTPOFF32) {
     write32le(loc, val);
     return;
   }
@@ -347,26 +586,114 @@ void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
         "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
 }
 
-void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
+// A JumpInstrMod at a specific offset indicates that the jump instruction
+// opcode at that offset must be modified. This is specifically used to relax
+// jump instructions with basic block sections. This function looks at the
+// jump modification and applies the change.
+void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
+                               unsigned size) const {
+  switch (type) {
+  case J_JMP_32:
+    if (size == 4)
+      *loc = 0xe9;
+    else
+      *loc = 0xeb;
+    break;
+  case J_JE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x84;
+    } else
+      *loc = 0x74;
+    break;
+  case J_JNE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x85;
+    } else
+      *loc = 0x75;
+    break;
+  case J_JG_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8f;
+    } else
+      *loc = 0x7f;
+    break;
+  case J_JGE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8d;
+    } else
+      *loc = 0x7d;
+    break;
+  case J_JB_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x82;
+    } else
+      *loc = 0x72;
+    break;
+  case J_JBE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x86;
+    } else
+      *loc = 0x76;
+    break;
+  case J_JL_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8c;
+    } else
+      *loc = 0x7c;
+    break;
+  case J_JLE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8e;
+    } else
+      *loc = 0x7e;
+    break;
+  case J_JA_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x87;
+    } else
+      *loc = 0x77;
+    break;
+  case J_JAE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x83;
+    } else
+      *loc = 0x73;
+    break;
+  case J_UNKNOWN:
+    llvm_unreachable("Unknown Jump Relocation");
+  }
+}
+
+void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
+  switch (rel.type) {
   case R_X86_64_8:
-    checkIntUInt(loc, val, 8, type);
+    checkIntUInt(loc, val, 8, rel);
     *loc = val;
     break;
   case R_X86_64_PC8:
-    checkInt(loc, val, 8, type);
+    checkInt(loc, val, 8, rel);
     *loc = val;
     break;
   case R_X86_64_16:
-    checkIntUInt(loc, val, 16, type);
+    checkIntUInt(loc, val, 16, rel);
     write16le(loc, val);
     break;
   case R_X86_64_PC16:
-    checkInt(loc, val, 16, type);
+    checkInt(loc, val, 16, rel);
     write16le(loc, val);
     break;
   case R_X86_64_32:
-    checkUInt(loc, val, 32, type);
+    checkUInt(loc, val, 32, rel);
     write32le(loc, val);
     break;
   case R_X86_64_32S:
@@ -384,7 +711,7 @@ void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
   case R_X86_64_TLSLD:
   case R_X86_64_DTPOFF32:
   case R_X86_64_SIZE32:
-    checkInt(loc, val, 32, type);
+    checkInt(loc, val, 32, rel);
     write32le(loc, val);
     break;
   case R_X86_64_64:
@@ -495,7 +822,7 @@ static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
   write32le(loc, val);
 }
 
-void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const {
+void X86_64::relaxGot(uint8_t *loc, const Relocation &, uint64_t val) const {
   const uint8_t op = loc[-2];
   const uint8_t modRm = loc[-1];
 
@@ -758,7 +1085,4 @@ static TargetInfo *getTargetInfo() {
   return &t;
 }
 
-TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); }
-
-} // namespace elf
-} // namespace lld
+TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }
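
To make the flip-and-delete transformation above concrete, here is a minimal standalone sketch, not lld code: the helper name flipAndDropFallThruJmp and the raw-buffer setting are invented for illustration. It relies on the same encoding fact that invertJmpOpcode tabulates: in a two-byte 0x0f 0x8x Jcc opcode, toggling the low bit of the second byte inverts the condition (0x84 je <-> 0x85 jne, 0x8c jl <-> 0x8d jge, and so on). Where deleteFallThruJmpInsn retargets the surviving jcc through relocations, the sketch adjusts the rel32 displacement by hand.

#include <cstdint>
#include <cstring>
#include <vector>

// Sketch: `text` ends with a 6-byte jcc (0x0f 0x8x rel32) followed by a
// 5-byte direct jmp (0xe9 rel32), and the jcc targets the fall-through
// section. Invert the jcc, point it at the old jmp destination, and delete
// the jmp. Assumes a little-endian host, as on x86-64 itself.
static bool flipAndDropFallThruJmp(std::vector<uint8_t> &text) {
  size_t n = text.size();
  if (n < 11 || text[n - 5] != 0xe9 || text[n - 11] != 0x0f)
    return false;
  uint8_t cc = text[n - 10];
  if (cc < 0x80 || cc > 0x8f)
    return false; // not a rel32 conditional jump
  text[n - 10] = cc ^ 1; // toggle the low condition bit: je <-> jne, ...
  int32_t rel;
  std::memcpy(&rel, &text[n - 4], sizeof(rel)); // jmp's rel32, from jmp's end
  // After deletion, the jcc ends where the jmp used to start, i.e. 5 bytes
  // earlier than the jmp's end, so the displacement grows by 5.
  rel += 5;
  std::memcpy(&text[n - 9], &rel, sizeof(rel));
  text.resize(n - 5); // drop the now-redundant direct jmp
  return true;
}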
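Once the jump is deleted, is.nopFiller = true asks the writer to pad the vacated bytes with NOPs from the nopInstrs table rather than trap instructions, so execution can fall through the gap. A plausible greedy filler, sketched here with an invented helper and under the assumption that the table is indexed by length minus one exactly like nopInstructions above:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// Sketch (not the actual lld writer): fill `size` bytes at `buf` with x86-64
// NOPs, longest first, so the fall-through path executes as few instructions
// as possible. `nops` holds the 1- to 9-byte sequences, indexed by length - 1.
static void fillGapWithNops(uint8_t *buf, size_t size,
                            const std::vector<std::vector<uint8_t>> &nops) {
  while (size > 0) {
    size_t n = std::min(size, nops.size()); // longest NOP that still fits
    std::memcpy(buf, nops[n - 1].data(), n);
    buf += n;
    size -= n;
  }
}

Emitting the longest NOP first keeps the number of instructions executed across the gap small, which is why the table extends all the way to the 9-byte form.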