| author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
| commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 | (patch) |
| tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b | /lld/ELF |
| parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b | (diff) |
Diffstat (limited to 'lld/ELF')
52 files changed, 3734 insertions, 1510 deletions
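The recurring change across the per-target diffs below is that TargetInfo::relocateOne(loc, type, val) becomes relocate(loc, rel, val), taking the whole Relocation so that range checks such as checkInt(loc, val, bits, rel) can identify the symbol and offset involved, while call sites that apply a relocation without a symbol at hand (PLT headers and entries, errata patch sections, thunks) switch to a relocateNoSym(loc, type, val) helper. The sketch below only illustrates that interface shape; the stand-in type definitions and the relocateNoSym body are assumptions for illustration, not code copied from this commit.

```cpp
#include <cstdint>

// Minimal stand-ins so the sketch compiles on its own; in lld these types come
// from Relocations.h and Target.h.
using RelType = uint32_t;
enum RelExpr { R_NONE_EXPR };  // stand-in for lld's R_NONE expression kind
struct Symbol;

struct Relocation {
  RelExpr expr;
  RelType type;
  uint64_t offset;
  int64_t addend;
  Symbol *sym;  // carried along so diagnostics can name the relocated symbol
};

class TargetInfo {
public:
  virtual ~TargetInfo() = default;

  // New hook: the full Relocation is threaded through instead of just RelType,
  // so overflow/alignment checks can report the symbol and location on error.
  virtual void relocate(uint8_t *loc, const Relocation &rel,
                        uint64_t val) const = 0;

  // Convenience wrapper used where no symbol is available (PLT stubs, errata
  // patches); the exact body here is an assumption based on how call sites in
  // the diff use it.
  void relocateNoSym(uint8_t *loc, RelType type, uint64_t val) const {
    relocate(loc, Relocation{R_NONE_EXPR, type, 0, 0, nullptr}, val);
  }
};
```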
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 398320af71e3..724d668449b7 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -44,9 +44,8 @@ using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // Helper functions to identify instructions and conditions needed to trigger // the Cortex-A53-843419 erratum. @@ -371,7 +370,7 @@ static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off, return patchOff; } -class Patch843419Section : public SyntheticSection { +class elf::Patch843419Section : public SyntheticSection { public: Patch843419Section(InputSection *p, uint64_t off); @@ -421,7 +420,7 @@ void Patch843419Section::writeTo(uint8_t *buf) { // Return address is the next instruction after the one we have just copied. uint64_t s = getLDSTAddr() + 4; uint64_t p = patchSym->getVA() + 4; - target->relocateOne(buf + 4, R_AARCH64_JUMP26, s - p); + target->relocateNoSym(buf + 4, R_AARCH64_JUMP26, s - p); } void AArch64Err843419Patcher::init() { @@ -645,5 +644,3 @@ bool AArch64Err843419Patcher::createFixes() { } return addressesChanged; } -} // namespace elf -} // namespace lld diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 91cd2b5a2f5f..bd6f689b5844 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -33,9 +33,8 @@ using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // The documented title for Erratum 657417 is: // "A 32bit branch instruction that spans two 4K regions can result in an @@ -71,7 +70,7 @@ namespace elf { // 00001002 2 - bytes padding // 00001004 __CortexA8657417_00000FFE: B.w func -class Patch657417Section : public SyntheticSection { +class elf::Patch657417Section : public SyntheticSection { public: Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM); @@ -189,7 +188,7 @@ void Patch657417Section::writeTo(uint8_t *buf) { // been altered to point to us! uint64_t s = getThumbDestAddr(getBranchAddr(), instr); uint64_t p = getVA(4); - target->relocateOne(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p); + target->relocateNoSym(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p); } // Given a branch instruction spanning two 4KiB regions, at offset off from the @@ -527,6 +526,3 @@ bool ARMErr657417Patcher::createFixes() { } return addressesChanged; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index df41a12f7454..637046e90bbd 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -17,14 +17,13 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // Page(Expr) is the page address of the expression Expr, defined // as (Expr & ~0xFFF). (This applies even if the machine page size // supported by the platform has a different value.) 
-uint64_t getAArch64Page(uint64_t expr) { +uint64_t elf::getAArch64Page(uint64_t expr) { return expr & ~static_cast<uint64_t>(0xFFF); } @@ -45,12 +44,16 @@ public: uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; bool usesOnlyLowPageBits(RelType type) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -123,6 +126,7 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_CONDBR19: case R_AARCH64_JUMP26: case R_AARCH64_TSTBR14: + case R_AARCH64_PLT32: return R_PLT_PC; case R_AARCH64_PREL16: case R_AARCH64_PREL32: @@ -208,10 +212,10 @@ void AArch64::writePltHeader(uint8_t *buf) const { uint64_t got = in.gotPlt->getVA(); uint64_t plt = in.plt->getVA(); - relocateOne(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(got + 16) - getAArch64Page(plt + 4)); - relocateOne(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); - relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(got + 16) - getAArch64Page(plt + 4)); + relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); } void AArch64::writePlt(uint8_t *buf, const Symbol &sym, @@ -225,10 +229,10 @@ void AArch64::writePlt(uint8_t *buf, const Symbol &sym, memcpy(buf, inst, sizeof(inst)); uint64_t gotPltEntryAddr = sym.getGotPltVA(); - relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); - relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); - relocateOne(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); + relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); } bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, @@ -241,7 +245,8 @@ bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) return false; uint64_t dst = expr == R_PLT_PC ? 
s.getPltVA() : s.getVA(a); return !inBranchRange(type, branchAddr, dst); @@ -255,11 +260,13 @@ uint32_t AArch64::getThunkSectionSpacing() const { } bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) return true; // The AArch64 call and unconditional branch instructions have a range of - // +/- 128 MiB. - uint64_t range = 128 * 1024 * 1024; + // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB. + uint64_t range = + type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024); if (dst > src) { // Immediate of branch is signed. range -= 4; @@ -309,16 +316,21 @@ static void writeSMovWImm(uint8_t *loc, uint32_t imm) { write32le(loc, inst | ((imm & 0xFFFF) << 5)); } -void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void AArch64::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_AARCH64_ABS16: case R_AARCH64_PREL16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_AARCH64_ABS32: case R_AARCH64_PREL32: - checkIntUInt(loc, val, 32, type); + checkIntUInt(loc, val, 32, rel); + write32le(loc, val); + break; + case R_AARCH64_PLT32: + checkInt(loc, val, 32, rel); write32le(loc, val); break; case R_AARCH64_ABS64: @@ -332,13 +344,13 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: - checkInt(loc, val, 33, type); + checkInt(loc, val, 33, rel); LLVM_FALLTHROUGH; case R_AARCH64_ADR_PREL_PG_HI21_NC: write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write32AArch64Addr(loc, val); break; case R_AARCH64_JUMP26: @@ -352,13 +364,13 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, 0x14000000); LLVM_FALLTHROUGH; case R_AARCH64_CALL26: - checkInt(loc, val, 28, type); + checkInt(loc, val, 28, rel); or32le(loc, (val & 0x0FFFFFFC) >> 2); break; case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 21, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 21, rel); or32le(loc, (val & 0x1FFFFC) << 3); break; case R_AARCH64_LDST8_ABS_LO12_NC: @@ -367,12 +379,12 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_AARCH64_LDST16_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: - checkAlignment(loc, val, 2, type); + checkAlignment(loc, val, 2, rel); or32AArch64Imm(loc, getBits(val, 1, 11)); break; case R_AARCH64_LDST32_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: - checkAlignment(loc, val, 4, type); + checkAlignment(loc, val, 4, rel); or32AArch64Imm(loc, getBits(val, 2, 11)); break; case R_AARCH64_LDST64_ABS_LO12_NC: @@ -380,28 +392,28 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: case R_AARCH64_TLSDESC_LD64_LO12: - checkAlignment(loc, val, 8, type); + checkAlignment(loc, val, 8, rel); or32AArch64Imm(loc, getBits(val, 3, 11)); break; case R_AARCH64_LDST128_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC: - checkAlignment(loc, val, 16, type); + checkAlignment(loc, 
val, 16, rel); or32AArch64Imm(loc, getBits(val, 4, 11)); break; case R_AARCH64_MOVW_UABS_G0: - checkUInt(loc, val, 16, type); + checkUInt(loc, val, 16, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G0_NC: or32le(loc, (val & 0xFFFF) << 5); break; case R_AARCH64_MOVW_UABS_G1: - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G1_NC: or32le(loc, (val & 0xFFFF0000) >> 11); break; case R_AARCH64_MOVW_UABS_G2: - checkUInt(loc, val, 48, type); + checkUInt(loc, val, 48, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G2_NC: or32le(loc, (val & 0xFFFF00000000) >> 27); @@ -412,7 +424,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G0: case R_AARCH64_MOVW_SABS_G0: case R_AARCH64_TLSLE_MOVW_TPREL_G0: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G0_NC: case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: @@ -421,7 +433,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G1: case R_AARCH64_MOVW_SABS_G1: case R_AARCH64_TLSLE_MOVW_TPREL_G1: - checkInt(loc, val, 33, type); + checkInt(loc, val, 33, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G1_NC: case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: @@ -430,7 +442,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G2: case R_AARCH64_MOVW_SABS_G2: case R_AARCH64_TLSLE_MOVW_TPREL_G2: - checkInt(loc, val, 49, type); + checkInt(loc, val, 49, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G2_NC: writeSMovWImm(loc, val >> 32); @@ -439,11 +451,11 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeSMovWImm(loc, val >> 48); break; case R_AARCH64_TSTBR14: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); or32le(loc, (val & 0xFFFC) << 3); break; case R_AARCH64_TLSLE_ADD_TPREL_HI12: - checkUInt(loc, val, 24, type); + checkUInt(loc, val, 24, rel); or32AArch64Imm(loc, val >> 12); break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: @@ -455,7 +467,8 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12] @@ -467,9 +480,9 @@ void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // movk x0, #0x10 // nop // nop - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); - switch (type) { + switch (rel.type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(loc, 0xd503201f); // nop @@ -485,7 +498,8 @@ void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void AArch64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { +void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12] @@ -498,34 +512,35 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // nop // nop - switch (type) { + switch (rel.type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(loc, 0xd503201f); 
// nop break; case R_AARCH64_TLSDESC_ADR_PAGE21: write32le(loc, 0x90000000); // adrp - relocateOne(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val); + relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val); break; case R_AARCH64_TLSDESC_LD64_LO12: write32le(loc, 0xf9400000); // ldr - relocateOne(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); + relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } -void AArch64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { - checkUInt(loc, val, 32, type); +void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + checkUInt(loc, val, 32, rel); - if (type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { + if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { // Generate MOVZ. uint32_t regNo = read32le(loc) & 0x1f; write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5)); return; } - if (type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { + if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { // Generate MOVK. uint32_t regNo = read32le(loc) & 0x1f; write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5)); @@ -593,8 +608,10 @@ AArch64BtiPac::AArch64BtiPac() { // the function in an executable being taken by a shared library. // FIXME: There is a potential optimization to omit the BTI if we detect // that the address of the PLT entry isn't taken. + // The PAC PLT entries require dynamic loader support and this isn't known + // from properties in the objects, so we use the command line flag. btiEntry = btiHeader && !config->shared; - pacEntry = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_PAC); + pacEntry = config->zPacPlt; if (btiEntry || pacEntry) { pltEntrySize = 24; @@ -627,10 +644,10 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const { } memcpy(buf, pltData, sizeof(pltData)); - relocateOne(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(got + 16) - getAArch64Page(plt + 8)); - relocateOne(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); - relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(got + 16) - getAArch64Page(plt + 8)); + relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); if (!btiHeader) // We didn't add the BTI c instruction so round out size with NOP. 
memcpy(buf + sizeof(pltData), nopData, sizeof(nopData)); @@ -664,11 +681,10 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, uint64_t gotPltEntryAddr = sym.getGotPltVA(); memcpy(buf, addrInst, sizeof(addrInst)); - relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(gotPltEntryAddr) - - getAArch64Page(pltEntryAddr)); - relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); - relocateOne(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); + relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); if (pacEntry) memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); @@ -689,7 +705,4 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *getAArch64TargetInfo() { return getTargetInfo(); } - -} // namespace elf -} // namespace lld +TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); } diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index b42ca7746742..3610a38692d6 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -17,16 +17,16 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class AMDGPU final : public TargetInfo { public: AMDGPU(); uint32_t calcEFlags() const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; @@ -58,8 +58,8 @@ uint32_t AMDGPU::calcEFlags() const { return ret; } -void AMDGPU::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void AMDGPU::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_AMDGPU_ABS32: case R_AMDGPU_GOTPCREL: case R_AMDGPU_GOTPCREL32_LO: @@ -108,10 +108,7 @@ RelType AMDGPU::getDynRel(RelType type) const { return R_AMDGPU_NONE; } -TargetInfo *getAMDGPUTargetInfo() { +TargetInfo *elf::getAMDGPUTargetInfo() { static AMDGPU target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index de1023346aa5..fd90557cc4f6 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -18,9 +18,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class ARM final : public TargetInfo { @@ -43,7 +42,8 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -64,6 +64,7 @@ ARM::ARM() { ipltEntrySize = 16; trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; needsThunks = true; + defaultMaxPageSize = 65536; } uint32_t ARM::calcEFlags() const { @@ -120,6 +121,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, return R_TLSGD_PC; case R_ARM_TLS_LDM32: return R_TLSLD_PC; + case R_ARM_TLS_LDO32: + return R_DTPREL; case 
R_ARM_BASE_PREL: // B(S) + A - P // FIXME: currently B(S) assumed to be .got, this may not hold for all @@ -131,6 +134,19 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: return R_PC; + case R_ARM_ALU_PC_G0: + case R_ARM_LDR_PC_G0: + case R_ARM_THM_ALU_PREL_11_0: + case R_ARM_THM_PC8: + case R_ARM_THM_PC12: + return R_ARM_PCA; + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVW_BREL: + case R_ARM_MOVT_BREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVW_BREL: + case R_ARM_THM_MOVT_BREL: + return R_ARM_SBREL; case R_ARM_NONE: return R_NONE; case R_ARM_TLS_LE32: @@ -262,7 +278,8 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { + uint64_t branchAddr, const Symbol &s, + int64_t /*a*/) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) @@ -275,8 +292,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_PLT32: case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. - // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). - if (expr == R_PC && ((s.getVA() & 1) == 1)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; LLVM_FALLTHROUGH; case R_ARM_CALL: { @@ -286,8 +303,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. - // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || ((s.getVA() & 1) == 0)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). + if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) return true; LLVM_FALLTHROUGH; case R_ARM_THM_CALL: { @@ -375,8 +392,82 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { return distance <= range; } -void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +// Helper to produce message text when LLD detects that a CALL relocation to +// a non STT_FUNC symbol that may result in incorrect interworking between ARM +// or Thumb. +static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) { + assert(!s.isFunc()); + if (s.isSection()) { + // Section symbols must be defined and in a section. Users cannot change + // the type. Use the section name as getName() returns an empty string. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to STT_SECTION symbol " + + cast<Defined>(s).section->name + " ; interworking not performed"); + } else { + // Warn with hint on how to alter the symbol type. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to non STT_FUNC symbol: " + s.getName() + + " interworking not performed; consider using directive '.type " + + s.getName() + + ", %function' to give symbol type STT_FUNC if" + " interworking between ARM and Thumb is required"); + } +} + +// Utility functions taken from ARMAddressingModes.h, only changes are LLD +// coding style. + +// Rotate a 32-bit unsigned value right by a specified amt of bits. 
+static uint32_t rotr32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val >> amt) | (val << ((32 - amt) & 31)); +} + +// Rotate a 32-bit unsigned value left by a specified amt of bits. +static uint32_t rotl32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val << amt) | (val >> ((32 - amt) & 31)); +} + +// Try to encode a 32-bit unsigned immediate imm with an immediate shifter +// operand, this form is an 8-bit immediate rotated right by an even number of +// bits. We compute the rotate amount to use. If this immediate value cannot be +// handled with a single shifter-op, determine a good rotate amount that will +// take a maximal chunk of bits out of the immediate. +static uint32_t getSOImmValRotate(uint32_t imm) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((imm & ~255U) == 0) + return 0; + + // Use CTZ to compute the rotate amount. + unsigned tz = llvm::countTrailingZeros(imm); + + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, + // not 9. + unsigned rotAmt = tz & ~1; + + // If we can handle this spread, return it. + if ((rotr32(imm, rotAmt) & ~255U) == 0) + return (32 - rotAmt) & 31; // HW rotates right, not left. + + // For values like 0xF000000F, we should ignore the low 6 bits, then + // retry the hunt. + if (imm & 63U) { + unsigned tz2 = countTrailingZeros(imm & ~63U); + unsigned rotAmt2 = tz2 & ~1; + if ((rotr32(imm, rotAmt2) & ~255U) == 0) + return (32 - rotAmt2) & 31; // HW rotates right, not left. + } + + // Otherwise, we have no way to cover this span of bits with a single + // shifter_op immediate. Return a chunk of bits that will be useful to + // handle. + return (32 - rotAmt) & 31; // HW rotates right, not left. +} + +void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_ARM_ABS32: case R_ARM_BASE_PREL: case R_ARM_GOTOFF32: @@ -397,40 +488,49 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); break; case R_ARM_PREL31: - checkInt(loc, val, 31, type); + checkInt(loc, val, 31, rel); write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000)); break; - case R_ARM_CALL: - // R_ARM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if (val & 1) { - // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. + case R_ARM_CALL: { + // R_ARM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we don't need to interwork. + assert(rel.sym); // R_ARM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. + if (!rel.sym->isFunc() && isBlx != bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() ? 
bit0Thumb : isBlx) { // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, 0xfa000000 | // opcode ((val & 2) << 23) | // H ((val >> 2) & 0x00ffffff)); // imm24 break; } - if ((read32le(loc) & 0xfe000000) == 0xfa000000) - // BLX (always unconditional) instruction to an ARM Target, select an - // unconditional BL. - write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); + // BLX (always unconditional) instruction to an ARM Target, select an + // unconditional BL. + write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); // fall through as BL encoding is shared with B + } LLVM_FALLTHROUGH; case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, (read32le(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); break; case R_ARM_THM_JUMP11: - checkInt(loc, val, 12, type); + checkInt(loc, val, 12, rel); write16le(loc, (read32le(loc) & 0xf800) | ((val >> 1) & 0x07ff)); break; case R_ARM_THM_JUMP19: // Encoding T3: Val = S:J2:J1:imm6:imm11:0 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write16le(loc, (read16le(loc) & 0xfbc0) | // opcode cond ((val >> 10) & 0x0400) | // S @@ -441,20 +541,32 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 5) & 0x2000) | // J1 ((val >> 1) & 0x07ff)); // imm11 break; - case R_ARM_THM_CALL: - // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if ((val & 1) == 0) { - // Ensure BLX destination is 4-byte aligned. As BLX instruction may - // only be two byte aligned. This must be done before overflow check + case R_ARM_THM_CALL: { + // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we need to interwork. + assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read16le(loc + 2) & 0x1000) == 0; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { + // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As + // the BLX instruction may only be two byte aligned. This must be done + // before overflow check. val = alignTo(val, 4); + write16le(loc + 2, read16le(loc + 2) & ~0x1000); + } else { + write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12); } - // Bit 12 is 0 for BLX, 1 for BL - write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12); if (!config->armJ1J2BranchEncoding) { // Older Arm architectures do not support R_ARM_THM_JUMP24 and have // different encoding rules and range due to J1 and J2 always being 1. 
- checkInt(loc, val, 23, type); + checkInt(loc, val, 23, rel); write16le(loc, 0xf000 | // opcode ((val >> 12) & 0x07ff)); // imm11 @@ -464,11 +576,12 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 1) & 0x07ff)); // imm11 break; } + } // Fall through as rest of encoding is the same as B.W LLVM_FALLTHROUGH; case R_ARM_THM_JUMP24: // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); write16le(loc, 0xf000 | // opcode ((val >> 14) & 0x0400) | // S @@ -481,16 +594,19 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_MOVW_ABS_NC: case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_BREL_NC: write32le(loc, (read32le(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | (val & 0x0fff)); break; case R_ARM_MOVT_ABS: case R_ARM_MOVT_PREL: + case R_ARM_MOVT_BREL: write32le(loc, (read32le(loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff)); break; case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVT_BREL: // Encoding T1: A = imm4:i:imm3:imm8 write16le(loc, 0xf2c0 | // opcode @@ -503,6 +619,7 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_BREL_NC: // Encoding T3: A = imm4:i:imm3:imm8 write16le(loc, 0xf240 | // opcode @@ -513,8 +630,92 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val << 4) & 0x7000) | // imm3 (val & 0x00ff)); // imm8 break; + case R_ARM_ALU_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // literal is a 12-bit modified immediate, made up of a 4-bit even rotate + // right and an 8-bit immediate. The code-sequence here is derived from + // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we + // want to give an error if we cannot encode the constant. + uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x00400000; + val = ~val + 1; + } + if ((val & ~255U) != 0) { + uint32_t rotAmt = getSOImmValRotate(val); + // Error if we cannot encode this with a single shift + if (rotr32(~255U, rotAmt) & val) + error(getErrorLocation(loc) + "unencodeable immediate " + + Twine(val).str() + " for relocation " + toString(rel.type)); + val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8); + } + write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val); + break; + } + case R_ARM_LDR_PC_G0: { + // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - P. 
+ if (rel.sym->isFunc()) + val &= ~0x1; + // LDR (literal) u = bit23 + int64_t imm = val; + uint32_t u = 0x00800000; + if (imm < 0) { + imm = -imm; + u = 0; + } + checkUInt(loc, imm, 12, rel); + write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm); + break; + } + case R_ARM_THM_ALU_PREL_11_0: { + // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + int64_t imm = val; + uint16_t sub = 0; + if (imm < 0) { + imm = -imm; + sub = 0x00a0; + } + checkUInt(loc, imm, 12, rel); + write16le(loc, (read16le(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); + write16le(loc + 2, + (read16le(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff)); + break; + } + case R_ARM_THM_PC8: + // ADR and LDR literal encoding T1 positive offset only imm8:00 + // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + checkUInt(loc, val, 10, rel); + checkAlignment(loc, val, 4, rel); + write16le(loc, (read16le(loc) & 0xff00) | (val & 0x3fc) >> 2); + break; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + // imm12 is unsigned + // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + int64_t imm12 = val; + uint16_t u = 0x0080; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + checkUInt(loc, imm12, 12, rel); + write16le(loc, read16le(loc) | u); + write16le(loc + 2, (read16le(loc + 2) & 0xf000) | imm12); + break; + } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } @@ -582,14 +783,18 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVT_PREL: { + case R_ARM_MOVT_PREL: + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVT_BREL: { uint64_t val = read32le(buf) & 0x000f0fff; return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff)); } case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVT_PREL: { + case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVT_BREL: { // Encoding T3: A = imm4:i:imm3:imm8 uint16_t hi = read16le(buf); uint16_t lo = read16le(buf + 2); @@ -598,13 +803,50 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { ((lo & 0x7000) >> 4) | // imm3 (lo & 0x00ff)); // imm8 } + case R_ARM_ALU_PC_G0: { + // 12-bit immediate is a modified immediate made up of a 4-bit even + // right rotation and 8-bit constant. After the rotation the value + // is zero-extended. When bit 23 is set the instruction is an add, when + // bit 22 is set it is a sub. + uint32_t instr = read32le(buf); + uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2); + return (instr & 0x00400000) ? -val : val; + } + case R_ARM_LDR_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // LDR (literal) u = bit23 unsigned imm12 + bool u = read32le(buf) & 0x00800000; + uint32_t imm12 = read32le(buf) & 0xfff; + return u ? imm12 : -imm12; + } + case R_ARM_THM_ALU_PREL_11_0: { + // Thumb2 ADR, which is an alias for a sub or add instruction with an + // unsigned immediate. 
+ // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + uint16_t hi = read16le(buf); + uint16_t lo = read16le(buf + 2); + uint64_t imm = (hi & 0x0400) << 1 | // i + (lo & 0x7000) >> 4 | // imm3 + (lo & 0x00ff); // imm8 + // For sub, addend is negative, add is positive. + return (hi & 0x00f0) ? -imm : imm; + } + case R_ARM_THM_PC8: + // ADR and LDR (literal) encoding T1 + // From ELF for the ARM Architecture the initial signed addend is formed + // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4) + // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff + return ((((read16le(buf) & 0xff) << 2) + 4) & 0x3ff) - 4; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + bool u = read16le(buf) & 0x0080; + uint64_t imm12 = read16le(buf + 2) & 0x0fff; + return u ? imm12 : -imm12; + } } } -TargetInfo *getARMTargetInfo() { +TargetInfo *elf::getARMTargetInfo() { static ARM target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index cb33ff448ba4..4513a970b32d 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -36,9 +36,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class AVR final : public TargetInfo { @@ -46,7 +45,8 @@ public: AVR(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -54,11 +54,131 @@ AVR::AVR() { noneRel = R_AVR_NONE; } RelExpr AVR::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { - return R_ABS; + switch (type) { + case R_AVR_7_PCREL: + case R_AVR_13_PCREL: + return R_PC; + default: + return R_ABS; + } } -void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +static void writeLDI(uint8_t *loc, uint64_t val) { + write16le(loc, (read16le(loc) & 0xf0f0) | (val & 0xf0) << 4 | (val & 0x0f)); +} + +void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { + case R_AVR_8: + checkUInt(loc, val, 8, rel); + *loc = val; + break; + case R_AVR_16: + // Note: this relocation is often used between code and data space, which + // are 0x800000 apart in the output ELF file. The bitmask cuts off the high + // bit. 
+ write16le(loc, val & 0xffff); + break; + case R_AVR_16_PM: + checkAlignment(loc, val, 2, rel); + checkUInt(loc, val >> 1, 16, rel); + write16le(loc, val >> 1); + break; + case R_AVR_32: + checkUInt(loc, val, 32, rel); + write32le(loc, val); + break; + + case R_AVR_LDI: + checkUInt(loc, val, 8, rel); + writeLDI(loc, val & 0xff); + break; + + case R_AVR_LO8_LDI_NEG: + writeLDI(loc, -val & 0xff); + break; + case R_AVR_LO8_LDI: + writeLDI(loc, val & 0xff); + break; + case R_AVR_HI8_LDI_NEG: + writeLDI(loc, (-val >> 8) & 0xff); + break; + case R_AVR_HI8_LDI: + writeLDI(loc, (val >> 8) & 0xff); + break; + case R_AVR_HH8_LDI_NEG: + writeLDI(loc, (-val >> 16) & 0xff); + break; + case R_AVR_HH8_LDI: + writeLDI(loc, (val >> 16) & 0xff); + break; + case R_AVR_MS8_LDI_NEG: + writeLDI(loc, (-val >> 24) & 0xff); + break; + case R_AVR_MS8_LDI: + writeLDI(loc, (val >> 24) & 0xff); + break; + + case R_AVR_LO8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 17) & 0xff); + break; + + case R_AVR_LO8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 17) & 0xff); + break; + + case R_AVR_PORT5: + checkUInt(loc, val, 5, rel); + write16le(loc, (read16le(loc) & 0xff07) | (val << 3)); + break; + case R_AVR_PORT6: + checkUInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xf9f0) | (val & 0x30) << 5 | (val & 0x0f)); + break; + + // Since every jump destination is word aligned we gain an extra bit + case R_AVR_7_PCREL: { + checkInt(loc, val, 7, rel); + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xfc07) | ((target & 0x7f) << 3)); + break; + } + case R_AVR_13_PCREL: { + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xf000) | (target & 0xfff)); + break; + } + + case R_AVR_6: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xd3f8) | (val & 0x20) << 8 | + (val & 0x18) << 7 | (val & 0x07)); + break; + case R_AVR_6_ADIW: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xff30) | (val & 0x30) << 2 | (val & 0x0F)); + break; + case R_AVR_CALL: { uint16_t hi = val >> 17; uint16_t lo = val >> 1; @@ -67,14 +187,12 @@ void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } -TargetInfo *getAVRTargetInfo() { +TargetInfo *elf::getAVRTargetInfo() { static AVR target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 106bc9bab5bd..7740ce9a71e0 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -19,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class Hexagon final : public TargetInfo { @@ -31,7 +30,8 @@ public: RelExpr getRelExpr(RelType 
type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; @@ -55,6 +55,8 @@ Hexagon::Hexagon() { defaultMaxPageSize = 0x10000; noneRel = R_HEX_NONE; tlsGotRel = R_HEX_TPREL_32; + tlsModuleIndexRel = R_HEX_DTPMOD_32; + tlsOffsetRel = R_HEX_DTPREL_32; } uint32_t Hexagon::calcEFlags() const { @@ -102,6 +104,7 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_32_6_X: case R_HEX_HI16: case R_HEX_LO16: + case R_HEX_DTPREL_32: return R_ABS; case R_HEX_B9_PCREL: case R_HEX_B13_PCREL: @@ -115,12 +118,19 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_PLT_B22_PCREL: case R_HEX_B22_PCREL_X: case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: return R_PLT_PC; case R_HEX_IE_32_6_X: case R_HEX_IE_16_X: case R_HEX_IE_HI16: case R_HEX_IE_LO16: return R_GOT; + case R_HEX_GD_GOT_11_X: + case R_HEX_GD_GOT_16_X: + case R_HEX_GD_GOT_32_6_X: + return R_TLSGD_GOTPLT; case R_HEX_GOTREL_11_X: case R_HEX_GOTREL_16_X: case R_HEX_GOTREL_32_6_X: @@ -152,6 +162,13 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, } } +static bool isDuplex(uint32_t insn) { + // Duplex forms have a fixed mask and parse bits 15:14 are always + // zero. Non-duplex insns will always have at least one bit set in the + // parse field. + return (0xC000 & insn) == 0; +} + static uint32_t findMaskR6(uint32_t insn) { // There are (arguably too) many relocation masks for the DSP's // R_HEX_6_X type. The table below is used to select the correct mask @@ -176,10 +193,7 @@ static uint32_t findMaskR6(uint32_t insn) { {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0}, {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}}; - // Duplex forms have a fixed mask and parse bits 15:14 are always - // zero. Non-duplex insns will always have at least one bit set in the - // parse field. - if ((0xC000 & insn) == 0x0) + if (isDuplex(insn)) return 0x03f00000; for (InstructionMask i : r6) @@ -215,6 +229,9 @@ static uint32_t findMaskR16(uint32_t insn) { if ((0xff000000 & insn) == 0xb0000000) return 0x0fe03fe0; + if (isDuplex(insn)) + return 0x03f00000; + error("unrecognized instruction for R_HEX_16_X relocation: 0x" + utohexstr(insn)); return 0; @@ -222,8 +239,9 @@ static uint32_t findMaskR16(uint32_t insn) { static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); } -void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void Hexagon::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_HEX_NONE: break; case R_HEX_6_PCREL_X: @@ -240,6 +258,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x00203fe0, val & 0x3f)); break; case R_HEX_11_X: + case R_HEX_GD_GOT_11_X: case R_HEX_IE_GOT_11_X: case R_HEX_GOT_11_X: case R_HEX_GOTREL_11_X: @@ -252,6 +271,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_HEX_16_X: // These relocs only have 6 effective bits. 
case R_HEX_IE_16_X: case R_HEX_IE_GOT_16_X: + case R_HEX_GD_GOT_16_X: case R_HEX_GOT_16_X: case R_HEX_GOTREL_16_X: case R_HEX_TPREL_16_X: @@ -262,9 +282,11 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_HEX_32: case R_HEX_32_PCREL: + case R_HEX_DTPREL_32: or32le(loc, val); break; case R_HEX_32_6_X: + case R_HEX_GD_GOT_32_6_X: case R_HEX_GOT_32_6_X: case R_HEX_GOTREL_32_6_X: case R_HEX_IE_GOT_32_6_X: @@ -273,32 +295,35 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_B9_PCREL: - checkInt(loc, val, 11, type); + checkInt(loc, val, 11, rel); or32le(loc, applyMask(0x003000fe, val >> 2)); break; case R_HEX_B9_PCREL_X: or32le(loc, applyMask(0x003000fe, val & 0x3f)); break; case R_HEX_B13_PCREL: - checkInt(loc, val, 15, type); + checkInt(loc, val, 15, rel); or32le(loc, applyMask(0x00202ffe, val >> 2)); break; case R_HEX_B15_PCREL: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); or32le(loc, applyMask(0x00df20fe, val >> 2)); break; case R_HEX_B15_PCREL_X: or32le(loc, applyMask(0x00df20fe, val & 0x3f)); break; case R_HEX_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL: case R_HEX_PLT_B22_PCREL: - checkInt(loc, val, 22, type); + checkInt(loc, val, 22, rel); or32le(loc, applyMask(0x1ff3ffe, val >> 2)); break; case R_HEX_B22_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL_X: or32le(loc, applyMask(0x1ff3ffe, val & 0x3f)); break; case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_GOTREL_HI16: @@ -335,8 +360,8 @@ void Hexagon::writePltHeader(uint8_t *buf) const { // Offset from PLT0 to the GOT. uint64_t off = in.gotPlt->getVA() - in.plt->getVA(); - relocateOne(buf, R_HEX_B32_PCREL_X, off); - relocateOne(buf + 4, R_HEX_6_PCREL_X, off); + relocateNoSym(buf, R_HEX_B32_PCREL_X, off); + relocateNoSym(buf + 4, R_HEX_6_PCREL_X, off); } void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, @@ -350,8 +375,8 @@ void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, memcpy(buf, inst, sizeof(inst)); uint64_t gotPltEntryAddr = sym.getGotPltVA(); - relocateOne(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); - relocateOne(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); } RelType Hexagon::getDynRel(RelType type) const { @@ -360,10 +385,7 @@ RelType Hexagon::getDynRel(RelType type) const { return R_HEX_NONE; } -TargetInfo *getHexagonTargetInfo() { +TargetInfo *elf::getHexagonTargetInfo() { static Hexagon target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/MSP430.cpp b/lld/ELF/Arch/MSP430.cpp index f03e8181923b..4af90b40a346 100644 --- a/lld/ELF/Arch/MSP430.cpp +++ b/lld/ELF/Arch/MSP430.cpp @@ -26,9 +26,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class MSP430 final : public TargetInfo { @@ -36,7 +35,8 @@ public: MSP430(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -60,38 +60,36 @@ RelExpr MSP430::getRelExpr(RelType type, 
const Symbol &s, } } -void MSP430::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void MSP430::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_MSP430_8: - checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_MSP430_16: case R_MSP430_16_PCREL: case R_MSP430_16_BYTE: case R_MSP430_16_PCREL_BYTE: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_MSP430_32: - checkIntUInt(loc, val, 32, type); + checkIntUInt(loc, val, 32, rel); write32le(loc, val); break; case R_MSP430_10_PCREL: { int16_t offset = ((int16_t)val >> 1) - 1; - checkInt(loc, offset, 10, type); + checkInt(loc, offset, 10, rel); write16le(loc, (read16le(loc) & 0xFC00) | (offset & 0x3FF)); break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } -TargetInfo *getMSP430TargetInfo() { +TargetInfo *elf::getMSP430TargetInfo() { static MSP430 target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index ed6f4ca24130..fd1c5f507734 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -18,9 +18,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { template <class ELFT> class MIPS final : public TargetInfo { public: @@ -37,7 +37,8 @@ public: bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool usesOnlyLowPageBits(RelType type) const override; }; } // namespace @@ -274,12 +275,12 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *buf) const { write16(buf + 18, 0x0f83); // move $28, $3 write16(buf + 20, 0x472b); // jalrc $25 write16(buf + 22, 0x0c00); // nop - relocateOne(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); + relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); } else { write16(buf + 18, 0x45f9); // jalrc $25 write16(buf + 20, 0x0f83); // move $28, $3 write16(buf + 22, 0x0c00); // nop - relocateOne(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); + relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); } return; } @@ -330,13 +331,13 @@ void MIPS<ELFT>::writePlt(uint8_t *buf, const Symbol &sym, write16(buf + 4, 0xff22); // lw $25, 0($2) write16(buf + 8, 0x0f02); // move $24, $2 write16(buf + 10, 0x4723); // jrc $25 / jr16 $25 - relocateOne(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); } else { write16(buf, 0x7900); // addiupc $2, (GOTPLT) - . 
write16(buf + 4, 0xff22); // lw $25, 0($2) write16(buf + 8, 0x4599); // jrc $25 / jr16 $25 write16(buf + 10, 0x0f02); // move $24, $2 - relocateOne(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); } return; } @@ -537,8 +538,10 @@ static uint64_t fixupCrossModeJump(uint8_t *loc, RelType type, uint64_t val) { } template <class ELFT> -void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +void MIPS<ELFT>::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { const endianness e = ELFT::TargetEndianness; + RelType type = rel.type; if (ELFT::Is64Bits || config->mipsN32Abi) std::tie(type, val) = calculateMipsRelChain(loc, type, val); @@ -577,7 +580,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { if (config->relocatable) { writeValue(loc, val + 0x8000, 16, 16); } else { - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeValue(loc, val, 16, 0); } break; @@ -585,7 +588,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { if (config->relocatable) { writeShuffleValue<e>(loc, val + 0x8000, 16, 16); } else { - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeShuffleValue<e>(loc, val, 16, 0); } break; @@ -596,7 +599,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_TLS_GD: case R_MIPS_TLS_GOTTPREL: case R_MIPS_TLS_LDM: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); LLVM_FALLTHROUGH; case R_MIPS_CALL_LO16: case R_MIPS_GOT_LO16: @@ -610,7 +613,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MICROMIPS_GPREL16: case R_MICROMIPS_TLS_GD: case R_MICROMIPS_TLS_LDM: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeShuffleValue<e>(loc, val, 16, 0); break; case R_MICROMIPS_CALL16: @@ -622,7 +625,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeShuffleValue<e>(loc, val, 16, 0); break; case R_MICROMIPS_GPREL7_S2: - checkInt(loc, val, 7, type); + checkInt(loc, val, 7, rel); writeShuffleValue<e>(loc, val, 7, 2); break; case R_MIPS_CALL_HI16: @@ -665,23 +668,23 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // Ignore this optimization relocation for now break; case R_MIPS_PC16: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 18, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 18, rel); writeValue(loc, val, 16, 2); break; case R_MIPS_PC19_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 21, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 21, rel); writeValue(loc, val, 19, 2); break; case R_MIPS_PC21_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 23, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 23, rel); writeValue(loc, val, 21, 2); break; case R_MIPS_PC26_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 28, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 28, rel); writeValue(loc, val, 26, 2); break; case R_MIPS_PC32: @@ -689,35 +692,35 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_MICROMIPS_26_S1: case R_MICROMIPS_PC26_S1: - checkInt(loc, val, 27, type); + checkInt(loc, val, 27, rel); writeShuffleValue<e>(loc, val, 26, 1); break; case R_MICROMIPS_PC7_S1: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); 
writeMicroRelocation16<e>(loc, val, 7, 1); break; case R_MICROMIPS_PC10_S1: - checkInt(loc, val, 11, type); + checkInt(loc, val, 11, rel); writeMicroRelocation16<e>(loc, val, 10, 1); break; case R_MICROMIPS_PC16_S1: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); writeShuffleValue<e>(loc, val, 16, 1); break; case R_MICROMIPS_PC18_S3: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); writeShuffleValue<e>(loc, val, 18, 3); break; case R_MICROMIPS_PC19_S2: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); writeShuffleValue<e>(loc, val, 19, 2); break; case R_MICROMIPS_PC21_S1: - checkInt(loc, val, 22, type); + checkInt(loc, val, 22, rel); writeShuffleValue<e>(loc, val, 21, 1); break; case R_MICROMIPS_PC23_S2: - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); writeShuffleValue<e>(loc, val, 23, 2); break; default: @@ -731,7 +734,7 @@ template <class ELFT> bool MIPS<ELFT>::usesOnlyLowPageBits(RelType type) const { } // Return true if the symbol is a PIC function. -template <class ELFT> bool isMipsPIC(const Defined *sym) { +template <class ELFT> bool elf::isMipsPIC(const Defined *sym) { if (!sym->isFunc()) return false; @@ -749,20 +752,17 @@ template <class ELFT> bool isMipsPIC(const Defined *sym) { return file->getObj().getHeader()->e_flags & EF_MIPS_PIC; } -template <class ELFT> TargetInfo *getMipsTargetInfo() { +template <class ELFT> TargetInfo *elf::getMipsTargetInfo() { static MIPS<ELFT> target; return ⌖ } -template TargetInfo *getMipsTargetInfo<ELF32LE>(); -template TargetInfo *getMipsTargetInfo<ELF32BE>(); -template TargetInfo *getMipsTargetInfo<ELF64LE>(); -template TargetInfo *getMipsTargetInfo<ELF64BE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF32LE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF32BE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF64LE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF64BE>(); -template bool isMipsPIC<ELF32LE>(const Defined *); -template bool isMipsPIC<ELF32BE>(const Defined *); -template bool isMipsPIC<ELF64LE>(const Defined *); -template bool isMipsPIC<ELF64BE>(const Defined *); - -} // namespace elf -} // namespace lld +template bool elf::isMipsPIC<ELF32LE>(const Defined *); +template bool elf::isMipsPIC<ELF32BE>(const Defined *); +template bool elf::isMipsPIC<ELF64LE>(const Defined *); +template bool elf::isMipsPIC<ELF64BE>(const Defined *); diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp index 923458afae0d..85329c3bef53 100644 --- a/lld/ELF/Arch/MipsArchTree.cpp +++ b/lld/ELF/Arch/MipsArchTree.cpp @@ -23,8 +23,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { struct ArchTreeEdge { @@ -294,7 +294,7 @@ static uint32_t getArchFlags(ArrayRef<FileFlags> files) { return ret; } -template <class ELFT> uint32_t calcMipsEFlags() { +template <class ELFT> uint32_t elf::calcMipsEFlags() { std::vector<FileFlags> v; for (InputFile *f : objectFiles) v.push_back({f, cast<ObjFile<ELFT>>(f)->getObj().getHeader()->e_flags}); @@ -350,7 +350,8 @@ static StringRef getMipsFpAbiName(uint8_t fpAbi) { } } -uint8_t getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, StringRef fileName) { +uint8_t elf::getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, + StringRef fileName) { if (compareMipsFpAbi(newFlag, oldFlag) >= 0) return newFlag; if (compareMipsFpAbi(oldFlag, newFlag) < 0) @@ -366,7 +367,7 @@ template <class ELFT> static bool isN32Abi(const 
InputFile *f) { return false; } -bool isMipsN32Abi(const InputFile *f) { +bool elf::isMipsN32Abi(const InputFile *f) { switch (config->ekind) { case ELF32LEKind: return isN32Abi<ELF32LE>(f); @@ -381,17 +382,14 @@ bool isMipsN32Abi(const InputFile *f) { } } -bool isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } +bool elf::isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } -bool isMipsR6() { +bool elf::isMipsR6() { uint32_t arch = config->eflags & EF_MIPS_ARCH; return arch == EF_MIPS_ARCH_32R6 || arch == EF_MIPS_ARCH_64R6; } -template uint32_t calcMipsEFlags<ELF32LE>(); -template uint32_t calcMipsEFlags<ELF32BE>(); -template uint32_t calcMipsEFlags<ELF64LE>(); -template uint32_t calcMipsEFlags<ELF64BE>(); - -} // namespace elf -} // namespace lld +template uint32_t elf::calcMipsEFlags<ELF32LE>(); +template uint32_t elf::calcMipsEFlags<ELF32BE>(); +template uint32_t elf::calcMipsEFlags<ELF64LE>(); +template uint32_t elf::calcMipsEFlags<ELF64BE>(); diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 3c0b0c290b58..a004cf74ddd8 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -17,9 +17,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class PPC final : public TargetInfo { @@ -44,14 +43,19 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; int getTlsGdRelaxSkip(RelType type) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -66,7 +70,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? loc : loc - 2, insn); } -void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { +void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { + // Create canonical PLT entries for non-PIE code. Compilers don't generate + // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. + uint32_t glink = in.plt->getVA(); // VA of .glink + if (!config->isPic) { + for (const Symbol *sym : cast<PPC32GlinkSection>(in.plt)->canonical_plts) { + writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); + buf += 16; + glink += 16; + } + } + // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. 
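// Illustrative sketch, not part of this patch: the .glink writer above builds
// addis/addi/lwz pairs with ha()/lo() style helpers. On PPC, a 32-bit value is
// split so that the shifted "high adjusted" half plus the sign-extended low 16
// bits reconstruct the original value; the +0x8000 in the ha() computation
// compensates for that sign extension. Helper names below are local to the
// sketch, assuming the usual definitions.
#include <cassert>
#include <cstdint>

static uint16_t loHalf(uint32_t v) { return v; }
static uint16_t haHalf(uint32_t v) { return (v + 0x8000) >> 16; }

int main() {
  const uint32_t vals[] = {0x12348000u, 0x00007fffu, 0xdeadbeefu};
  for (uint32_t v : vals)
    // addis rX, rY, haHalf(v); addi rX, rX, loHalf(v) materializes v.
    assert((uint32_t(haHalf(v)) << 16) + int16_t(loHalf(v)) == v);
}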
@@ -85,15 +100,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // computes the PLT index (by computing the distance from the landing b to // itself) and calls _dl_runtime_resolve() (in glibc). uint32_t got = in.got->getVA(); - uint32_t glink = in.plt->getVA(); // VA of .glink const uint8_t *end = buf + 64; if (config->isPic) { - uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12; + uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12; uint32_t gotBcl = got + 4 - (glink + afterBcl); write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha write32(buf + 4, 0x7c0802a6); // mflr r0 write32(buf + 8, 0x429f0005); // bcl 20,30,.+4 - write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l + write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l write32(buf + 16, 0x7d8802a6); // mflr r12 write32(buf + 20, 0x7c0803a6); // mtlr r0 write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12 @@ -113,16 +127,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { buf += 56; } else { write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha - write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-Glink@ha + write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-glink@ha if (ha(got + 4) == ha(got + 8)) write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12) else write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12) - write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-Glink@l + write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-glink@l write32(buf + 16, 0x7c0903a6); // mtctr r0 write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11 if (ha(got + 4) == ha(got + 8)) - write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12) + write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12) else write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12) write32(buf + 28, 0x7d605a14); // add r11,r0,r11 @@ -136,6 +150,7 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { } PPC::PPC() { + copyRel = R_PPC_COPY; gotRel = R_PPC_GLOB_DAT; noneRel = R_PPC_NONE; pltRel = R_PPC_JMP_SLOT; @@ -145,7 +160,7 @@ PPC::PPC() { gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 3; gotPltHeaderEntriesNum = 0; - pltHeaderSize = 64; // size of PLTresolve in .glink + pltHeaderSize = 0; pltEntrySize = 4; ipltEntrySize = 16; @@ -177,25 +192,25 @@ void PPC::writeGotHeader(uint8_t *buf) const { void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { // Address of the symbol resolver stub in .glink . 
- write32(buf, in.plt->getVA() + 4 * s.pltIndex); + write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex); } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { - if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) + uint64_t branchAddr, const Symbol &s, int64_t a) const { + if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) return true; if (s.isUndefWeak()) return false; - return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA())); + return !PPC::inBranchRange(type, branchAddr, s.getVA(a)); } uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; } bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { uint64_t offset = dst - src; - if (type == R_PPC_REL24 || type == R_PPC_PLTREL24) + if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -218,13 +233,13 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s, return R_DTPREL; case R_PPC_REL14: case R_PPC_REL32: - case R_PPC_LOCAL24PC: case R_PPC_REL16_LO: case R_PPC_REL16_HI: case R_PPC_REL16_HA: return R_PC; case R_PPC_GOT16: return R_GOT_OFF; + case R_PPC_LOCAL24PC: case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: @@ -277,12 +292,12 @@ static std::pair<RelType, uint64_t> fromDTPREL(RelType type, uint64_t val) { } } -void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { RelType newType; - std::tie(newType, val) = fromDTPREL(type, val); + std::tie(newType, val) = fromDTPREL(rel.type, val); switch (newType) { case R_PPC_ADDR16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC_GOT16: @@ -290,7 +305,7 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_GOT_TLSLD16: case R_PPC_GOT_TPREL16: case R_PPC_TPREL16: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC_ADDR16_HA: @@ -326,8 +341,8 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_PPC_REL14: { uint32_t mask = 0x0000FFFC; - checkInt(loc, val, 16, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 16, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } @@ -335,8 +350,8 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_LOCAL24PC: case R_PPC_PLTREL24: { uint32_t mask = 0x03FFFFFC; - checkInt(loc, val, 26, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 26, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } @@ -368,13 +383,14 @@ int PPC::getTlsGdRelaxSkip(RelType type) const { return 1; } -void PPC::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSGD16: { // addi rT, rA, x@got@tlsgd --> lwz rT, x@got@tprel(rA) uint32_t insn = readFromHalf16(loc); writeFromHalf16(loc, 0x80000000 | (insn & 0x03ff0000)); - relocateOne(loc, R_PPC_GOT_TPREL16, val); + relocateNoSym(loc, R_PPC_GOT_TPREL16, val); break; } case R_PPC_TLSGD: @@ -386,8 +402,9 @@ void PPC::relaxTlsGdToIe(uint8_t *loc, 
RelType type, uint64_t val) const { } } -void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSGD16: // addi r3, r31, x@got@tlsgd --> addis r3, r2, x@tprel@ha writeFromHalf16(loc, 0x3c620000 | ha(val)); @@ -401,8 +418,9 @@ void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSLD16: // addi r3, rA, x@got@tlsgd --> addis r3, r2, 0 writeFromHalf16(loc, 0x3c620000); @@ -417,15 +435,16 @@ void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: - relocateOne(loc, type, val); + relocate(loc, rel, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } -void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TPREL16: { // lwz rT, x@got@tprel(rA) --> addis rT, r2, x@tprel@ha uint32_t rt = readFromHalf16(loc) & 0x03e00000; @@ -448,10 +467,7 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -TargetInfo *getPPCTargetInfo() { +TargetInfo *elf::getPPCTargetInfo() { static PPC target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index da77a4ddaddf..71c568088fb9 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -6,20 +6,21 @@ // //===----------------------------------------------------------------------===// +#include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; static uint64_t ppc64TocOffset = 0x8000; static uint64_t dynamicThreadPointerOffset = 0x8000; @@ -61,7 +62,7 @@ enum DFormOpcd { ADDI = 14 }; -uint64_t getPPC64TocBase() { +uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a // .got when we see a relocation that uses it, so for us the start is always @@ -75,7 +76,7 @@ uint64_t getPPC64TocBase() { return tocVA + ppc64TocOffset; } -unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { +unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { // The offset is encoded into the 3 most significant bits of the st_other // field, with some special values described in section 3.4.1 of the ABI: // 0 --> Zero offset between the GEP and LEP, and the function does NOT use @@ -100,11 +101,89 @@ unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { return 0; } -bool isPPC64SmallCodeModelTocReloc(RelType type) { +bool elf::isPPC64SmallCodeModelTocReloc(RelType type) { // The only small code model relocations that access the .toc section. 
return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS; } +static bool addOptional(StringRef name, uint64_t value, + std::vector<Defined *> &defined) { + Symbol *sym = symtab->find(name); + if (!sym || sym->isDefined()) + return false; + sym->resolve(Defined{/*file=*/nullptr, saver.save(name), STB_GLOBAL, + STV_HIDDEN, STT_FUNC, value, + /*size=*/0, /*section=*/nullptr}); + defined.push_back(cast<Defined>(sym)); + return true; +} + +// If from is 14, write ${prefix}14: firstInsn; ${prefix}15: +// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)*0x200008; $tail +// The labels are defined only if they exist in the symbol table. +static void writeSequence(MutableArrayRef<uint32_t> buf, const char *prefix, + int from, uint32_t firstInsn, + ArrayRef<uint32_t> tail) { + std::vector<Defined *> defined; + char name[16]; + int first; + uint32_t *ptr = buf.data(); + for (int r = from; r < 32; ++r) { + format("%s%d", prefix, r).snprint(name, sizeof(name)); + if (addOptional(name, 4 * (r - from), defined) && defined.size() == 1) + first = r - from; + write32(ptr++, firstInsn + 0x200008 * (r - from)); + } + for (uint32_t insn : tail) + write32(ptr++, insn); + assert(ptr == &*buf.end()); + + if (defined.empty()) + return; + // The full section content has the extent of [begin, end). We drop unused + // instructions and write [first,end). + auto *sec = make<InputSection>( + nullptr, SHF_ALLOC, SHT_PROGBITS, 4, + makeArrayRef(reinterpret_cast<uint8_t *>(buf.data() + first), + 4 * (buf.size() - first)), + ".text"); + inputSections.push_back(sec); + for (Defined *sym : defined) { + sym->section = sec; + sym->value -= 4 * first; + } +} + +// Implements some save and restore functions as described by ELF V2 ABI to be +// compatible with GCC. With GCC -Os, when the number of call-saved registers +// exceeds a certain threshold, GCC generates _savegpr0_* _restgpr0_* calls and +// expects the linker to define them. See +// https://sourceware.org/pipermail/binutils/2002-February/017444.html and +// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is +// weird because libgcc.a would be the natural place. The linker generation +// approach has the advantage that the linker can generate multiple copies to +// avoid long branch thunks. However, we don't consider the advantage +// significant enough to complicate our trunk implementation, so we take the +// simple approach and synthesize .text sections providing the implementation. +void elf::addPPC64SaveRestore() { + static uint32_t savegpr0[20], restgpr0[21], savegpr1[19], restgpr1[19]; + constexpr uint32_t blr = 0x4e800020, mtlr_0 = 0x7c0803a6; + + // _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ... + // Tail: ld 0, 16(1); mtlr 0; blr + writeSequence(restgpr0, "_restgpr0_", 14, 0xe9c1ff70, + {0xe8010010, mtlr_0, blr}); + // _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ... + // Tail: blr + writeSequence(restgpr1, "_restgpr1_", 14, 0xe9ccff70, {blr}); + // _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ... + // Tail: std 0, 16(1); blr + writeSequence(savegpr0, "_savegpr0_", 14, 0xf9c1ff70, {0xf8010010, blr}); + // _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ... + // Tail: blr + writeSequence(savegpr1, "_savegpr1_", 14, 0xf9ccff70, {blr}); +} + // Find the R_PPC64_ADDR64 in .rela.toc with matching offset. 
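// Illustrative sketch, not part of this patch: writeSequence() above emits one
// DS-form ld/std per register, stepping the encoding by 0x200008. That stride
// bumps the RT/RS field (bit 21) by one register and the displacement by 8
// bytes, which matches _restgpr0_14: ld 14, -144(1); _restgpr0_15:
// ld 15, -136(1); and so on. The decode helpers below are local to the sketch.
#include <cassert>
#include <cstdint>

// DS-form (ld/std): bits 6-10 hold RT/RS; the displacement is the
// sign-extended low 16 bits with the two XO bits masked off.
static unsigned dsFormReg(uint32_t insn) { return (insn >> 21) & 31; }
static int dsFormDisp(uint32_t insn) { return int16_t(insn & 0xfffc); }

int main() {
  uint32_t ld14 = 0xe9c1ff70;      // ld 14, -144(1), firstInsn of restgpr0
  uint32_t ld15 = ld14 + 0x200008; // one writeSequence() step
  assert(dsFormReg(ld14) == 14 && dsFormDisp(ld14) == -144);
  assert(dsFormReg(ld15) == 15 && dsFormDisp(ld15) == -136);
}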
template <typename ELFT> static std::pair<Defined *, int64_t> @@ -137,7 +216,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // When accessing a symbol defined in another translation unit, compilers // reserve a .toc entry, allocate a local label and generate toc-indirect -// instuctions: +// instructions: // // addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA // ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry @@ -155,8 +234,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // ld/lwa 3, 0(3) # load the value from the address // // Returns true if the relaxation is performed. -bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, - uint8_t *bufLoc) { +bool elf::tryRelaxPPC64TocIndirection(const Relocation &rel, uint8_t *bufLoc) { assert(config->tocOptimize); if (rel.addend < 0) return false; @@ -186,8 +264,8 @@ bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, if (!isInt<32>(tocRelative)) return false; - // Add PPC64TocOffset that will be subtracted by relocateOne(). - target->relaxGot(bufLoc, type, tocRelative + ppc64TocOffset); + // Add PPC64TocOffset that will be subtracted by PPC64::relocate(). + target->relaxGot(bufLoc, rel, tocRelative + ppc64TocOffset); return true; } @@ -205,7 +283,8 @@ public: uint64_t pltEntryAddr) const override; void writeIplt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, @@ -214,11 +293,16 @@ public: bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; @@ -292,7 +376,22 @@ static uint32_t readFromHalf16(const uint8_t *loc) { return read32(config->isLE ? loc : loc - 2); } +// The prefixed instruction is always a 4 byte prefix followed by a 4 byte +// instruction. Therefore, the prefix is always in lower memory than the +// instruction (regardless of endianness). +// As a result, we need to shift the pieces around on little endian machines. +static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) { + insn = config->isLE ? 
insn << 32 | insn >> 32 : insn; + write64(loc, insn); +} + +static uint64_t readPrefixedInstruction(const uint8_t *loc) { + uint64_t fullInstr = read64(loc); + return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr; +} + PPC64::PPC64() { + copyRel = R_PPC64_COPY; gotRel = R_PPC64_GLOB_DAT; noneRel = R_PPC64_NONE; pltRel = R_PPC64_JMP_SLOT; @@ -364,11 +463,11 @@ uint32_t PPC64::calcEFlags() const { return 2; } -void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_PPC64_TOC16_HA: // Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop". - relocateOne(loc, type, val); + relocate(loc, rel, val); break; case R_PPC64_TOC16_LO_DS: { // Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or @@ -377,7 +476,7 @@ void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { if (getPrimaryOpCode(insn) != LD) error("expected a 'ld' for got-indirect to toc-relative relaxing"); writeFromHalf16(loc, (insn & 0x03ffffff) | 0x38000000); - relocateOne(loc, R_PPC64_TOC16_LO, val); + relocateNoSym(loc, R_PPC64_TOC16_LO, val); break; } default: @@ -385,7 +484,8 @@ void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { } } -void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol @@ -401,14 +501,14 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, x@tprel@l - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 - relocateOne(loc, R_PPC64_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; case R_PPC64_TLSGD: write32(loc, 0x60000000); // nop @@ -416,15 +516,16 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // Since we are relocating a half16 type relocation and Loc + 4 points to // the start of an instruction we need to advance the buffer by an extra // 2 bytes on BE. - relocateOne(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), - R_PPC64_TPREL16_LO, val); + relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } -void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement. 
// The local dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol @@ -440,7 +541,7 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, 4096 - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TLSLD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; @@ -457,14 +558,14 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_DTPREL16_DS: case R_PPC64_DTPREL16_LO: case R_PPC64_DTPREL16_LO_DS: - relocateOne(loc, type, val); + relocate(loc, rel, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } -unsigned getPPCDFormOp(unsigned secondaryOp) { +unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: return LBZ; @@ -489,7 +590,8 @@ unsigned getPPCDFormOp(unsigned secondaryOp) { } } -void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // The initial exec code sequence for a global `x` will look like: // Instruction Relocation Symbol // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x @@ -510,7 +612,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { // indexed load or store instructions. unsigned offset = (config->ekind == ELF64BEKind) ? 2 : 0; - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TPREL16_HA: write32(loc - offset, 0x60000000); // nop break; @@ -518,7 +620,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_GOT_TPREL16_DS: { uint32_t regNo = read32(loc - offset) & 0x03E00000; // bits 6-10 write32(loc - offset, 0x3C0D0000 | regNo); // addis RegNo, r13 - relocateOne(loc, R_PPC64_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; } case R_PPC64_TLS: { @@ -530,7 +632,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { if (dFormOp == 0) error("unrecognized instruction for IE to LE R_PPC64_TLS"); write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); - relocateOne(loc + offset, R_PPC64_TPREL16_LO, val); + relocateNoSym(loc + offset, R_PPC64_TPREL16_LO, val); break; } default: @@ -569,6 +671,8 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: return R_GOTREL; + case R_PPC64_GOT_PCREL34: + return R_GOT_PC; case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: return config->tocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; @@ -577,11 +681,14 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_REL14: case R_PPC64_REL24: return R_PPC64_CALL_PLT; + case R_PPC64_REL24_NOTOC: + return R_PLT_PC; case R_PPC64_REL16_LO: case R_PPC64_REL16_HA: case R_PPC64_REL16_HI: case R_PPC64_REL32: case R_PPC64_REL64: + case R_PPC64_PCREL34: return R_PC; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_HA: @@ -769,11 +876,8 @@ static bool isTocOptType(RelType type) { } } -void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - // We need to save the original relocation type to use in diagnostics, and - // use the original type to determine if we should toc-optimize the - // instructions being relocated. 
- RelType originalType = type; +void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + RelType type = rel.type; bool shouldTocOptimize = isTocOptType(type); // For dynamic thread pointer relative, toc-relative, and got-indirect // relocations, proceed in terms of the corresponding ADDR16 relocation type. @@ -781,27 +885,27 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC64_ADDR14: { - checkAlignment(loc, val, 4, type); + checkAlignment(loc, val, 4, rel); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = loc[3]; write16(loc + 2, (aalk & 3) | (val & 0xfffc)); break; } case R_PPC64_ADDR16: - checkIntUInt(loc, val, 16, originalType); + checkIntUInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC64_ADDR32: - checkIntUInt(loc, val, 32, originalType); + checkIntUInt(loc, val, 32, rel); write32(loc, val); break; case R_PPC64_ADDR16_DS: case R_PPC64_TPREL16_DS: { - checkInt(loc, val, 16, originalType); + checkInt(loc, val, 16, rel); // DQ-form instructions use bits 28-31 as part of the instruction encoding // DS-form instructions only use bits 30-31. uint16_t mask = isDQFormInstruction(readFromHalf16(loc)) ? 0xf : 0x3; - checkAlignment(loc, lo(val), mask + 1, originalType); + checkAlignment(loc, lo(val), mask + 1, rel); write16(loc, (read16(loc) & mask) | lo(val)); } break; case R_PPC64_ADDR16_HA: @@ -856,7 +960,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // DS-form instructions only use bits 30-31. uint32_t insn = readFromHalf16(loc); uint16_t mask = isDQFormInstruction(insn) ? 0xf : 0x3; - checkAlignment(loc, lo(val), mask + 1, originalType); + checkAlignment(loc, lo(val), mask + 1, rel); if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { // When the high-adjusted part of a toc relocation evaluates to 0, it is // changed into a nop. 
The lo part then needs to be updated to use the toc @@ -872,11 +976,11 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } break; case R_PPC64_TPREL16: - checkInt(loc, val, 16, originalType); + checkInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC64_REL32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32(loc, val); break; case R_PPC64_ADDR64: @@ -886,21 +990,44 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_PPC64_REL14: { uint32_t mask = 0x0000FFFC; - checkInt(loc, val, 16, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 16, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } - case R_PPC64_REL24: { + case R_PPC64_REL24: + case R_PPC64_REL24_NOTOC: { uint32_t mask = 0x03FFFFFC; - checkInt(loc, val, 26, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 26, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC64_DTPREL64: write64(loc, val - dynamicThreadPointerOffset); break; + case R_PPC64_PCREL34: { + const uint64_t si0Mask = 0x00000003ffff0000; + const uint64_t si1Mask = 0x000000000000ffff; + const uint64_t fullMask = 0x0003ffff0000ffff; + checkInt(loc, val, 34, rel); + + uint64_t instr = readPrefixedInstruction(loc) & ~fullMask; + writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) | + (val & si1Mask)); + break; + } + case R_PPC64_GOT_PCREL34: { + const uint64_t si0Mask = 0x00000003ffff0000; + const uint64_t si1Mask = 0x000000000000ffff; + const uint64_t fullMask = 0x0003ffff0000ffff; + checkInt(loc, val, 34, rel); + + uint64_t instr = readPrefixedInstruction(loc) & ~fullMask; + writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) | + (val & si1Mask)); + break; + } default: llvm_unreachable("unknown relocation"); } @@ -908,13 +1035,30 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const { - if (type != R_PPC64_REL14 && type != R_PPC64_REL24) + if (type != R_PPC64_REL14 && type != R_PPC64_REL24 && + type != R_PPC64_REL24_NOTOC) return false; + // FIXME: Remove the fatal error once the call protocol is implemented. + if (type == R_PPC64_REL24_NOTOC && s.isInPlt()) + fatal("unimplemented feature: external function call with the reltype" + " R_PPC64_REL24_NOTOC"); + // If a function is in the Plt it needs to be called with a call-stub. if (s.isInPlt()) return true; + // FIXME: Remove the fatal error once the call protocol is implemented. + if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1) + fatal("unimplemented feature: local function call with the reltype" + " R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup"); + + // This check looks at the st_other bits of the callee with relocation + // R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee + // clobbers the TOC and we need an R2 save stub. + if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1) + return true; + // If a symbol is a weak undefined and we are compiling an executable // it doesn't need a range-extending thunk since it can't be called. 
if (s.isUndefWeak() && !config->shared) @@ -940,7 +1084,7 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { int64_t offset = dst - src; if (type == R_PPC64_REL14) return isInt<16>(offset); - if (type == R_PPC64_REL24) + if (type == R_PPC64_REL24 || type == R_PPC64_REL24_NOTOC) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -971,12 +1115,13 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, // thread pointer. // Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is // used as the relaxation hint for both steps 2 and 3. -void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to // addis rT, r2, sym@got@tprel@ha. - relocateOne(loc, R_PPC64_GOT_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_GOT_TPREL16_HA, val); return; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: { @@ -984,7 +1129,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // ld r3, sym@got@tprel@l(rA) uint32_t ra = (readFromHalf16(loc) & (0x1f << 16)); writeFromHalf16(loc, 0xe8600000 | ra); - relocateOne(loc, R_PPC64_GOT_TPREL16_LO_DS, val); + relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } case R_PPC64_TLSGD: @@ -1103,10 +1248,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, return true; } -TargetInfo *getPPC64TargetInfo() { +TargetInfo *elf::getPPC64TargetInfo() { static PPC64 target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 42db8e08162d..b340fd00deee 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -15,9 +15,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { @@ -33,7 +32,8 @@ public: RelType getDynRel(RelType type) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // end anonymous namespace @@ -76,6 +76,7 @@ RISCV::RISCV() { noneRel = R_RISCV_NONE; pltRel = R_RISCV_JUMP_SLOT; relativeRel = R_RISCV_RELATIVE; + iRelativeRel = R_RISCV_IRELATIVE; if (config->is64) { symbolicRel = R_RISCV_64; tlsModuleIndexRel = R_RISCV_TLS_DTPMOD64; @@ -236,9 +237,15 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_LO12_S: return R_TLS; case R_RISCV_RELAX: - case R_RISCV_ALIGN: case R_RISCV_TPREL_ADD: return R_NONE; + case R_RISCV_ALIGN: + // Not just a hint; always padded to the worst-case number of NOPs, so may + // not currently be aligned, and without linker relaxation support we can't + // delete NOPs to realign. 
+ errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires " + "unimplemented linker relaxation; recompile with -mno-relax"); + return R_NONE; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -251,11 +258,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { return (v & ((1ULL << (begin + 1)) - 1)) >> end; } -void RISCV::relocateOne(uint8_t *loc, const RelType type, - const uint64_t val) const { +void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { const unsigned bits = config->wordsize * 8; - switch (type) { + switch (rel.type) { case R_RISCV_32: write32le(loc, val); return; @@ -264,8 +270,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, return; case R_RISCV_RVC_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 8, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 8, rel); + checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE383; uint16_t imm8 = extractBits(val, 8, 8) << 12; uint16_t imm4_3 = extractBits(val, 4, 3) << 10; @@ -279,8 +285,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_RVC_JUMP: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 11, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 11, rel); + checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE003; uint16_t imm11 = extractBits(val, 11, 11) << 12; uint16_t imm4 = extractBits(val, 4, 4) << 11; @@ -298,7 +304,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_RVC_LUI: { int64_t imm = SignExtend64(val + 0x800, bits) >> 12; - checkInt(loc, imm, 6, type); + checkInt(loc, imm, 6, rel); if (imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0` write16le(loc, (read16le(loc) & 0x0F83) | 0x4000); } else { @@ -310,8 +316,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_JAL: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 20, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 20, rel); + checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0xFFF; uint32_t imm20 = extractBits(val, 20, 20) << 31; @@ -325,8 +331,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 12, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 12, rel); + checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0x1FFF07F; uint32_t imm12 = extractBits(val, 12, 12) << 31; @@ -343,10 +349,10 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_CALL: case R_RISCV_CALL_PLT: { int64_t hi = SignExtend64(val + 0x800, bits) >> 12; - checkInt(loc, hi, 20, type); + checkInt(loc, hi, 20, rel); if (isInt<20>(hi)) { - relocateOne(loc, R_RISCV_PCREL_HI20, val); - relocateOne(loc + 4, R_RISCV_PCREL_LO12_I, val); + relocateNoSym(loc, R_RISCV_PCREL_HI20, val); + relocateNoSym(loc + 4, R_RISCV_PCREL_LO12_I, val); } return; } @@ -358,7 +364,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_TPREL_HI20: case R_RISCV_HI20: { uint64_t hi = val + 0x800; - checkInt(loc, SignExtend64(hi, bits) >> 12, 20, type); + checkInt(loc, SignExtend64(hi, bits) >> 12, 20, rel); write32le(loc, (read32le(loc) & 0xFFF) | (hi & 0xFFFFF000)); return; } @@ -431,7 +437,6 @@ void RISCV::relocateOne(uint8_t *loc, const 
RelType type, write64le(loc, val - dtpOffset); break; - case R_RISCV_ALIGN: case R_RISCV_RELAX: return; // Ignored (for now) @@ -440,10 +445,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } } -TargetInfo *getRISCVTargetInfo() { +TargetInfo *elf::getRISCVTargetInfo() { static RISCV target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp index 08ef52099de9..f137c21fc898 100644 --- a/lld/ELF/Arch/SPARCV9.cpp +++ b/lld/ELF/Arch/SPARCV9.cpp @@ -16,9 +16,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class SPARCV9 final : public TargetInfo { @@ -28,7 +27,8 @@ public: const uint8_t *loc) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -54,6 +54,14 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, case R_SPARC_UA32: case R_SPARC_64: case R_SPARC_UA64: + case R_SPARC_H44: + case R_SPARC_M44: + case R_SPARC_L44: + case R_SPARC_HH22: + case R_SPARC_HM10: + case R_SPARC_LM22: + case R_SPARC_HI22: + case R_SPARC_LO10: return R_ABS; case R_SPARC_PC10: case R_SPARC_PC22: @@ -68,6 +76,9 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, return R_PLT_PC; case R_SPARC_NONE: return R_NONE; + case R_SPARC_TLS_LE_HIX22: + case R_SPARC_TLS_LE_LOX10: + return R_TLS; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -75,38 +86,45 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, } } -void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void SPARCV9::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_SPARC_32: case R_SPARC_UA32: // V-word32 - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); write32be(loc, val); break; case R_SPARC_DISP32: // V-disp32 - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32be(loc, val); break; case R_SPARC_WDISP30: case R_SPARC_WPLT30: // V-disp30 - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32be(loc, (read32be(loc) & ~0x3fffffff) | ((val >> 2) & 0x3fffffff)); break; case R_SPARC_22: // V-imm22 - checkUInt(loc, val, 22, type); + checkUInt(loc, val, 22, rel); write32be(loc, (read32be(loc) & ~0x003fffff) | (val & 0x003fffff)); break; case R_SPARC_GOT22: case R_SPARC_PC22: + case R_SPARC_LM22: // T-imm22 write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); break; + case R_SPARC_HI22: + // V-imm22 + checkUInt(loc, val >> 10, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); + break; case R_SPARC_WDISP19: // V-disp19 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write32be(loc, (read32be(loc) & ~0x0007ffff) | ((val >> 2) & 0x0007ffff)); break; case R_SPARC_GOT10: @@ -114,11 +132,45 @@ void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // T-simm10 write32be(loc, (read32be(loc) & ~0x000003ff) | (val & 0x000003ff)); break; + case R_SPARC_LO10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff)); + break; case R_SPARC_64: case R_SPARC_UA64: // V-xword64 
write64be(loc, val); break; + case R_SPARC_HH22: + // V-imm22 + checkUInt(loc, val >> 42, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 42) & 0x003fffff)); + break; + case R_SPARC_HM10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | ((val >> 32) & 0x000003ff)); + break; + case R_SPARC_H44: + // V-imm22 + checkUInt(loc, val >> 22, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 22) & 0x003fffff)); + break; + case R_SPARC_M44: + // T-imm10 + write32be(loc, (read32be(loc) & ~0x000003ff) | ((val >> 12) & 0x000003ff)); + break; + case R_SPARC_L44: + // T-imm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x00000fff)); + break; + case R_SPARC_TLS_LE_HIX22: + // T-imm22 + write32be(loc, (read32be(loc) & ~0x003fffff) | ((~val >> 10) & 0x003fffff)); + break; + case R_SPARC_TLS_LE_LOX10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff) | 0x1C00); + break; default: llvm_unreachable("unknown relocation"); } @@ -139,14 +191,11 @@ void SPARCV9::writePlt(uint8_t *buf, const Symbol & /*sym*/, memcpy(buf, pltData, sizeof(pltData)); uint64_t off = pltEntryAddr - in.plt->getVA(); - relocateOne(buf, R_SPARC_22, off); - relocateOne(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); + relocateNoSym(buf, R_SPARC_22, off); + relocateNoSym(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); } -TargetInfo *getSPARCV9TargetInfo() { +TargetInfo *elf::getSPARCV9TargetInfo() { static SPARCV9 target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp index b4daedc0f5dc..8c8824d53cce 100644 --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -16,9 +16,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86 : public TargetInfo { @@ -35,14 +34,19 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -262,21 +266,21 @@ int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { } } -void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_386_8: // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are // being used for some 16-bit programs such as boot loaders, so // we want to support them. 
- checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_386_PC8: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); *loc = val; break; case R_386_16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_386_PC16: @@ -290,7 +294,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // current location subtracted from it. // We just check that Val fits in 17 bits. This misses some cases, but // should have no false positives. - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); write16le(loc, val); break; case R_386_32: @@ -312,7 +316,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_386_TLS_LE_32: case R_386_TLS_TPOFF: case R_386_TLS_TPOFF32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32le(loc, val); break; default: @@ -320,7 +324,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt @@ -335,7 +339,7 @@ void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc + 5, val); } -void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt @@ -352,14 +356,15 @@ void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // In some conditions, relocations can be optimized to avoid using GOT. // This function does that for Initial Exec to Local Exec case. -void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Ulrich's document section 6.2 says that @gotntpoff can // be used with MOVL or ADDL instructions. // @indntpoff is similar to @gotntpoff, but for use in // position dependent code. 
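// Illustrative sketch, not lld code: the IE->LE rewrite in the function below
// turns a GOT load such as "movl foo@gotntpoff(%ebx), %esi" (8b b3 disp32)
// into an immediate move "movl $foo@tpoff, %esi" (c7 c6 imm32), reusing the
// ModRM reg field. A minimal stand-alone model of that byte surgery, with a
// made-up TP-relative offset and a little-endian host assumed for the memcpy:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 8b /r with ModRM 0xb3: mod=10, reg=110 (%esi), rm=011 (%ebx), disp32 next.
  uint8_t insn[6] = {0x8b, 0xb3, 0, 0, 0, 0};
  uint8_t *loc = insn + 2;          // points at the 32-bit field, as in lld
  uint8_t reg = (loc[-1] >> 3) & 7; // destination register from ModRM.reg
  loc[-2] = 0xc7;                   // movl $imm32, r/m32
  loc[-1] = 0xc0 | reg;             // ModRM: mod=11 (register direct), rm=reg
  uint32_t tpoff = 0xfffffff0;      // hypothetical negative TP offset
  memcpy(loc, &tpoff, 4);
  assert(insn[0] == 0xc7 && insn[1] == 0xc6);
}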
uint8_t reg = (loc[-1] >> 3) & 7; - if (type == R_386_TLS_IE) { + if (rel.type == R_386_TLS_IE) { if (loc[-1] == 0xa1) { // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" // This case is different from the generic case below because @@ -375,7 +380,7 @@ void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { loc[-1] = 0xc0 | reg; } } else { - assert(type == R_386_TLS_GOTIE); + assert(rel.type == R_386_TLS_GOTIE); if (loc[-2] == 0x8b) { // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" loc[-2] = 0xc7; @@ -389,8 +394,9 @@ void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); } -void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_386_TLS_LDO_32) { +void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_386_TLS_LDO_32) { write32le(loc, val); return; } @@ -608,7 +614,7 @@ void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, write32le(buf + 22, -off - 26); } -TargetInfo *getX86TargetInfo() { +TargetInfo *elf::getX86TargetInfo() { if (config->zRetpolineplt) { if (config->isPic) { static RetpolinePic t; @@ -626,6 +632,3 @@ TargetInfo *getX86TargetInfo() { static X86 t; return &t; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 74b72eb91293..24711ec210a4 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -18,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86_64 : public TargetInfo { @@ -35,20 +35,44 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void applyJumpInstrMod(uint8_t *loc, JumpModType type, + unsigned size) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; + bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const override; }; } // namespace +// This is vector of NOP 
instructions of sizes from 1 to 8 bytes. The +// appropriately sized instructions are used to fill the gaps between sections +// which are executed during fall through. +static const std::vector<std::vector<uint8_t>> nopInstructions = { + {0x90}, + {0x66, 0x90}, + {0x0f, 0x1f, 0x00}, + {0x0f, 0x1f, 0x40, 0x00}, + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}}; + X86_64::X86_64() { copyRel = R_X86_64_COPY; gotRel = R_X86_64_GLOB_DAT; @@ -65,6 +89,7 @@ X86_64::X86_64() { pltEntrySize = 16; ipltEntrySize = 16; trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 + nopInstrs = nopInstructions; // Align to the large page size (known as a superpage or huge page). // FreeBSD automatically promotes large, superpage-aligned allocations. @@ -73,6 +98,216 @@ X86_64::X86_64() { int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +// Opcodes for the different X86_64 jmp instructions. +enum JmpInsnOpcode : uint32_t { + J_JMP_32, + J_JNE_32, + J_JE_32, + J_JG_32, + J_JGE_32, + J_JB_32, + J_JBE_32, + J_JL_32, + J_JLE_32, + J_JA_32, + J_JAE_32, + J_UNKNOWN, +}; + +// Given the first (optional) and second byte of the insn's opcode, this +// returns the corresponding enum value. +static JmpInsnOpcode getJmpInsnType(const uint8_t *first, + const uint8_t *second) { + if (*second == 0xe9) + return J_JMP_32; + + if (first == nullptr) + return J_UNKNOWN; + + if (*first == 0x0f) { + switch (*second) { + case 0x84: + return J_JE_32; + case 0x85: + return J_JNE_32; + case 0x8f: + return J_JG_32; + case 0x8d: + return J_JGE_32; + case 0x82: + return J_JB_32; + case 0x86: + return J_JBE_32; + case 0x8c: + return J_JL_32; + case 0x8e: + return J_JLE_32; + case 0x87: + return J_JA_32; + case 0x83: + return J_JAE_32; + } + } + return J_UNKNOWN; +} + +// Return the relocation index for input section IS with a specific Offset. +// Returns the maximum size of the vector if no such relocation is found. +static unsigned getRelocationWithOffset(const InputSection &is, + uint64_t offset) { + unsigned size = is.relocations.size(); + for (unsigned i = size - 1; i + 1 > 0; --i) { + if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE) + return i; + } + return size; +} + +// Returns true if R corresponds to a relocation used for a jump instruction. +// TODO: Once special relocations for relaxable jump instructions are available, +// this should be modified to use those relocations. +static bool isRelocationForJmpInsn(Relocation &R) { + return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 || + R.type == R_X86_64_PC8; +} + +// Return true if Relocation R points to the first instruction in the +// next section. +// TODO: Delete this once psABI reserves a new relocation type for fall thru +// jumps. +static bool isFallThruRelocation(InputSection &is, InputFile *file, + InputSection *nextIS, Relocation &r) { + if (!isRelocationForJmpInsn(r)) + return false; + + uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset; + uint64_t targetOffset = InputSectionBase::getRelocTargetVA( + file, r.type, r.addend, addrLoc, *r.sym, r.expr); + + // If this jmp is a fall thru, the target offset is the beginning of the + // next section. 
+ uint64_t nextSectionOffset = + nextIS->getOutputSection()->addr + nextIS->outSecOff; + return (addrLoc + 4 + targetOffset) == nextSectionOffset; +} + +// Return the jmp instruction opcode that is the inverse of the given +// opcode. For example, JE inverted is JNE. +static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) { + switch (opcode) { + case J_JE_32: + return J_JNE_32; + case J_JNE_32: + return J_JE_32; + case J_JG_32: + return J_JLE_32; + case J_JGE_32: + return J_JL_32; + case J_JB_32: + return J_JAE_32; + case J_JBE_32: + return J_JA_32; + case J_JL_32: + return J_JGE_32; + case J_JLE_32: + return J_JG_32; + case J_JA_32: + return J_JBE_32; + case J_JAE_32: + return J_JB_32; + default: + return J_UNKNOWN; + } +} + +// Deletes direct jump instruction in input sections that jumps to the +// following section as it is not required. If there are two consecutive jump +// instructions, it checks if they can be flipped and one can be deleted. +// For example: +// .section .text +// a.BB.foo: +// ... +// 10: jne aa.BB.foo +// 16: jmp bar +// aa.BB.foo: +// ... +// +// can be converted to: +// a.BB.foo: +// ... +// 10: je bar #jne flipped to je and the jmp is deleted. +// aa.BB.foo: +// ... +bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const { + const unsigned sizeOfDirectJmpInsn = 5; + + if (nextIS == nullptr) + return false; + + if (is.getSize() < sizeOfDirectJmpInsn) + return false; + + // If this jmp insn can be removed, it is the last insn and the + // relocation is 4 bytes before the end. + unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4); + if (rIndex == is.relocations.size()) + return false; + + Relocation &r = is.relocations[rIndex]; + + // Check if the relocation corresponds to a direct jmp. + const uint8_t *secContents = is.data().data(); + // If it is not a direct jmp instruction, there is nothing to do here. + if (*(secContents + r.offset - 1) != 0xe9) + return false; + + if (isFallThruRelocation(is, file, nextIS, r)) { + // This is a fall thru and can be deleted. + r.expr = R_NONE; + r.offset = 0; + is.drop_back(sizeOfDirectJmpInsn); + is.nopFiller = true; + return true; + } + + // Now, check if flip and delete is possible. + const unsigned sizeOfJmpCCInsn = 6; + // To flip, there must be atleast one JmpCC and one direct jmp. + if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn) + return 0; + + unsigned rbIndex = + getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4)); + if (rbIndex == is.relocations.size()) + return 0; + + Relocation &rB = is.relocations[rbIndex]; + + const uint8_t *jmpInsnB = secContents + rB.offset - 1; + JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB); + if (jmpOpcodeB == J_UNKNOWN) + return false; + + if (!isFallThruRelocation(is, file, nextIS, rB)) + return false; + + // jmpCC jumps to the fall thru block, the branch can be flipped and the + // jmp can be deleted. + JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB); + if (jInvert == J_UNKNOWN) + return false; + is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4}); + // Move R's values to rB except the offset. 
+ rB = {r.expr, r.type, rB.offset, r.addend, r.sym}; + // Cancel R + r.expr = R_NONE; + r.offset = 0; + is.drop_back(sizeOfDirectJmpInsn); + is.nopFiller = true; + return true; +} + RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { if (type == R_X86_64_GOTTPOFF) @@ -177,8 +412,9 @@ RelType X86_64::getDynRel(RelType type) const { return R_X86_64_NONE; } -void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_X86_64_TLSGD) { +void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_X86_64_TLSGD) { // Convert // .byte 0x66 // leaq x@tlsgd(%rip), %rdi @@ -201,7 +437,7 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // lea x@tlsgd(%rip), %rax // call *(%rax) // to the following two instructions. - assert(type == R_X86_64_GOTPC32_TLSDESC); + assert(rel.type == R_X86_64_GOTPC32_TLSDESC); if (memcmp(loc - 3, "\x48\x8d\x05", 3)) { error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used " "in callq *x@tlsdesc(%rip), %rax"); @@ -217,8 +453,9 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_X86_64_TLSGD) { +void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_X86_64_TLSGD) { // Convert // .byte 0x66 // leaq x@tlsgd(%rip), %rdi @@ -241,7 +478,7 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // lea x@tlsgd(%rip), %rax // call *(%rax) // to the following two instructions. - assert(type == R_X86_64_GOTPC32_TLSDESC); + assert(rel.type == R_X86_64_GOTPC32_TLSDESC); if (memcmp(loc - 3, "\x48\x8d\x05", 3)) { error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used " "in callq *x@tlsdesc(%rip), %rax"); @@ -258,7 +495,8 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to // R_X86_64_TPOFF32 so that it does not use GOT. -void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &, + uint64_t val) const { uint8_t *inst = loc - 3; uint8_t reg = loc[-1] >> 3; uint8_t *regSlot = loc - 1; @@ -299,12 +537,13 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val + 4); } -void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_X86_64_DTPOFF64) { +void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_X86_64_DTPOFF64) { write64le(loc, val); return; } - if (type == R_X86_64_DTPOFF32) { + if (rel.type == R_X86_64_DTPOFF32) { write32le(loc, val); return; } @@ -347,26 +586,114 @@ void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD"); } -void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +// A JumpInstrMod at a specific offset indicates that the jump instruction +// opcode at that offset must be modified. This is specifically used to relax +// jump instructions with basic block sections. This function looks at the +// JumpMod and effects the change. 
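For reference while reading the switch that follows (these are standard x86 encodings, not something introduced by this patch): a conditional branch with a 32-bit displacement uses the two-byte opcode 0F 8x, the 8-bit form uses a single byte 7x, and an unconditional jmp is E9 (rel32) or EB (rel8). That is why the size == 4 cases rewrite both loc[-1] and *loc while the short forms touch only *loc. As the push_back of {jInvert, rB.offset - 1, 4} in deleteFallThruJmpInsn suggests, loc ends up pointing at the second opcode byte. A made-up byte sequence for the jne-to-je flip:

// Illustration with invented bytes: "jne +0x12345678" is 0F 85 78 56 34 12.
uint8_t insn[6] = {0x0f, 0x85, 0x78, 0x56, 0x34, 0x12};
// The J_JE_32 case with size == 4 performs:
//   loc[-1] = 0x0f; *loc = 0x84;
// leaving 0F 84 78 56 34 12, i.e. "je rel32". The displacement itself is
// re-resolved separately through the relocation that was moved onto rB.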
+void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type, + unsigned size) const { switch (type) { + case J_JMP_32: + if (size == 4) + *loc = 0xe9; + else + *loc = 0xeb; + break; + case J_JE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x84; + } else + *loc = 0x74; + break; + case J_JNE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x85; + } else + *loc = 0x75; + break; + case J_JG_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8f; + } else + *loc = 0x7f; + break; + case J_JGE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8d; + } else + *loc = 0x7d; + break; + case J_JB_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x82; + } else + *loc = 0x72; + break; + case J_JBE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x86; + } else + *loc = 0x76; + break; + case J_JL_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8c; + } else + *loc = 0x7c; + break; + case J_JLE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8e; + } else + *loc = 0x7e; + break; + case J_JA_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x87; + } else + *loc = 0x77; + break; + case J_JAE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x83; + } else + *loc = 0x73; + break; + case J_UNKNOWN: + llvm_unreachable("Unknown Jump Relocation"); + } +} + +void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_X86_64_8: - checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_X86_64_PC8: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); *loc = val; break; case R_X86_64_16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_X86_64_PC16: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); write16le(loc, val); break; case R_X86_64_32: - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); write32le(loc, val); break; case R_X86_64_32S: @@ -384,7 +711,7 @@ void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_X86_64_TLSLD: case R_X86_64_DTPOFF32: case R_X86_64_SIZE32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32le(loc, val); break; case R_X86_64_64: @@ -495,7 +822,7 @@ static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op, write32le(loc, val); } -void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { +void X86_64::relaxGot(uint8_t *loc, const Relocation &, uint64_t val) const { const uint8_t op = loc[-2]; const uint8_t modRm = loc[-1]; @@ -758,7 +1085,4 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); } - -} // namespace elf -} // namespace lld +TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); } diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index 6dad7c965f1a..21c641b5161f 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -48,9 +48,8 @@ #include <numeric> using namespace llvm; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { struct Edge { @@ -263,11 +262,8 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() { // Sort sections by the profile data provided by -callgraph-profile-file // // This first builds a call graph based on the profile data then merges sections -// according to the C³ huristic. All clusters are then sorted by a density +// according to the C³ heuristic. All clusters are then sorted by a density // metric to further improve locality. 
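The density mentioned above is, in essence, a cluster's accumulated call-graph-profile weight divided by its size in bytes, so code that is executed often relative to the space it occupies is laid out first. A minimal sketch of that ordering; the type and field names are illustrative, not the exact ones used in CallGraphSort.cpp:

#include <algorithm>
#include <cstdint>
#include <vector>

struct ClusterInfo {
  uint64_t weight = 0; // sum of profiled call counts into the cluster
  uint64_t size = 0;   // total bytes of the merged input sections
  double density() const { return size ? double(weight) / double(size) : 0; }
};

// Hot, compact clusters come first; ties keep their existing order.
void sortByDensity(std::vector<ClusterInfo> &clusters) {
  std::stable_sort(clusters.begin(), clusters.end(),
                   [](const ClusterInfo &a, const ClusterInfo &b) {
                     return a.density() > b.density();
                   });
}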
-DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder() { +DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() { return CallGraphSort().run(); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 06ba88a83dd4..e74a4a0c5b22 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -17,6 +17,7 @@ #include "llvm/Support/CachePruning.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/GlobPattern.h" #include <atomic> #include <vector> @@ -90,6 +91,7 @@ struct Configuration { uint32_t andFeatures = 0; llvm::CachePruningPolicy thinLTOCachePolicy; llvm::StringMap<uint64_t> sectionStartMap; + llvm::StringRef bfdname; llvm::StringRef chroot; llvm::StringRef dynamicLinker; llvm::StringRef dwoDir; @@ -108,11 +110,13 @@ struct Configuration { llvm::StringRef optRemarksPasses; llvm::StringRef optRemarksFormat; llvm::StringRef progName; + llvm::StringRef printArchiveStats; llvm::StringRef printSymbolOrder; llvm::StringRef soName; llvm::StringRef sysroot; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; + llvm::StringRef ltoBasicBlockSections; std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace; std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace; std::string rpath; @@ -121,6 +125,7 @@ struct Configuration { std::vector<llvm::StringRef> filterList; std::vector<llvm::StringRef> searchPaths; std::vector<llvm::StringRef> symbolOrderingFile; + std::vector<llvm::StringRef> thinLTOModulesToCompile; std::vector<llvm::StringRef> undefined; std::vector<SymbolVersion> dynamicList; std::vector<uint8_t> buildIdVector; @@ -140,6 +145,7 @@ struct Configuration { bool checkSections; bool compressDebugSections; bool cref; + std::vector<std::pair<llvm::GlobPattern, uint64_t>> deadRelocInNonAlloc; bool defineCommon; bool demangle = true; bool dependentLibraries; @@ -152,29 +158,31 @@ struct Configuration { bool exportDynamic; bool fixCortexA53Errata843419; bool fixCortexA8; - bool forceBTI; bool formatBinary = false; bool gcSections; bool gdbIndex; bool gnuHash = false; bool gnuUnique; - bool hasDynamicList = false; bool hasDynSymTab; bool ignoreDataAddressEquality; bool ignoreFunctionAddressEquality; bool ltoCSProfileGenerate; bool ltoDebugPassManager; + bool ltoEmitAsm; bool ltoNewPassManager; + bool ltoUniqueBasicBlockSectionNames; + bool ltoWholeProgramVisibility; bool mergeArmExidx; bool mipsN32Abi = false; bool mmapOutputFile; bool nmagic; + bool noDynamicLinker = false; bool noinhibitExec; bool nostdlib; bool oFormatBinary; bool omagic; + bool optimizeBBJumps; bool optRemarksWithHotness; - bool pacPlt; bool picThunk; bool pie; bool printGcSections; @@ -182,18 +190,23 @@ struct Configuration { bool relocatable; bool relrPackDynRelocs; bool saveTemps; + llvm::Optional<uint32_t> shuffleSectionSeed; bool singleRoRx; bool shared; + bool symbolic; bool isStatic = false; bool sysvHash = false; bool target1Rel; bool trace; bool thinLTOEmitImportsFiles; bool thinLTOIndexOnly; + bool timeTraceEnabled; bool tocOptimize; bool undefinedVersion; + bool unique; bool useAndroidRelrTags = false; bool warnBackrefs; + std::vector<llvm::GlobPattern> warnBackrefsExclude; bool warnCommon; bool warnIfuncTextrel; bool warnMissingEntry; @@ -201,6 +214,7 @@ struct Configuration { bool writeAddends; bool zCombreloc; bool zCopyreloc; + bool zForceBti; bool zForceIbt; bool zGlobal; bool zHazardplt; @@ -213,9 +227,11 @@ struct Configuration { bool zNodlopen; bool zNow; 
bool zOrigin; + bool zPacPlt; bool zRelro; bool zRodynamic; bool zShstk; + uint8_t zStartStopVisibility; bool zText; bool zRetpolineplt; bool zWxneeded; @@ -240,7 +256,8 @@ struct Configuration { unsigned ltoPartitions; unsigned ltoo; unsigned optimize; - unsigned thinLTOJobs; + StringRef thinLTOJobs; + unsigned timeTraceGranularity; int32_t splitStackAdjustSize; // The following config options do not directly correspond to any diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index a00189a0e3a2..24c44730bf64 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -22,9 +22,9 @@ using namespace llvm; using namespace llvm::object; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) { for (InputSectionBase *sec : obj->getSections()) { if (!sec) @@ -36,6 +36,7 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) { .Case(".debug_gnu_pubnames", &gnuPubnamesSection) .Case(".debug_gnu_pubtypes", &gnuPubtypesSection) .Case(".debug_info", &infoSection) + .Case(".debug_loclists", &loclistsSection) .Case(".debug_ranges", &rangesSection) .Case(".debug_rnglists", &rnglistsSection) .Case(".debug_str_offsets", &strOffsetsSection) @@ -99,15 +100,9 @@ LLDDwarfObj<ELFT>::findAux(const InputSectionBase &sec, uint64_t pos, // its zero value will terminate the decoding of .debug_ranges prematurely. Symbol &s = file->getRelocTargetSym(rel); uint64_t val = 0; - if (auto *dr = dyn_cast<Defined>(&s)) { + if (auto *dr = dyn_cast<Defined>(&s)) val = dr->value; - // FIXME: We should be consistent about always adding the file - // offset or not. - if (dr->section->flags & ELF::SHF_ALLOC) - val += cast<InputSection>(dr->section)->getOffsetInFile(); - } - DataRefImpl d; d.p = getAddend<ELFT>(rel); return RelocAddrEntry{secIndex, RelocationRef(d, nullptr), @@ -124,10 +119,7 @@ Optional<RelocAddrEntry> LLDDwarfObj<ELFT>::find(const llvm::DWARFSection &s, return findAux(*sec.sec, pos, sec.sec->template rels<ELFT>()); } -template class LLDDwarfObj<ELF32LE>; -template class LLDDwarfObj<ELF32BE>; -template class LLDDwarfObj<ELF64LE>; -template class LLDDwarfObj<ELF64BE>; - -} // namespace elf -} // namespace lld +template class elf::LLDDwarfObj<ELF32LE>; +template class elf::LLDDwarfObj<ELF32BE>; +template class elf::LLDDwarfObj<ELF64LE>; +template class elf::LLDDwarfObj<ELF64BE>; diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h index 51ec9092f172..a12dae6e9960 100644 --- a/lld/ELF/DWARF.h +++ b/lld/ELF/DWARF.h @@ -32,6 +32,10 @@ public: f(infoSection); } + const llvm::DWARFSection &getLoclistsSection() const override { + return loclistsSection; + } + const llvm::DWARFSection &getRangesSection() const override { return rangesSection; } @@ -52,11 +56,11 @@ public: return addrSection; } - const llvm::DWARFSection &getGnuPubnamesSection() const override { + const LLDDWARFSection &getGnuPubnamesSection() const override { return gnuPubnamesSection; } - const llvm::DWARFSection &getGnuPubtypesSection() const override { + const LLDDWARFSection &getGnuPubtypesSection() const override { return gnuPubtypesSection; } @@ -81,6 +85,7 @@ private: LLDDWARFSection gnuPubnamesSection; LLDDWARFSection gnuPubtypesSection; LLDDWARFSection infoSection; + LLDDWARFSection loclistsSection; LLDDWARFSection rangesSection; LLDDWARFSection rnglistsSection; LLDDWARFSection strOffsetsSection; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 23da749d3078..4637a3b306da 100644 --- a/lld/ELF/Driver.cpp +++ 
b/lld/ELF/Driver.cpp @@ -43,7 +43,6 @@ #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" -#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" @@ -53,9 +52,11 @@ #include "llvm/Support/Compression.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> #include <utility> @@ -65,18 +66,17 @@ using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::sys; using namespace llvm::support; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { - -Configuration *config; -LinkerDriver *driver; +Configuration *elf::config; +LinkerDriver *elf::driver; static void setConfigs(opt::InputArgList &args); static void readConfigs(opt::InputArgList &args); -bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS, - raw_ostream &stderrOS) { +bool elf::link(ArrayRef<const char *> args, bool canExitEarly, + raw_ostream &stdoutOS, raw_ostream &stderrOS) { lld::stdoutOS = &stdoutOS; lld::stderrOS = &stderrOS; @@ -89,10 +89,13 @@ bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS, inputSections.clear(); outputSections.clear(); + archiveFiles.clear(); binaryFiles.clear(); bitcodeFiles.clear(); + lazyObjFiles.clear(); objectFiles.clear(); sharedFiles.clear(); + backwardReferences.clear(); config = make<Configuration>(); driver = make<LinkerDriver>(); @@ -147,6 +150,7 @@ static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef emul) { .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64}) .Case("elf_i386", {ELF32LEKind, EM_386}) .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) + .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) @@ -350,9 +354,9 @@ static void checkOptions() { error("-z force-ibt may not be used with -z retpolineplt"); if (config->emachine != EM_AARCH64) { - if (config->pacPlt) + if (config->zPacPlt) error("-z pac-plt only supported on AArch64"); - if (config->forceBTI) + if (config->zForceBti) error("-z force-bti only supported on AArch64"); } } @@ -407,6 +411,24 @@ static GnuStackKind getZGnuStack(opt::InputArgList &args) { return GnuStackKind::NoExec; } +static uint8_t getZStartStopVisibility(opt::InputArgList &args) { + for (auto *arg : args.filtered_reverse(OPT_z)) { + std::pair<StringRef, StringRef> kv = StringRef(arg->getValue()).split('='); + if (kv.first == "start-stop-visibility") { + if (kv.second == "default") + return STV_DEFAULT; + else if (kv.second == "internal") + return STV_INTERNAL; + else if (kv.second == "hidden") + return STV_HIDDEN; + else if (kv.second == "protected") + return STV_PROTECTED; + error("unknown -z start-stop-visibility= value: " + StringRef(kv.second)); + } + } + return STV_PROTECTED; +} + static bool isKnownZFlag(StringRef s) { return s == "combreloc" || s == "copyreloc" || s == "defs" || s == "execstack" || s == "force-bti" || s == "force-ibt" || @@ -418,11 +440,13 @@ static bool isKnownZFlag(StringRef s) { s == "nodelete" || s == "nodlopen" || s == "noexecstack" || s == "nognustack" || s == "nokeep-text-section-prefix" || s == "norelro" || s == "noseparate-code" || s == "notext" || - s == "now" 
|| s == "origin" || s == "pac-plt" || s == "relro" || - s == "retpolineplt" || s == "rodynamic" || s == "shstk" || - s == "text" || s == "undefs" || s == "wxneeded" || - s.startswith("common-page-size=") || s.startswith("max-page-size=") || - s.startswith("stack-size="); + s == "now" || s == "origin" || s == "pac-plt" || s == "rel" || + s == "rela" || s == "relro" || s == "retpolineplt" || + s == "rodynamic" || s == "shstk" || s == "text" || s == "undefs" || + s == "wxneeded" || s.startswith("common-page-size=") || + s.startswith("dead-reloc-in-nonalloc=") || + s.startswith("max-page-size=") || s.startswith("stack-size=") || + s.startswith("start-stop-visibility="); } // Report an error for an unknown -z option. @@ -487,37 +511,57 @@ void LinkerDriver::main(ArrayRef<const char *> argsArr) { if (args.hasArg(OPT_version)) return; - initLLVM(); - createFiles(args); - if (errorCount()) - return; + // Initialize time trace profiler. + if (config->timeTraceEnabled) + timeTraceProfilerInitialize(config->timeTraceGranularity, config->progName); - inferMachineType(); - setConfigs(args); - checkOptions(); - if (errorCount()) - return; + { + llvm::TimeTraceScope timeScope("ExecuteLinker"); - // The Target instance handles target-specific stuff, such as applying - // relocations or writing a PLT section. It also contains target-dependent - // values such as a default image base address. - target = getTarget(); + initLLVM(); + createFiles(args); + if (errorCount()) + return; - switch (config->ekind) { - case ELF32LEKind: - link<ELF32LE>(args); - return; - case ELF32BEKind: - link<ELF32BE>(args); - return; - case ELF64LEKind: - link<ELF64LE>(args); - return; - case ELF64BEKind: - link<ELF64BE>(args); - return; - default: - llvm_unreachable("unknown Config->EKind"); + inferMachineType(); + setConfigs(args); + checkOptions(); + if (errorCount()) + return; + + // The Target instance handles target-specific stuff, such as applying + // relocations or writing a PLT section. It also contains target-dependent + // values such as a default image base address. 
+ target = getTarget(); + + switch (config->ekind) { + case ELF32LEKind: + link<ELF32LE>(args); + break; + case ELF32BEKind: + link<ELF32BE>(args); + break; + case ELF64LEKind: + link<ELF64LE>(args); + break; + case ELF64BEKind: + link<ELF64BE>(args); + break; + default: + llvm_unreachable("unknown Config->EKind"); + } + } + + if (config->timeTraceEnabled) { + if (auto E = timeTraceProfilerWrite(args.getLastArgValue(OPT_time_trace_file_eq).str(), + config->outputFile)) { + handleAllErrors(std::move(E), [&](const StringError &SE) { + error(SE.getMessage()); + }); + return; + } + + timeTraceProfilerCleanup(); } } @@ -586,9 +630,6 @@ static bool isOutputFormatBinary(opt::InputArgList &args) { } static DiscardPolicy getDiscard(opt::InputArgList &args) { - if (args.hasArg(OPT_relocatable)) - return DiscardPolicy::None; - auto *arg = args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none); if (!arg) @@ -602,8 +643,13 @@ static DiscardPolicy getDiscard(opt::InputArgList &args) { static StringRef getDynamicLinker(opt::InputArgList &args) { auto *arg = args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker); - if (!arg || arg->getOption().getID() == OPT_no_dynamic_linker) + if (!arg) + return ""; + if (arg->getOption().getID() == OPT_no_dynamic_linker) { + // --no-dynamic-linker suppresses undefined weak symbols in .dynsym + config->noDynamicLinker = true; return ""; + } return arg->getValue(); } @@ -816,6 +862,22 @@ static std::vector<StringRef> getSymbolOrderingFile(MemoryBufferRef mb) { return names.takeVector(); } +static bool getIsRela(opt::InputArgList &args) { + // If -z rel or -z rela is specified, use the last option. + for (auto *arg : args.filtered_reverse(OPT_z)) { + StringRef s(arg->getValue()); + if (s == "rel") + return false; + if (s == "rela") + return true; + } + + // Otherwise use the psABI defined relocation entry format. 
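For context on what the choice below (and the new -z rel / -z rela override) actually selects, here are the 64-bit entry layouts from the ELF specification, written with fixed-width integers for the sketch: a RELA entry carries the addend explicitly, while a REL entry leaves it implicit in the bytes at r_offset.

#include <cstdint>

struct Elf64_Rel {   // -z rel: addend is read from, and kept in, the output
  uint64_t r_offset;
  uint64_t r_info;
};
struct Elf64_Rela {  // -z rela: addend travels with the relocation entry
  uint64_t r_offset;
  uint64_t r_info;
  int64_t  r_addend;
};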
+ uint16_t m = config->emachine; + return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC || + m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64; +} + static void parseClangOption(StringRef opt, const Twine &msg) { std::string err; raw_string_ostream os(err); @@ -834,7 +896,6 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); errorHandler().vsDiagnostics = args.hasArg(OPT_visual_studio_diagnostics_format, false); - threadsEnabled = args.hasFlag(OPT_threads, OPT_no_threads, true); config->allowMultipleDefinition = args.hasFlag(OPT_allow_multiple_definition, @@ -853,6 +914,8 @@ static void readConfigs(opt::InputArgList &args) { config->cref = args.hasFlag(OPT_cref, OPT_no_cref, false); config->defineCommon = args.hasFlag(OPT_define_common, OPT_no_define_common, !args.hasArg(OPT_relocatable)); + config->optimizeBBJumps = + args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false); config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); config->disableVerify = args.hasArg(OPT_disable_verify); @@ -874,9 +937,10 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false); config->filterList = args::getStrings(args, OPT_filter); config->fini = args.getLastArgValue(OPT_fini, "_fini"); - config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419); - config->fixCortexA8 = args.hasArg(OPT_fix_cortex_a8); - config->forceBTI = hasZOption(args, "force-bti"); + config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419) && + !args.hasArg(OPT_relocatable); + config->fixCortexA8 = + args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable); config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true); config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false); @@ -890,12 +954,20 @@ static void readConfigs(opt::InputArgList &args) { config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate); config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file); config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); + config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm); config->ltoNewPassManager = args.hasArg(OPT_lto_new_pass_manager); config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); + config->ltoWholeProgramVisibility = + args.hasArg(OPT_lto_whole_program_visibility); config->ltoo = args::getInteger(args, OPT_lto_O, 2); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); + config->ltoBasicBlockSections = + args.getLastArgValue(OPT_lto_basicblock_sections); + config->ltoUniqueBasicBlockSectionNames = + args.hasFlag(OPT_lto_unique_bb_section_names, + OPT_no_lto_unique_bb_section_names, false); config->mapFile = args.getLastArgValue(OPT_Map); config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = @@ -914,21 +986,23 @@ static void readConfigs(opt::InputArgList &args) { config->optimize = args::getInteger(args, OPT_O, 1); config->orphanHandling = getOrphanHandling(args); config->outputFile = args.getLastArgValue(OPT_o); - config->pacPlt = hasZOption(args, "pac-plt"); 
config->pie = args.hasFlag(OPT_pie, OPT_no_pie, false); config->printIcfSections = args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false); config->printGcSections = args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false); + config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); config->rpath = getRpath(args); config->relocatable = args.hasArg(OPT_relocatable); config->saveTemps = args.hasArg(OPT_save_temps); + if (args.hasArg(OPT_shuffle_sections)) + config->shuffleSectionSeed = args::getInteger(args, OPT_shuffle_sections, 0); config->searchPaths = args::getStrings(args, OPT_library_path); config->sectionStartMap = getSectionStartMap(args); config->shared = args.hasArg(OPT_shared); - config->singleRoRx = args.hasArg(OPT_no_rosegment); + config->singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true); config->soName = args.getLastArgValue(OPT_soname); config->sortSection = getSortSection(args); config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384); @@ -944,15 +1018,20 @@ static void readConfigs(opt::InputArgList &args) { config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_eq); config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq); - config->thinLTOJobs = args::getInteger(args, OPT_thinlto_jobs, -1u); config->thinLTOObjectSuffixReplace = getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq); config->thinLTOPrefixReplace = getOldNewOptions(args, OPT_thinlto_prefix_replace_eq); + config->thinLTOModulesToCompile = + args::getStrings(args, OPT_thinlto_single_module_eq); + config->timeTraceEnabled = args.hasArg(OPT_time_trace); + config->timeTraceGranularity = + args::getInteger(args, OPT_time_trace_granularity, 500); config->trace = args.hasArg(OPT_trace); config->undefined = args::getStrings(args, OPT_undefined); config->undefinedVersion = args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, true); + config->unique = args.hasArg(OPT_unique); config->useAndroidRelrTags = args.hasFlag( OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false); config->unresolvedSymbols = getUnresolvedSymbolPolicy(args); @@ -965,6 +1044,7 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); + config->zForceBti = hasZOption(args, "force-bti"); config->zForceIbt = hasZOption(args, "force-ibt"); config->zGlobal = hasZOption(args, "global"); config->zGnustack = getZGnuStack(args); @@ -979,33 +1059,78 @@ static void readConfigs(opt::InputArgList &args) { config->zNodlopen = hasZOption(args, "nodlopen"); config->zNow = getZFlag(args, "now", "lazy", false); config->zOrigin = hasZOption(args, "origin"); + config->zPacPlt = hasZOption(args, "pac-plt"); config->zRelro = getZFlag(args, "relro", "norelro", true); config->zRetpolineplt = hasZOption(args, "retpolineplt"); config->zRodynamic = hasZOption(args, "rodynamic"); config->zSeparate = getZSeparate(args); config->zShstk = hasZOption(args, "shstk"); config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0); + config->zStartStopVisibility = getZStartStopVisibility(args); config->zText = getZFlag(args, "text", "notext", true); config->zWxneeded = hasZOption(args, "wxneeded"); + for 
(opt::Arg *arg : args.filtered(OPT_z)) { + std::pair<StringRef, StringRef> option = + StringRef(arg->getValue()).split('='); + if (option.first != "dead-reloc-in-nonalloc") + continue; + constexpr StringRef errPrefix = "-z dead-reloc-in-nonalloc=: "; + std::pair<StringRef, StringRef> kv = option.second.split('='); + if (kv.first.empty() || kv.second.empty()) { + error(errPrefix + "expected <section_glob>=<value>"); + continue; + } + uint64_t v; + if (!to_integer(kv.second, v)) + error(errPrefix + "expected a non-negative integer, but got '" + + kv.second + "'"); + else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first)) + config->deadRelocInNonAlloc.emplace_back(std::move(*pat), v); + else + error(errPrefix + toString(pat.takeError())); + } + // Parse LTO options. if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq)) parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())), arg->getSpelling()); - for (auto *arg : args.filtered(OPT_plugin_opt)) - parseClangOption(arg->getValue(), arg->getSpelling()); + for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus)) + parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling()); + + // GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or + // relative path. Just ignore. If not ended with "lto-wrapper", consider it an + // unsupported LLVMgold.so option and error. + for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq)) + if (!StringRef(arg->getValue()).endswith("lto-wrapper")) + error(arg->getSpelling() + ": unknown plugin option '" + arg->getValue() + + "'"); // Parse -mllvm options. for (auto *arg : args.filtered(OPT_mllvm)) parseClangOption(arg->getValue(), arg->getSpelling()); + // --threads= takes a positive integer and provides the default value for + // --thinlto-jobs=. + if (auto *arg = args.getLastArg(OPT_threads)) { + StringRef v(arg->getValue()); + unsigned threads = 0; + if (!llvm::to_integer(v, threads, 0) || threads == 0) + error(arg->getSpelling() + ": expected a positive integer, but got '" + + arg->getValue() + "'"); + parallel::strategy = hardware_concurrency(threads); + config->thinLTOJobs = v; + } + if (auto *arg = args.getLastArg(OPT_thinlto_jobs)) + config->thinLTOJobs = arg->getValue(); + if (config->ltoo > 3) error("invalid optimization level for LTO: " + Twine(config->ltoo)); if (config->ltoPartitions == 0) error("--lto-partitions: number of threads must be > 0"); - if (config->thinLTOJobs == 0) - error("--thinlto-jobs: number of threads must be > 0"); + if (!get_threadpool_strategy(config->thinLTOJobs)) + error("--thinlto-jobs: invalid job count: " + config->thinLTOJobs); if (config->splitStackAdjustSize < 0) error("--split-stack-adjust-size: size must be >= 0"); @@ -1083,25 +1208,30 @@ static void readConfigs(opt::InputArgList &args) { {s, /*isExternCpp=*/false, /*hasWildcard=*/false}); } - // Parses -dynamic-list and -export-dynamic-symbol. They make some - // symbols private. Note that -export-dynamic takes precedence over them - // as it says all symbols should be exported. 
- if (!config->exportDynamic) { - for (auto *arg : args.filtered(OPT_dynamic_list)) - if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())) - readDynamicList(*buffer); - - for (auto *arg : args.filtered(OPT_export_dynamic_symbol)) - config->dynamicList.push_back( - {arg->getValue(), /*isExternCpp=*/false, /*hasWildcard=*/false}); + for (opt::Arg *arg : args.filtered(OPT_warn_backrefs_exclude)) { + StringRef pattern(arg->getValue()); + if (Expected<GlobPattern> pat = GlobPattern::create(pattern)) + config->warnBackrefsExclude.push_back(std::move(*pat)); + else + error(arg->getSpelling() + ": " + toString(pat.takeError())); } - // If --export-dynamic-symbol=foo is given and symbol foo is defined in - // an object file in an archive file, that object file should be pulled - // out and linked. (It doesn't have to behave like that from technical - // point of view, but this is needed for compatibility with GNU.) + // When producing an executable, --dynamic-list specifies non-local defined + // symbols whith are required to be exported. When producing a shared object, + // symbols not specified by --dynamic-list are non-preemptible. + config->symbolic = + args.hasArg(OPT_Bsymbolic) || args.hasArg(OPT_dynamic_list); + for (auto *arg : args.filtered(OPT_dynamic_list)) + if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())) + readDynamicList(*buffer); + + // --export-dynamic-symbol specifies additional --dynamic-list symbols if any + // other option expresses a symbolic intention: -no-pie, -pie, -Bsymbolic, + // -Bsymbolic-functions (if STT_FUNC), --dynamic-list. for (auto *arg : args.filtered(OPT_export_dynamic_symbol)) - config->undefined.push_back(arg->getValue()); + config->dynamicList.push_back( + {arg->getValue(), /*isExternCpp=*/false, + /*hasWildcard=*/hasWildcard(arg->getValue())}); for (auto *arg : args.filtered(OPT_version_script)) if (Optional<std::string> path = searchScript(arg->getValue())) { @@ -1131,20 +1261,19 @@ static void setConfigs(opt::InputArgList &args) { // ELF defines two different ways to store relocation addends as shown below: // - // Rel: Addends are stored to the location where relocations are applied. + // Rel: Addends are stored to the location where relocations are applied. It + // cannot pack the full range of addend values for all relocation types, but + // this only affects relocation types that we don't support emitting as + // dynamic relocations (see getDynRel). // Rela: Addends are stored as part of relocation entry. // // In other words, Rela makes it easy to read addends at the price of extra - // 4 or 8 byte for each relocation entry. We don't know why ELF defined two - // different mechanisms in the first place, but this is how the spec is - // defined. + // 4 or 8 byte for each relocation entry. // - // You cannot choose which one, Rel or Rela, you want to use. Instead each - // ABI defines which one you need to use. The following expression expresses - // that. - config->isRela = m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || - m == EM_PPC || m == EM_PPC64 || m == EM_RISCV || - m == EM_X86_64; + // We pick the format for dynamic relocations according to the psABI for each + // processor, but a contrary choice can be made if the dynamic loader + // supports. + config->isRela = getIsRela(args); // If the output uses REL relocations we must store the dynamic relocation // addends to the output sections. 
We also store addends for RELA relocations @@ -1386,7 +1515,7 @@ static void excludeLibs(opt::InputArgList &args) { visit(file); } -// Force Sym to be entered in the output. Used for -u or equivalent. +// Force Sym to be entered in the output. static void handleUndefined(Symbol *sym) { // Since a symbol may not be used inside the program, LTO may // eliminate it. Mark the symbol as "used" to prevent it. @@ -1587,6 +1716,12 @@ static Symbol *addUndefined(StringRef name) { Undefined{nullptr, name, STB_GLOBAL, STV_DEFAULT, 0}); } +static Symbol *addUnusedUndefined(StringRef name) { + Undefined sym{nullptr, name, STB_GLOBAL, STV_DEFAULT, 0}; + sym.isUsedInRegularObj = false; + return symtab->addSymbol(sym); +} + // This function is where all the optimizations of link-time // optimization takes place. When LTO is in use, some input files are // not in native object file format but in the LLVM bitcode format. @@ -1595,6 +1730,7 @@ static Symbol *addUndefined(StringRef name) { // Because all bitcode files that the program consists of are passed to // the compiler at once, it can do a whole-program optimization. template <class ELFT> void LinkerDriver::compileBitcodeFiles() { + llvm::TimeTraceScope timeScope("LTO"); // Compile bitcode files and replace bitcode symbols. lto.reset(new BitcodeCompiler); for (BitcodeFile *file : bitcodeFiles) @@ -1603,8 +1739,11 @@ template <class ELFT> void LinkerDriver::compileBitcodeFiles() { for (InputFile *file : lto->compile()) { auto *obj = cast<ObjFile<ELFT>>(file); obj->parse(/*ignoreComdats=*/true); - for (Symbol *sym : obj->getGlobalSymbols()) - sym->parseSymbolVersion(); + + // Parse '@' in symbol names for non-relocatable output. + if (!config->relocatable) + for (Symbol *sym : obj->getGlobalSymbols()) + sym->parseSymbolVersion(); objectFiles.push_back(file); } } @@ -1698,8 +1837,9 @@ template <class ELFT> static uint32_t getAndFeatures() { uint32_t ret = -1; for (InputFile *f : objectFiles) { uint32_t features = cast<ObjFile<ELFT>>(f)->andFeatures; - if (config->forceBTI && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) { - warn(toString(f) + ": -z force-bti: file does not have BTI property"); + if (config->zForceBti && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) { + warn(toString(f) + ": -z force-bti: file does not have " + "GNU_PROPERTY_AARCH64_FEATURE_1_BTI property"); features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI; } else if (config->zForceIbt && !(features & GNU_PROPERTY_X86_FEATURE_1_IBT)) { @@ -1707,13 +1847,14 @@ template <class ELFT> static uint32_t getAndFeatures() { "GNU_PROPERTY_X86_FEATURE_1_IBT property"); features |= GNU_PROPERTY_X86_FEATURE_1_IBT; } + if (config->zPacPlt && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) { + warn(toString(f) + ": -z pac-plt: file does not have " + "GNU_PROPERTY_AARCH64_FEATURE_1_PAC property"); + features |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; + } ret &= features; } - // Force enable pointer authentication Plt, we don't warn in this case as - // this does not require support in the object for correctness. - if (config->pacPlt) - ret |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; // Force enable Shadow Stack. if (config->zShstk) ret |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; @@ -1724,6 +1865,7 @@ template <class ELFT> static uint32_t getAndFeatures() { // Do actual linking. Note that when this function is called, // all linker scripts have already been parsed. 
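One detail worth spelling out from the getAndFeatures changes above: the output's GNU property word is the bitwise AND of every input's word, so a single object built without BTI, IBT or PAC clears that bit for the whole link; -z force-bti, -z force-ibt and now -z pac-plt re-set the bit per file while warning about the offending object. A simplified model of the fold (function and parameter names are invented, and the post-loop -z shstk OR is left out):

#include <cstdint>
#include <initializer_list>

// AND-combine per-file feature words roughly the way getAndFeatures does.
// `forced` holds bits the user re-enables via -z force-bti/force-ibt/pac-plt.
uint32_t combineFeatures(std::initializer_list<uint32_t> files,
                         uint32_t forced) {
  uint32_t ret = -1; // start with all bits set, like the loop above
  for (uint32_t f : files)
    ret &= (f | forced); // forcing a bit still warns per offending file
  return ret;
}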
template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { + llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link")); // If a -hash-style option was not given, set to a default value, // which varies depending on the target. if (!args.hasArg(OPT_hash_style)) { @@ -1759,12 +1901,20 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { for (auto *arg : args.filtered(OPT_trace_symbol)) symtab->insert(arg->getValue())->traced = true; + // Handle -u/--undefined before input files. If both a.a and b.so define foo, + // -u foo a.a b.so will fetch a.a. + for (StringRef name : config->undefined) + addUnusedUndefined(name); + // Add all files to the symbol table. This will add almost all // symbols that we need to the symbol table. This process might // add files to the link, via autolinking, these files are always // appended to the Files vector. - for (size_t i = 0; i < files.size(); ++i) - parseFile(files[i]); + { + llvm::TimeTraceScope timeScope("Parse input files"); + for (size_t i = 0; i < files.size(); ++i) + parseFile(files[i]); + } // Now that we have every file, we can decide if we will need a // dynamic symbol table. @@ -1780,10 +1930,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { for (StringRef name : script->referencedSymbols) addUndefined(name); - // Handle the `--undefined <sym>` options. - for (StringRef arg : config->undefined) - if (Symbol *sym = symtab->find(arg)) - handleUndefined(sym); + // Prevent LTO from removing any definition referenced by -u. + for (StringRef name : config->undefined) + if (Defined *sym = dyn_cast_or_null<Defined>(symtab->find(name))) + sym->isUsedInRegularObj = true; // If an entry symbol is in a static archive, pull out that file now. if (Symbol *sym = symtab->find(config->entry)) @@ -1794,9 +1944,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { handleUndefinedGlob(pat); // Mark -init and -fini symbols so that the LTO doesn't eliminate them. - if (Symbol *sym = symtab->find(config->init)) + if (Symbol *sym = dyn_cast_or_null<Defined>(symtab->find(config->init))) sym->isUsedInRegularObj = true; - if (Symbol *sym = symtab->find(config->fini)) + if (Symbol *sym = dyn_cast_or_null<Defined>(symtab->find(config->fini))) sym->isUsedInRegularObj = true; // If any of our inputs are bitcode files, the LTO code generator may create @@ -1824,10 +1974,6 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { if (errorCount()) return; - // Now when we read all script files, we want to finalize order of linker - // script commands, which can be not yet final because of INSERT commands. - script->processInsertCommands(); - // We want to declare linker script's symbols early, // so that we can version them. // They also might be exported if referenced by DSOs. @@ -1863,19 +2009,22 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // With this the symbol table should be complete. After this, no new names // except a few linker-synthesized ones will be added to the symbol table. compileBitcodeFiles<ELFT>(); + + // Symbol resolution finished. Report backward reference problems. + reportBackrefs(); if (errorCount()) return; // If -thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in addCombinedLTOObject, so we are done if that's the case. 
- if (config->thinLTOIndexOnly) - return; - - // Likewise, --plugin-opt=emit-llvm is an option to make LTO create - // an output file in bitcode and exit, so that you can just get a - // combined bitcode file. - if (config->emitLLVM) + // Likewise, --plugin-opt=emit-llvm and --plugin-opt=emit-asm are the + // options to create output files in bitcode or assembly code + // repsectively. No object files are generated. + // Also bail out here when only certain thinLTO modules are specified for + // compilation. The intermediate object file are the expected output. + if (config->thinLTOIndexOnly || config->emitLLVM || config->ltoEmitAsm || + !config->thinLTOModulesToCompile.empty()) return; // Apply symbol renames for -wrap. @@ -1901,8 +2050,17 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // We do not want to emit debug sections if --strip-all // or -strip-debug are given. - return config->strip != StripPolicy::None && - (s->name.startswith(".debug") || s->name.startswith(".zdebug")); + if (config->strip == StripPolicy::None) + return false; + + if (isDebugSection(*s)) + return true; + if (auto *isec = dyn_cast<InputSection>(s)) + if (InputSectionBase *rel = isec->getRelocatedSection()) + if (isDebugSection(*rel)) + return true; + + return false; }); // Now that the number of partitions is fixed, save a pointer to the main @@ -2006,6 +2164,3 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Write the result to the file. writeResult<ELFT>(); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 9fcb36e81676..e33b07c0c9c9 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -23,15 +23,15 @@ #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" using namespace llvm; using namespace llvm::sys; using namespace llvm::opt; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // Create OptTable @@ -82,7 +82,7 @@ static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &args) { return cl::TokenizeWindowsCommandLine; return cl::TokenizeGNUCommandLine; } - if (Triple(sys::getProcessTriple()).getOS() == Triple::Win32) + if (Triple(sys::getProcessTriple()).isOSWindows()) return cl::TokenizeWindowsCommandLine; return cl::TokenizeGNUCommandLine; } @@ -143,7 +143,7 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> argv) { return args; } -void printHelp() { +void elf::printHelp() { ELFOptTable().PrintHelp( lld::outs(), (config->progName + " [options] file...").str().c_str(), "lld", false /*ShowHidden*/, true /*ShowAllAliases*/); @@ -160,12 +160,12 @@ void printHelp() { static std::string rewritePath(StringRef s) { if (fs::exists(s)) return relativeToRoot(s); - return s; + return std::string(s); } // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. -std::string createResponseFile(const opt::InputArgList &args) { +std::string elf::createResponseFile(const opt::InputArgList &args) { SmallString<0> data; raw_svector_ostream os(data); os << "--chroot .\n"; @@ -199,7 +199,7 @@ std::string createResponseFile(const opt::InputArgList &args) { os << toString(*arg) << "\n"; } } - return data.str(); + return std::string(data.str()); } // Find a file by concatenating given paths. 
If a resulting path @@ -212,11 +212,11 @@ static Optional<std::string> findFile(StringRef path1, const Twine &path2) { path::append(s, path1, path2); if (fs::exists(s)) - return s.str().str(); + return std::string(s); return None; } -Optional<std::string> findFromSearchPaths(StringRef path) { +Optional<std::string> elf::findFromSearchPaths(StringRef path) { for (StringRef dir : config->searchPaths) if (Optional<std::string> s = findFile(dir, path)) return s; @@ -225,7 +225,7 @@ Optional<std::string> findFromSearchPaths(StringRef path) { // This is for -l<basename>. We'll look for lib<basename>.so or lib<basename>.a from // search paths. -Optional<std::string> searchLibraryBaseName(StringRef name) { +Optional<std::string> elf::searchLibraryBaseName(StringRef name) { for (StringRef dir : config->searchPaths) { if (!config->isStatic) if (Optional<std::string> s = findFile(dir, "lib" + name + ".so")) @@ -237,7 +237,7 @@ Optional<std::string> searchLibraryBaseName(StringRef name) { } // This is for -l<namespec>. -Optional<std::string> searchLibrary(StringRef name) { +Optional<std::string> elf::searchLibrary(StringRef name) { if (name.startswith(":")) return findFromSearchPaths(name.substr(1)); return searchLibraryBaseName(name); @@ -246,11 +246,8 @@ Optional<std::string> searchLibrary(StringRef name) { // If a linker/version script doesn't exist in the current directory, we also // look for the script in the '-L' search paths. This matches the behaviour of // '-T', --version-script=, and linker script INPUT() command in ld.bfd. -Optional<std::string> searchScript(StringRef name) { +Optional<std::string> elf::searchScript(StringRef name) { if (fs::exists(name)) return name.str(); return findFromSearchPaths(name); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/EhFrame.cpp b/lld/ELF/EhFrame.cpp index a9c66f29446c..f97e3b604eb7 100644 --- a/lld/ELF/EhFrame.cpp +++ b/lld/ELF/EhFrame.cpp @@ -29,9 +29,9 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::dwarf; using namespace llvm::object; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { class EhReader { public: @@ -56,7 +56,7 @@ private: }; } -size_t readEhRecordSize(InputSectionBase *s, size_t off) { +size_t elf::readEhRecordSize(InputSectionBase *s, size_t off) { return EhReader(s, s->data().slice(off)).readEhRecordSize(); } @@ -148,7 +148,7 @@ void EhReader::skipAugP() { d = d.slice(size); } -uint8_t getFdeEncoding(EhSectionPiece *p) { +uint8_t elf::getFdeEncoding(EhSectionPiece *p) { return EhReader(p->sec, p->data()).getFdeEncoding(); } @@ -194,6 +194,3 @@ uint8_t EhReader::getFdeEncoding() { } return DW_EH_PE_absptr; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 8992b6564a8a..ecf0a282420d 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -80,10 +80,11 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Writer.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/Parallel.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include <algorithm> #include <atomic> @@ -91,9 +92,9 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { template <class ELFT> class ICF { public: @@ -399,7 +400,7 @@ template <class ELFT> void 
ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> fn) { // If threading is disabled or the number of sections are // too small to use threading, call Fn sequentially. - if (!threadsEnabled || sections.size() < 1024) { + if (parallel::strategy.ThreadsRequested == 1 || sections.size() < 1024) { forEachClassRange(0, sections.size(), fn); ++cnt; return; @@ -466,9 +467,8 @@ template <class ELFT> void ICF<ELFT>::run() { } // Initially, we use hash values to partition sections. - parallelForEach(sections, [&](InputSection *s) { - s->eqClass[0] = xxHash64(s->data()); - }); + parallelForEach( + sections, [&](InputSection *s) { s->eqClass[0] = xxHash64(s->data()); }); for (unsigned cnt = 0; cnt != 2; ++cnt) { parallelForEach(sections, [&](InputSection *s) { @@ -525,12 +525,12 @@ template <class ELFT> void ICF<ELFT>::run() { } // ICF entry point function. -template <class ELFT> void doIcf() { ICF<ELFT>().run(); } - -template void doIcf<ELF32LE>(); -template void doIcf<ELF32BE>(); -template void doIcf<ELF64LE>(); -template void doIcf<ELF64BE>(); +template <class ELFT> void elf::doIcf() { + llvm::TimeTraceScope timeScope("ICF"); + ICF<ELFT>().run(); +} -} // namespace elf -} // namespace lld +template void elf::doIcf<ELF32LE>(); +template void elf::doIcf<ELF32BE>(); +template void elf::doIcf<ELF64LE>(); +template void elf::doIcf<ELF64BE>(); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 43978cd66c61..c2f1830a981b 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -36,33 +36,35 @@ using namespace llvm::object; using namespace llvm::sys; using namespace llvm::sys::fs; using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; + +bool InputFile::isInGroup; +uint32_t InputFile::nextGroupId; + +std::vector<ArchiveFile *> elf::archiveFiles; +std::vector<BinaryFile *> elf::binaryFiles; +std::vector<BitcodeFile *> elf::bitcodeFiles; +std::vector<LazyObjFile *> elf::lazyObjFiles; +std::vector<InputFile *> elf::objectFiles; +std::vector<SharedFile *> elf::sharedFiles; + +std::unique_ptr<TarWriter> elf::tar; -namespace lld { // Returns "<internal>", "foo.a(bar.o)" or "baz.o". -std::string toString(const elf::InputFile *f) { +std::string lld::toString(const InputFile *f) { if (!f) return "<internal>"; if (f->toStringCache.empty()) { if (f->archiveName.empty()) - f->toStringCache = f->getName(); + f->toStringCache = std::string(f->getName()); else f->toStringCache = (f->archiveName + "(" + f->getName() + ")").str(); } return f->toStringCache; } -namespace elf { -bool InputFile::isInGroup; -uint32_t InputFile::nextGroupId; -std::vector<BinaryFile *> binaryFiles; -std::vector<BitcodeFile *> bitcodeFiles; -std::vector<LazyObjFile *> lazyObjFiles; -std::vector<InputFile *> objectFiles; -std::vector<SharedFile *> sharedFiles; - -std::unique_ptr<TarWriter> tar; - static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { unsigned char size; unsigned char endian; @@ -101,7 +103,7 @@ InputFile::InputFile(Kind k, MemoryBufferRef m) ++nextGroupId; } -Optional<MemoryBufferRef> readFile(StringRef path) { +Optional<MemoryBufferRef> elf::readFile(StringRef path) { // The --chroot option changes our virtual root directory. // This is useful when you are dealing with files created by --reproduce. 
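A concrete, made-up example of the rewrite performed just below: with --chroot /tmp/repro, an absolute input path is re-rooted before the file is opened, so response files captured by --reproduce keep working from inside the extracted tarball. A sketch of the idea, not lld's actual helper:

#include <string>

// Re-root absolute paths under the configured chroot directory.
std::string applyChroot(const std::string &chroot, const std::string &path) {
  if (!chroot.empty() && !path.empty() && path[0] == '/')
    return chroot + path; // e.g. "/tmp/repro" + "/usr/lib/crt1.o"
  return path;
}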
if (!config->chroot.empty() && path.startswith("/")) @@ -138,8 +140,10 @@ static bool isCompatible(InputFile *file) { return true; } - if (!config->emulation.empty()) { - error(toString(file) + " is incompatible with " + config->emulation); + StringRef target = + !config->bfdname.empty() ? config->bfdname : config->emulation; + if (!target.empty()) { + error(toString(file) + " is incompatible with " + target); return false; } @@ -148,8 +152,11 @@ static bool isCompatible(InputFile *file) { existing = objectFiles[0]; else if (!sharedFiles.empty()) existing = sharedFiles[0]; - else + else if (!bitcodeFiles.empty()) existing = bitcodeFiles[0]; + else + llvm_unreachable("Must have -m, OUTPUT_FORMAT or existing input file to " + "determine target emulation"); error(toString(file) + " is incompatible with " + toString(existing)); return false; @@ -168,6 +175,7 @@ template <class ELFT> static void doParseFile(InputFile *file) { // .a file if (auto *f = dyn_cast<ArchiveFile>(file)) { + archiveFiles.push_back(f); f->parse(); return; } @@ -201,7 +209,7 @@ template <class ELFT> static void doParseFile(InputFile *file) { } // Add symbols in File to the symbol table. -void parseFile(InputFile *file) { +void elf::parseFile(InputFile *file) { switch (config->ekind) { case ELF32LEKind: doParseFile<ELF32LE>(file); @@ -222,7 +230,7 @@ void parseFile(InputFile *file) { // Concatenates arguments to construct a string representing an error location. static std::string createFileLineMsg(StringRef path, unsigned line) { - std::string filename = path::filename(path); + std::string filename = std::string(path::filename(path)); std::string lineno = ":" + std::to_string(line); if (filename == path) return filename + lineno; @@ -243,7 +251,7 @@ static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, return createFileLineMsg(fileLine->first, fileLine->second); // File.sourceFile contains STT_FILE symbol, and that is a last resort. - return file.sourceFile; + return std::string(file.sourceFile); } std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, @@ -264,9 +272,17 @@ std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, } } -template <class ELFT> void ObjFile<ELFT>::initializeDwarf() { - dwarf = make<DWARFCache>(std::make_unique<DWARFContext>( - std::make_unique<LLDDwarfObj<ELFT>>(this))); +template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { + llvm::call_once(initDwarf, [this]() { + dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( + std::make_unique<LLDDwarfObj<ELFT>>(this), "", + [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, + [&](Error warning) { + warn(getName() + ": " + toString(std::move(warning))); + })); + }); + + return dwarf.get(); } // Returns the pair of file name and line number describing location of data @@ -274,9 +290,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeDwarf() { template <class ELFT> Optional<std::pair<std::string, unsigned>> ObjFile<ELFT>::getVariableLoc(StringRef name) { - llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); - - return dwarf->getVariableLoc(name); + return getDwarf()->getVariableLoc(name); } // Returns source line information for a given offset @@ -284,8 +298,6 @@ ObjFile<ELFT>::getVariableLoc(StringRef name) { template <class ELFT> Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, uint64_t offset) { - llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); - // Detect SectionIndex for specified section. 
uint64_t sectionIndex = object::SectionedAddress::UndefSection; ArrayRef<InputSectionBase *> sections = s->file->getSections(); @@ -296,9 +308,7 @@ Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, } } - // Use fake address calculated by adding section file offset and offset in - // section. See comments for ObjectInfo class. - return dwarf->getDILineInfo(s->getOffsetInFile() + offset, sectionIndex); + return getDwarf()->getDILineInfo(offset, sectionIndex); } ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) { @@ -417,6 +427,9 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { + if (!(sec.sh_flags & SHF_MERGE)) + return false; + // On a regular link we don't merge sections if -O0 (default is -O1). This // sometimes makes the linker significantly faster, although the output will // be bigger. @@ -452,10 +465,7 @@ bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + Twine(entSize) + ")"); - uint64_t flags = sec.sh_flags; - if (!(flags & SHF_MERGE)) - return false; - if (flags & SHF_WRITE) + if (sec.sh_flags & SHF_WRITE) fatal(toString(this) + ":(" + name + "): writable SHF_MERGE section is not supported"); @@ -622,6 +632,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { break; case SHT_SYMTAB: case SHT_STRTAB: + case SHT_REL: + case SHT_RELA: case SHT_NULL: break; default: @@ -629,11 +641,21 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { } } - // This block handles SHF_LINK_ORDER. + // We have a second loop. It is used to: + // 1) handle SHF_LINK_ORDER sections. + // 2) create SHT_REL[A] sections. In some cases the section header index of a + // relocation section may be smaller than that of the relocated section. In + // such cases, the relocation section would attempt to reference a target + // section that has not yet been created. For simplicity, delay creation of + // relocation sections until now. for (size_t i = 0, e = objSections.size(); i < e; ++i) { if (this->sections[i] == &InputSection::discarded) continue; const Elf_Shdr &sec = objSections[i]; + + if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) + this->sections[i] = createInputSection(sec); + if (!(sec.sh_flags & SHF_LINK_ORDER)) continue; @@ -662,7 +684,9 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { // the input objects have been compiled. static void updateARMVFPArgs(const ARMAttributeParser &attributes, const InputFile *f) { - if (!attributes.hasAttribute(ARMBuildAttrs::ABI_VFP_args)) + Optional<unsigned> attr = + attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); + if (!attr.hasValue()) // If an ABI tag isn't present then it is implicitly given the value of 0 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, // including some in glibc that don't use FP args (and should have value 3) @@ -670,7 +694,7 @@ static void updateARMVFPArgs(const ARMAttributeParser &attributes, // as a clash. return; - unsigned vfpArgs = attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); + unsigned vfpArgs = attr.getValue(); ARMVFPArgKind arg; switch (vfpArgs) { case ARMBuildAttrs::BaseAAPCS: @@ -707,9 +731,11 @@ static void updateARMVFPArgs(const ARMAttributeParser &attributes, // is compiled with an architecture that supports these features then lld is // permitted to use them. 
static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { - if (!attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) + Optional<unsigned> attr = + attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); + if (!attr.hasValue()) return; - auto arch = attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); + auto arch = attr.getValue(); switch (arch) { case ARMBuildAttrs::Pre_v4: case ARMBuildAttrs::v4: @@ -842,7 +868,13 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { break; ARMAttributeParser attributes; ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(&sec)); - attributes.Parse(contents, /*isLittle*/ config->ekind == ELF32LEKind); + if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind + ? support::little + : support::big)) { + auto *isec = make<InputSection>(*this, sec, name); + warn(toString(isec) + ": " + llvm::toString(std::move(e))); + break; + } updateSupportedARMFeatures(attributes); updateARMVFPArgs(attributes, this); @@ -1029,51 +1061,68 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); this->symbols.resize(eSyms.size()); - // Our symbol table may have already been partially initialized + // Fill in InputFile::symbols. Some entries have been initialized // because of LazyObjFile. - for (size_t i = 0, end = eSyms.size(); i != end; ++i) - if (!this->symbols[i] && eSyms[i].getBinding() != STB_LOCAL) - this->symbols[i] = - symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); - - // Fill this->Symbols. A symbol is either local or global. for (size_t i = 0, end = eSyms.size(); i != end; ++i) { + if (this->symbols[i]) + continue; const Elf_Sym &eSym = eSyms[i]; - - // Read symbol attributes. uint32_t secIdx = getSectionIndex(eSym); if (secIdx >= this->sections.size()) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); + if (eSym.getBinding() != STB_LOCAL) { + if (i < firstGlobal) + error(toString(this) + ": non-local symbol (" + Twine(i) + + ") found at index < .symtab's sh_info (" + Twine(firstGlobal) + + ")"); + this->symbols[i] = + symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); + continue; + } + + // Handle local symbols. Local symbols are not added to the symbol + // table because they are not visible from other object files. We + // allocate symbol instances and add their pointers to symbols. + if (i >= firstGlobal) + errorOrWarn(toString(this) + ": STB_LOCAL symbol (" + Twine(i) + + ") found at index >= .symtab's sh_info (" + + Twine(firstGlobal) + ")"); InputSectionBase *sec = this->sections[secIdx]; + uint8_t type = eSym.getType(); + if (type == STT_FILE) + sourceFile = CHECK(eSym.getName(this->stringTable), this); + if (this->stringTable.size() <= eSym.st_name) + fatal(toString(this) + ": invalid symbol name offset"); + StringRefZ name = this->stringTable.data() + eSym.st_name; + + if (eSym.st_shndx == SHN_UNDEF) + this->symbols[i] = + make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type); + else if (sec == &InputSection::discarded) + this->symbols[i] = + make<Undefined>(this, name, STB_LOCAL, eSym.st_other, type, + /*discardedSecIdx=*/secIdx); + else + this->symbols[i] = make<Defined>(this, name, STB_LOCAL, eSym.st_other, + type, eSym.st_value, eSym.st_size, sec); + } + + // Symbol resolution of non-local symbols. 
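The initializeSymbols() rewrite above checks each symbol's binding against .symtab's sh_info, which the ELF spec defines as the index of the first non-local symbol. A sketch of that validation over a plain array, using a hypothetical Sym record:

#include <cstdio>
#include <vector>

struct Sym { bool isLocal; };

// True if all locals precede firstGlobal and all non-locals follow it,
// i.e. the layout that sh_info promises.
static bool checkSymtabOrder(const std::vector<Sym> &syms, size_t firstGlobal) {
  for (size_t i = 0; i < syms.size(); ++i) {
    if (syms[i].isLocal && i >= firstGlobal) {
      std::printf("STB_LOCAL symbol %zu at index >= sh_info (%zu)\n", i,
                  firstGlobal);
      return false;
    }
    if (!syms[i].isLocal && i < firstGlobal) {
      std::printf("non-local symbol %zu at index < sh_info (%zu)\n", i,
                  firstGlobal);
      return false;
    }
  }
  return true;
}

int main() {
  std::vector<Sym> syms = {{true}, {true}, {false}};
  std::printf("%s\n", checkSymtabOrder(syms, 2) ? "ok" : "malformed");
}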
+ for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { + const Elf_Sym &eSym = eSyms[i]; uint8_t binding = eSym.getBinding(); + if (binding == STB_LOCAL) + continue; // Errored above. + + uint32_t secIdx = getSectionIndex(eSym); + InputSectionBase *sec = this->sections[secIdx]; uint8_t stOther = eSym.st_other; uint8_t type = eSym.getType(); uint64_t value = eSym.st_value; uint64_t size = eSym.st_size; StringRefZ name = this->stringTable.data() + eSym.st_name; - // Handle local symbols. Local symbols are not added to the symbol - // table because they are not visible from other object files. We - // allocate symbol instances and add their pointers to Symbols. - if (binding == STB_LOCAL) { - if (eSym.getType() == STT_FILE) - sourceFile = CHECK(eSym.getName(this->stringTable), this); - - if (this->stringTable.size() <= eSym.st_name) - fatal(toString(this) + ": invalid symbol name offset"); - - if (eSym.st_shndx == SHN_UNDEF) - this->symbols[i] = make<Undefined>(this, name, binding, stOther, type); - else if (sec == &InputSection::discarded) - this->symbols[i] = make<Undefined>(this, name, binding, stOther, type, - /*DiscardedSecIdx=*/secIdx); - else - this->symbols[i] = - make<Defined>(this, name, binding, stOther, type, value, size, sec); - continue; - } - // Handle global undefined symbols. if (eSym.st_shndx == SHN_UNDEF) { this->symbols[i]->resolve(Undefined{this, name, binding, stOther, type}); @@ -1097,8 +1146,20 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { - this->symbols[i]->resolve( - Undefined{this, name, binding, stOther, type, secIdx}); + Undefined und{this, name, binding, stOther, type, secIdx}; + Symbol *sym = this->symbols[i]; + // !ArchiveFile::parsed or LazyObjFile::fetched means that the file + // containing this object has not finished processing, i.e. this symbol is + // a result of a lazy symbol fetch. We should demote the lazy symbol to an + // Undefined so that any relocations outside of the group to it will + // trigger a discarded section error. + if ((sym->symbolKind == Symbol::LazyArchiveKind && + !cast<ArchiveFile>(sym->file)->parsed) || + (sym->symbolKind == Symbol::LazyObjectKind && + cast<LazyObjFile>(sym->file)->fetched)) + sym->replace(und); + else + sym->resolve(und); continue; } @@ -1121,6 +1182,10 @@ ArchiveFile::ArchiveFile(std::unique_ptr<Archive> &&file) void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); + + // Inform a future invocation of ObjFile<ELFT>::initializeSymbols() that this + // archive has been processed. + parsed = true; } // Returns a buffer pointing to a member file containing a given symbol. @@ -1142,12 +1207,24 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) { if (tar && c.getParent()->isThin()) tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer()); - InputFile *file = createObjectFile( - mb, getName(), c.getParent()->isThin() ? 0 : c.getChildOffset()); + InputFile *file = createObjectFile(mb, getName(), c.getChildOffset()); file->groupId = groupId; parseFile(file); } +size_t ArchiveFile::getMemberCount() const { + size_t count = 0; + Error err = Error::success(); + for (const Archive::Child &c : file->children(err)) { + (void)c; + ++count; + } + // This function is used by --print-archive-stats=, where an error does not + // really matter. 
+ consumeError(std::move(err)); + return count; +} + unsigned SharedFile::vernauxNum; // Parse the version definitions in the object file if present, and return a @@ -1179,6 +1256,40 @@ static std::vector<const void *> parseVerdefs(const uint8_t *base, return verdefs; } +// Parse SHT_GNU_verneed to properly set the name of a versioned undefined +// symbol. We detect fatal issues which would cause vulnerabilities, but do not +// implement sophisticated error checking like in llvm-readobj because the value +// of such diagnostics is low. +template <typename ELFT> +std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, + const typename ELFT::Shdr *sec) { + if (!sec) + return {}; + std::vector<uint32_t> verneeds; + ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(sec), this); + const uint8_t *verneedBuf = data.begin(); + for (unsigned i = 0; i != sec->sh_info; ++i) { + if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) + fatal(toString(this) + " has an invalid Verneed"); + auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); + const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; + for (unsigned j = 0; j != vn->vn_cnt; ++j) { + if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) + fatal(toString(this) + " has an invalid Vernaux"); + auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); + if (aux->vna_name >= this->stringTable.size()) + fatal(toString(this) + " has a Vernaux with an invalid vna_name"); + uint16_t version = aux->vna_other & VERSYM_VERSION; + if (version >= verneeds.size()) + verneeds.resize(version + 1); + verneeds[version] = aux->vna_name; + vernauxBuf += aux->vna_next; + } + verneedBuf += vn->vn_next; + } + return verneeds; +} + // We do not usually care about alignments of data in shared object // files because the loader takes care of it. However, if we promote a // DSO symbol to point to .bss due to copy relocation, we need to keep @@ -1222,6 +1333,7 @@ template <class ELFT> void SharedFile::parse() { const Elf_Shdr *versymSec = nullptr; const Elf_Shdr *verdefSec = nullptr; + const Elf_Shdr *verneedSec = nullptr; // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. for (const Elf_Shdr &sec : sections) { @@ -1238,6 +1350,9 @@ template <class ELFT> void SharedFile::parse() { case SHT_GNU_verdef: verdefSec = &sec; break; + case SHT_GNU_verneed: + verneedSec = &sec; + break; } } @@ -1277,12 +1392,13 @@ template <class ELFT> void SharedFile::parse() { sharedFiles.push_back(this); verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); + std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); // Parse ".gnu.version" section which is a parallel array for the symbol // table. If a given file doesn't have a ".gnu.version" section, we use // VER_NDX_GLOBAL. size_t size = numELFSyms - firstGlobal; - std::vector<uint32_t> versyms(size, VER_NDX_GLOBAL); + std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); if (versymSec) { ArrayRef<Elf_Versym> versym = CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(versymSec), @@ -1313,7 +1429,22 @@ template <class ELFT> void SharedFile::parse() { continue; } + uint16_t idx = versyms[i] & ~VERSYM_HIDDEN; if (sym.isUndefined()) { + // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but + // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. 
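The new parseVerneed() above walks SHT_GNU_verneed by following the vn_aux/vn_next/vna_next byte offsets and records vna_name per version index. A simplified sketch of that traversal over a raw buffer; the struct layouts here are reduced stand-ins, not the real Elf_Verneed/Elf_Vernaux definitions:

#include <cstdint>
#include <cstring>
#include <vector>

struct Verneed { uint16_t vn_cnt; uint32_t vn_aux; uint32_t vn_next; };
struct Vernaux { uint32_t vna_name; uint16_t vna_other; uint32_t vna_next; };

// Map "version index -> string table offset of the version name".
std::vector<uint32_t> mapVersionsToNames(const uint8_t *buf, size_t size,
                                         unsigned verneedCount) {
  std::vector<uint32_t> names;
  const uint8_t *vnBuf = buf;
  for (unsigned i = 0; i != verneedCount; ++i) {
    if (vnBuf + sizeof(Verneed) > buf + size)
      break; // truncated input; the real code reports a fatal error here
    Verneed vn;
    std::memcpy(&vn, vnBuf, sizeof(vn));
    const uint8_t *auxBuf = vnBuf + vn.vn_aux;
    for (unsigned j = 0; j != vn.vn_cnt; ++j) {
      if (auxBuf + sizeof(Vernaux) > buf + size)
        break;
      Vernaux aux;
      std::memcpy(&aux, auxBuf, sizeof(aux));
      uint16_t idx = aux.vna_other & 0x7fff; // VERSYM_VERSION mask
      if (idx >= names.size())
        names.resize(idx + 1);
      names[idx] = aux.vna_name;
      auxBuf += aux.vna_next;
    }
    vnBuf += vn.vn_next;
  }
  return names;
}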
+ if (idx != VER_NDX_LOCAL && idx != VER_NDX_GLOBAL) { + if (idx >= verneeds.size()) { + error("corrupt input file: version need index " + Twine(idx) + + " for symbol " + name + " is out of bounds\n>>> defined in " + + toString(this)); + continue; + } + StringRef verName = this->stringTable.data() + verneeds[idx]; + versionedNameBuffer.clear(); + name = + saver.save((name + "@" + verName).toStringRef(versionedNameBuffer)); + } Symbol *s = symtab->addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); s->exportDynamic = true; @@ -1323,7 +1454,6 @@ template <class ELFT> void SharedFile::parse() { // MIPS BFD linker puts _gp_disp symbol into DSO files and incorrectly // assigns VER_NDX_LOCAL to this section global symbol. Here is a // workaround for this bug. - uint32_t idx = versyms[i] & ~VERSYM_HIDDEN; if (config->emachine == EM_MIPS && idx == VER_NDX_LOCAL && name == "_gp_disp") continue; @@ -1405,7 +1535,7 @@ static uint8_t getBitcodeMachineKind(StringRef path, const Triple &t) { BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) : InputFile(BitcodeKind, mb) { - this->archiveName = archiveName; + this->archiveName = std::string(archiveName); std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) @@ -1417,10 +1547,11 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, // into consideration at LTO time (which very likely causes undefined // symbols later in the link stage). So we append file offset to make // filename unique. - StringRef name = archiveName.empty() - ? saver.save(path) - : saver.save(archiveName + "(" + path + " at " + - utostr(offsetInArchive) + ")"); + StringRef name = + archiveName.empty() + ? saver.save(path) + : saver.save(archiveName + "(" + path::filename(path) + " at " + + utostr(offsetInArchive) + ")"); MemoryBufferRef mbref(mb.getBuffer(), name); obj = CHECK(lto::InputFile::create(mbref), this); @@ -1509,8 +1640,8 @@ void BinaryFile::parse() { STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr}); } -InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) { +InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive) { if (isBitcode(mb)) return make<BitcodeFile>(mb, archiveName, offsetInArchive); @@ -1529,14 +1660,13 @@ InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, } void LazyObjFile::fetch() { - if (mb.getBuffer().empty()) + if (fetched) return; + fetched = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; - mb = {}; - // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); @@ -1593,21 +1723,22 @@ template <class ELFT> void LazyObjFile::parse() { continue; sym->resolve(LazyObject{*this, sym->getName()}); - // MemoryBuffer is emptied if this file is instantiated as ObjFile. - if (mb.getBuffer().empty()) + // If fetched, stop iterating because this->symbols has been transferred + // to the instantiated ObjFile. 
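Once the version index has been mapped to a name, the undefined symbol above is renamed from "foo" to "foo@VERSION" before it reaches the symbol table. The string construction itself is simple; as a sketch with std::string in place of Twine and the saver allocator:

#include <string>

// Display/lookup name for a versioned undefined reference. A single '@' is
// used for references; "@@" marks the default version on a definition.
std::string versionedName(const std::string &name, const std::string &version) {
  return name + "@" + version;
}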
+ if (fetched) return; } return; } } -std::string replaceThinLTOSuffix(StringRef path) { +std::string elf::replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; if (path.consume_back(suffix)) return (path + repl).str(); - return path; + return std::string(path); } template void BitcodeFile::parse<ELF32LE>(); @@ -1620,15 +1751,12 @@ template void LazyObjFile::parse<ELF32BE>(); template void LazyObjFile::parse<ELF64LE>(); template void LazyObjFile::parse<ELF64BE>(); -template class ObjFile<ELF32LE>; -template class ObjFile<ELF32BE>; -template class ObjFile<ELF64LE>; -template class ObjFile<ELF64BE>; +template class elf::ObjFile<ELF32LE>; +template class elf::ObjFile<ELF32BE>; +template class elf::ObjFile<ELF64LE>; +template class elf::ObjFile<ELF64BE>; template void SharedFile::parse<ELF32LE>(); template void SharedFile::parse<ELF32BE>(); template void SharedFile::parse<ELF64LE>(); template void SharedFile::parse<ELF64BE>(); - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index a310ba551bd4..7af85e417ca5 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -38,8 +38,6 @@ class DWARFCache; std::string toString(const elf::InputFile *f); namespace elf { -class InputFile; -class InputSectionBase; using llvm::object::Archive; @@ -200,7 +198,7 @@ public: ArrayRef<Symbol *> getGlobalSymbols(); ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { - this->archiveName = archiveName; + this->archiveName = std::string(archiveName); } void parse(bool ignoreComdats = false); @@ -250,11 +248,14 @@ public: // SHT_LLVM_CALL_GRAPH_PROFILE table ArrayRef<Elf_CGProfile> cgProfile; + // Get cached DWARF information. + DWARFCache *getDwarf(); + private: void initializeSections(bool ignoreComdats); void initializeSymbols(); void initializeJustSymbols(); - void initializeDwarf(); + InputSectionBase *getRelocTarget(const Elf_Shdr &sec); InputSectionBase *createInputSection(const Elf_Shdr &sec); StringRef getSectionName(const Elf_Shdr &sec); @@ -282,8 +283,8 @@ private: // reporting. Linker may find reasonable number of errors in a // single object file, so we cache debugging information in order to // parse it only once for each object file we link. - DWARFCache *dwarf; - llvm::once_flag initDwarfLine; + std::unique_ptr<DWARFCache> dwarf; + llvm::once_flag initDwarf; }; // LazyObjFile is analogous to ArchiveFile in the sense that @@ -298,7 +299,7 @@ public: LazyObjFile(MemoryBufferRef m, StringRef archiveName, uint64_t offsetInArchive) : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { - this->archiveName = archiveName; + this->archiveName = std::string(archiveName); } static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } @@ -306,6 +307,8 @@ public: template <class ELFT> void parse(); void fetch(); + bool fetched = false; + private: uint64_t offsetInArchive; }; @@ -323,6 +326,11 @@ public: // more than once.) 
void fetch(const Archive::Symbol &sym); + size_t getMemberCount() const; + size_t getFetchedMemberCount() const { return seen.size(); } + + bool parsed = false; + private: std::unique_ptr<Archive> file; llvm::DenseSet<uint64_t> seen; @@ -341,7 +349,7 @@ public: class SharedFile : public ELFFileBase { public: SharedFile(MemoryBufferRef m, StringRef defaultSoName) - : ELFFileBase(SharedKind, m), soName(defaultSoName), + : ELFFileBase(SharedKind, m), soName(std::string(defaultSoName)), isNeeded(!config->asNeeded) {} // This is actually a vector of Elf_Verdef pointers. @@ -366,6 +374,11 @@ public: // Used for --as-needed bool isNeeded; + +private: + template <typename ELFT> + std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, + const typename ELFT::Shdr *sec); }; class BinaryFile : public InputFile { @@ -384,6 +397,7 @@ inline bool isBitcode(MemoryBufferRef mb) { std::string replaceThinLTOSuffix(StringRef path); +extern std::vector<ArchiveFile *> archiveFiles; extern std::vector<BinaryFile *> binaryFiles; extern std::vector<BitcodeFile *> bitcodeFiles; extern std::vector<LazyObjFile *> lazyObjFiles; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index aab272f53a73..7a7ebd974909 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -36,16 +36,17 @@ using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace llvm::sys; +using namespace lld; +using namespace lld::elf; + +std::vector<InputSectionBase *> elf::inputSections; +DenseSet<std::pair<const Symbol *, uint64_t>> elf::ppc64noTocRelax; -namespace lld { // Returns a string to construct an error message. -std::string toString(const elf::InputSectionBase *sec) { +std::string lld::toString(const InputSectionBase *sec) { return (toString(sec->file) + ":(" + sec->name + ")").str(); } -namespace elf { -std::vector<InputSectionBase *> inputSections; - template <class ELFT> static ArrayRef<uint8_t> getSectionContents(ObjFile<ELFT> &file, const typename ELFT::Shdr &hdr) { @@ -138,7 +139,7 @@ size_t InputSectionBase::getSize() const { return s->getSize(); if (uncompressedSize >= 0) return uncompressedSize; - return rawData.size(); + return rawData.size() - bytesDropped; } void InputSectionBase::uncompress() const { @@ -307,7 +308,7 @@ std::string InputSectionBase::getLocation(uint64_t offset) { // File->sourceFile contains STT_FILE symbol that contains a // source file name. If it's missing, we use an object file name. - std::string srcFile = getFile<ELFT>()->sourceFile; + std::string srcFile = std::string(getFile<ELFT>()->sourceFile); if (srcFile.empty()) srcFile = toString(file); @@ -338,7 +339,7 @@ std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) { // // path/to/foo.o:(function bar) in archive path/to/bar.a std::string InputSectionBase::getObjMsg(uint64_t off) { - std::string filename = file->getName(); + std::string filename = std::string(file->getName()); std::string archive; if (!file->archiveName.empty()) @@ -438,12 +439,12 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { // hopefully creates a frame that is ignored at runtime. Also, don't warn // on .gcc_except_table and debug sections. // - // See the comment in maybeReportUndefined for PPC64 .toc . 
+ // See the comment in maybeReportUndefined for PPC32 .got2 and PPC64 .toc auto *d = dyn_cast<Defined>(&sym); if (!d) { - if (!sec->name.startswith(".debug") && - !sec->name.startswith(".zdebug") && sec->name != ".eh_frame" && - sec->name != ".gcc_except_table" && sec->name != ".toc") { + if (!isDebugSection(*sec) && sec->name != ".eh_frame" && + sec->name != ".gcc_except_table" && sec->name != ".got2" && + sec->name != ".toc") { uint32_t secIdx = cast<Undefined>(sym).discardedSecIdx; Elf_Shdr_Impl<ELFT> sec = CHECK(file->getObj().sections(), file)[secIdx]; @@ -465,7 +466,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { if (!RelTy::IsRela) addend = target->getImplicitAddend(bufLoc, type); - if (config->emachine == EM_MIPS && config->relocatable && + if (config->emachine == EM_MIPS && target->getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. By default, // this value has 0x7ff0 offset from a .got section. But @@ -485,6 +486,14 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; else if (config->relocatable && type != target->noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); + } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && + p->r_addend >= 0x8000) { + // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 + // indicates that r30 is relative to the input section .got2 + // (r_addend>=0x8000), after linking, r30 should be relative to the output + // section .got2 . To compensate for the shift, adjust r_addend by + // ppc32Got2OutSecOff. + p->r_addend += sec->file->ppc32Got2OutSecOff; } } } @@ -518,9 +527,14 @@ static uint32_t getARMUndefinedRelativeWeakVA(RelType type, uint32_t a, case R_ARM_MOVW_PREL_NC: case R_ARM_MOVT_PREL: case R_ARM_REL32: + case R_ARM_THM_ALU_PREL_11_0: case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_PC12: return p + a; + // p + a is unrepresentable as negative immediates can't be encoded. + case R_ARM_THM_PC8: + return p; } llvm_unreachable("ARM pc-relative relocation expected\n"); } @@ -542,6 +556,7 @@ static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t type, uint64_t a, case R_AARCH64_PREL64: case R_AARCH64_ADR_PREL_LO21: case R_AARCH64_LD_PREL_LO19: + case R_AARCH64_PLT32: return p + a; } llvm_unreachable("AArch64 pc-relative relocation expected\n"); @@ -637,6 +652,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { // Variant 2. case EM_HEXAGON: + case EM_SPARCV9: case EM_386: case EM_X86_64: return s.getVA(0) - tls->p_memsz - @@ -646,8 +662,9 @@ static int64_t getTlsTpOffset(const Symbol &s) { } } -static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, - uint64_t p, const Symbol &sym, RelExpr expr) { +uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + int64_t a, uint64_t p, + const Symbol &sym, RelExpr expr) { switch (expr) { case R_ABS: case R_DTPREL: @@ -695,7 +712,7 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf // microMIPS variants of these relocations use slightly different // expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi() - // to correctly handle less-sugnificant bit of the microMIPS symbol. + // to correctly handle less-significant bit of the microMIPS symbol. 
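Most of the R_PC cases further down this hunk reduce to the same arithmetic: the symbol address plus the addend minus the place being relocated. A tiny sketch of that formula, plus the variant used by the new R_ARM_PCA expression, which first aligns the place down to 4 bytes:

#include <cstdint>
#include <cstdio>

// value = S + A - P, the usual PC-relative relocation computation.
uint64_t pcRelValue(uint64_t s, int64_t a, uint64_t p) { return s + a - p; }

// Some Thumb relocations treat the place as Align(P, 4).
uint64_t pcRelValueAligned(uint64_t s, int64_t a, uint64_t p) {
  return s + a - (p & ~uint64_t(3));
}

int main() {
  std::printf("%llu\n", (unsigned long long)pcRelValue(0x1000, 0, 0xff0)); // 16
}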
uint64_t v = in.mipsGot->getGp(file) + a - p; if (type == R_MIPS_LO16 || type == R_MICROMIPS_LO16) v += 4; @@ -732,8 +749,12 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, *hiRel->sym, hiRel->expr); return 0; } - case R_PC: { + case R_PC: + case R_ARM_PCA: { uint64_t dest; + if (expr == R_ARM_PCA) + // Some PC relative ARM (Thumb) relocations align down the place. + p = p & 0xfffffffc; if (sym.isUndefWeak()) { // On ARM and AArch64 a branch to an undefined weak resolves to the // next instruction, otherwise the place. @@ -788,7 +809,7 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, // --noinhibit-exec, even a non-weak undefined reference may reach here. // Just return A, which matches R_ABS, and the behavior of some dynamic // loaders. - if (sym.isUndefined()) + if (sym.isUndefined() || sym.isLazy()) return a; return getTlsTpOffset(sym) + a; case R_RELAX_TLS_GD_TO_LE_NEG: @@ -832,6 +853,16 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, template <class ELFT, class RelTy> void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; + const bool isDebug = isDebugSection(*this); + const bool isDebugLocOrRanges = + isDebug && (name == ".debug_loc" || name == ".debug_ranges"); + const bool isDebugLine = isDebug && name == ".debug_line"; + Optional<uint64_t> tombstone; + for (const auto &patAndValue : llvm::reverse(config->deadRelocInNonAlloc)) + if (patAndValue.first.match(this->name)) { + tombstone = patAndValue.second; + break; + } for (const RelTy &rel : rels) { RelType type = rel.getType(config->isMips64EL); @@ -854,11 +885,17 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { if (expr == R_NONE) continue; + if (expr == R_SIZE) { + target->relocateNoSym(bufLoc, type, + SignExtend64<bits>(sym.getSize() + addend)); + continue; + } + if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) { std::string msg = getLocation<ELFT>(offset) + ": has non-ABS relocation " + toString(type) + " against symbol '" + toString(sym) + "'"; - if (expr != R_PC) { + if (expr != R_PC && expr != R_ARM_PCA) { error(msg); return; } @@ -871,15 +908,49 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { // address 0. For bug-compatibilty, we accept them with warnings. We // know Steel Bank Common Lisp as of 2018 have this bug. warn(msg); - target->relocateOne(bufLoc, type, - SignExtend64<bits>(sym.getVA(addend - offset))); + target->relocateNoSym(bufLoc, type, + SignExtend64<bits>(sym.getVA(addend - offset))); continue; } - if (sym.isTls() && !Out::tlsPhdr) - target->relocateOne(bufLoc, type, 0); - else - target->relocateOne(bufLoc, type, SignExtend64<bits>(sym.getVA(addend))); + if (tombstone || + (isDebug && (type == target->symbolicRel || expr == R_DTPREL))) { + // Resolve relocations in .debug_* referencing (discarded symbols or ICF + // folded section symbols) to a tombstone value. Resolving to addend is + // unsatisfactory because the result address range may collide with a + // valid range of low address, or leave multiple CUs claiming ownership of + // the same range of code, which may confuse consumers. + // + // To address the problems, we use -1 as a tombstone value for most + // .debug_* sections. We have to ignore the addend because we don't want + // to resolve an address attribute (which may have a non-zero addend) to + // -1+addend (wrap around to a low address). 
+ // + // R_DTPREL type relocations represent an offset into the dynamic thread + // vector. The computed value is st_value plus a non-negative offset. + // Negative values are invalid, so -1 can be used as the tombstone value. + // + // If the referenced symbol is discarded (made Undefined), or the + // section defining the referenced symbol is garbage collected, + // sym.getOutputSection() is nullptr. `ds->section->repl != ds->section` + // catches the ICF folded case. However, resolving a relocation in + // .debug_line to -1 would stop debugger users from setting breakpoints on + // the folded-in function, so exclude .debug_line. + // + // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value + // (base address selection entry), so -2 is used. + auto *ds = dyn_cast<Defined>(&sym); + if (!sym.getOutputSection() || + (ds && ds->section->repl != ds->section && !isDebugLine)) { + // If -z dead-reloc-in-nonalloc= is specified, respect it. + const uint64_t value = + tombstone ? SignExtend64<bits>(*tombstone) + : (isDebugLocOrRanges ? UINT64_MAX - 1 : UINT64_MAX); + target->relocateNoSym(bufLoc, type, value); + continue; + } + } + target->relocateNoSym(bufLoc, type, SignExtend64<bits>(sym.getVA(addend))); } } @@ -896,7 +967,7 @@ static void relocateNonAllocForRelocatable(InputSection *sec, uint8_t *buf) { assert(rel.expr == R_ABS); uint8_t *bufLoc = buf + rel.offset + sec->outSecOff; uint64_t targetVA = SignExtend64(rel.sym->getVA(rel.addend), bits); - target->relocateOne(bufLoc, rel.type, targetVA); + target->relocate(bufLoc, rel, targetVA); } } @@ -924,6 +995,8 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { const unsigned bits = config->wordsize * 8; for (const Relocation &rel : relocations) { + if (rel.expr == R_NONE) + continue; uint64_t offset = rel.offset; if (auto *sec = dyn_cast<InputSection>(this)) offset += sec->outSecOff; @@ -939,29 +1012,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { switch (expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - target->relaxGot(bufLoc, type, targetVA); + target->relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_TOC: - if (!tryRelaxPPC64TocIndirection(type, rel, bufLoc)) - target->relocateOne(bufLoc, type, targetVA); + // rel.sym refers to the STT_SECTION symbol associated to the .toc input + // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC + // entry, there may be R_PPC64_TOC16_HA not paired with + // R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation + // opportunities but is safe. 
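The tombstone logic above boils down to a small value-selection rule: honor -z dead-reloc-in-nonalloc= if a pattern matched, otherwise use -1, or -2 for pre-DWARF-v5 .debug_loc/.debug_ranges where -1 is already the reserved base-address-selection marker. As an isolated sketch:

#include <cstdint>
#include <optional>
#include <string>

uint64_t debugTombstone(const std::string &secName,
                        std::optional<uint64_t> userOverride) {
  if (userOverride)
    return *userOverride; // from -z dead-reloc-in-nonalloc=<glob>=<value>
  bool isLocOrRanges = secName == ".debug_loc" || secName == ".debug_ranges";
  return isLocOrRanges ? UINT64_MAX - 1 : UINT64_MAX;
}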
+ if (ppc64noTocRelax.count({rel.sym, rel.addend}) || + !tryRelaxPPC64TocIndirection(rel, bufLoc)) + target->relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: - target->relaxTlsIeToLe(bufLoc, type, targetVA); + target->relaxTlsIeToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_LD_TO_LE_ABS: - target->relaxTlsLdToLe(bufLoc, type, targetVA); + target->relaxTlsLdToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: - target->relaxTlsGdToLe(bufLoc, type, targetVA); + target->relaxTlsGdToLe(bufLoc, rel, targetVA); break; case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOTPLT: - target->relaxTlsGdToIe(bufLoc, type, targetVA); + target->relaxTlsGdToIe(bufLoc, rel, targetVA); break; case R_PPC64_CALL: // If this is a call to __tls_get_addr, it may be part of a TLS @@ -986,13 +1065,25 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { } write32(bufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } - target->relocateOne(bufLoc, type, targetVA); + target->relocate(bufLoc, rel, targetVA); break; default: - target->relocateOne(bufLoc, type, targetVA); + target->relocate(bufLoc, rel, targetVA); break; } } + + // Apply jumpInstrMods. jumpInstrMods are created when the opcode of + // a jmp insn must be modified to shrink the jmp insn or to flip the jmp + // insn. This is primarily used to relax and optimize jumps created with + // basic block sections. + if (auto *sec = dyn_cast<InputSection>(this)) { + for (const JumpInstrMod &jumpMod : jumpInstrMods) { + uint64_t offset = jumpMod.offset + sec->outSecOff; + uint8_t *bufLoc = buf + offset; + target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); + } + } } // For each function-defining prologue, find any calls to __morestack, @@ -1090,7 +1181,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *buf, end, f->stOther)) continue; if (!getFile<ELFT>()->someNoSplitStack) - error(toString(this) + ": " + f->getName() + + error(lld::toString(this) + ": " + f->getName() + " (with -fsplit-stack) calls " + rel.sym->getName() + " (without -fsplit-stack), but couldn't adjust its prologue"); } @@ -1353,6 +1444,3 @@ template void EhInputSection::split<ELF32LE>(); template void EhInputSection::split<ELF32BE>(); template void EhInputSection::split<ELF64LE>(); template void EhInputSection::split<ELF64BE>(); - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 3c42af7db7b4..112c6ab49a38 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -128,6 +128,26 @@ public: return cast_or_null<ObjFile<ELFT>>(file); } + // If basic block sections are enabled, many code sections could end up with + // one or two jump instructions at the end that could be relaxed to a smaller + // instruction. The members below help trimming the trailing jump instruction + // and shrinking a section. + unsigned bytesDropped = 0; + + void drop_back(uint64_t num) { bytesDropped += num; } + + void push_back(uint64_t num) { + assert(bytesDropped >= num); + bytesDropped -= num; + } + + void trim() { + if (bytesDropped) { + rawData = rawData.drop_back(bytesDropped); + bytesDropped = 0; + } + } + ArrayRef<uint8_t> data() const { if (uncompressedSize >= 0) uncompress(); @@ -183,12 +203,25 @@ public: // the mmap'ed output buffer. 
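The drop_back/push_back/trim members added to InputSectionBase above let the jump-relaxation pass shrink a section tentatively and commit the change only when the layout settles. A standalone sketch of the same bookkeeping over a plain byte vector, with hypothetical names:

#include <cassert>
#include <cstdint>
#include <vector>

class ShrinkableSection {
  std::vector<uint8_t> data;
  uint64_t bytesDropped = 0;

public:
  explicit ShrinkableSection(std::vector<uint8_t> d) : data(std::move(d)) {}

  uint64_t size() const { return data.size() - bytesDropped; } // effective size

  void dropBack(uint64_t n) { bytesDropped += n; } // tentative shrink
  void pushBack(uint64_t n) {                      // undo part of a shrink
    assert(bytesDropped >= n);
    bytesDropped -= n;
  }
  void trim() {                                    // commit before writing output
    data.resize(data.size() - bytesDropped);
    bytesDropped = 0;
  }
};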
template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd); void relocateAlloc(uint8_t *buf, uint8_t *bufEnd); + static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, + int64_t A, uint64_t P, const Symbol &Sym, + RelExpr Expr); // The native ELF reloc data type is not very convenient to handle. // So we convert ELF reloc records to our own records in Relocations.cpp. // This vector contains such "cooked" relocations. std::vector<Relocation> relocations; + // Indicates that this section needs to be padded with a NOP filler if set to + // true. + bool nopFiller = false; + + // These are modifiers to jump instructions that are necessary when basic + // block sections are enabled. Basic block sections creates opportunities to + // relax jump instructions at basic block boundaries after reordering the + // basic blocks. + std::vector<JumpInstrMod> jumpInstrMods; + // A function compiled with -fsplit-stack calling a function // compiled without -fsplit-stack needs its prologue adjusted. Find // such functions and adjust their prologues. This is very similar @@ -357,9 +390,18 @@ private: template <class ELFT> void copyShtGroup(uint8_t *buf); }; +inline bool isDebugSection(const InputSectionBase &sec) { + return sec.name.startswith(".debug") || sec.name.startswith(".zdebug"); +} + // The list of all input sections. extern std::vector<InputSectionBase *> inputSections; +// The set of TOC entries (.toc + addend) for which we should not apply +// toc-indirect to toc-relative relaxation. const Symbol * refers to the +// STT_SECTION symbol associated to the .toc input section. +extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax; + } // namespace elf std::string toString(const elf::InputSectionBase *); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 2148ac500291..b8041afed6c9 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -41,9 +41,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // Creates an empty file to store a list of object files for final // linking of distributed ThinLTO. @@ -59,9 +58,9 @@ static std::unique_ptr<raw_fd_ostream> openFile(StringRef file) { } static std::string getThinLTOOutputFile(StringRef modulePath) { - return lto::getThinLTOOutputFile(modulePath, - config->thinLTOPrefixReplace.first, - config->thinLTOPrefixReplace.second); + return lto::getThinLTOOutputFile( + std::string(modulePath), std::string(config->thinLTOPrefixReplace.first), + std::string(config->thinLTOPrefixReplace.second)); } static lto::Config createConfig() { @@ -76,6 +75,33 @@ static lto::Config createConfig() { c.Options.FunctionSections = true; c.Options.DataSections = true; + // Check if basic block sections must be used. + // Allowed values for --lto-basicblock-sections are "all", "labels", + // "<file name specifying basic block ids>", or none. This is the equivalent + // of -fbasic-block-sections= flag in clang. 
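The comment above introduces the --lto-basicblock-sections handling that follows: the value may be "all", "labels", "none", or a file listing functions. As a sketch of that dispatch, with a local enum standing in for llvm::BasicBlockSection and no file I/O:

#include <string>

enum class BBSections { None, All, Labels, List };

// Any value that is not one of the keywords is treated as a path to a
// function list (the List mode).
BBSections parseBBSectionsOption(const std::string &v, std::string &listFile) {
  if (v.empty() || v == "none")
    return BBSections::None;
  if (v == "all")
    return BBSections::All;
  if (v == "labels")
    return BBSections::Labels;
  listFile = v;
  return BBSections::List;
}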
+ if (!config->ltoBasicBlockSections.empty()) { + if (config->ltoBasicBlockSections == "all") { + c.Options.BBSections = BasicBlockSection::All; + } else if (config->ltoBasicBlockSections == "labels") { + c.Options.BBSections = BasicBlockSection::Labels; + } else if (config->ltoBasicBlockSections == "none") { + c.Options.BBSections = BasicBlockSection::None; + } else { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFile(config->ltoBasicBlockSections.str()); + if (!MBOrErr) { + error("cannot open " + config->ltoBasicBlockSections + ":" + + MBOrErr.getError().message()); + } else { + c.Options.BBSectionsFuncListBuf = std::move(*MBOrErr); + } + c.Options.BBSections = BasicBlockSection::List; + } + } + + c.Options.UniqueBasicBlockSectionNames = + config->ltoUniqueBasicBlockSectionNames; + if (auto relocModel = getRelocModelFromCMModel()) c.RelocModel = *relocModel; else if (config->relocatable) @@ -97,21 +123,30 @@ static lto::Config createConfig() { c.PTO.SLPVectorization = c.OptLevel > 1; // Set up a custom pipeline if we've been asked to. - c.OptPipeline = config->ltoNewPmPasses; - c.AAPipeline = config->ltoAAPipeline; + c.OptPipeline = std::string(config->ltoNewPmPasses); + c.AAPipeline = std::string(config->ltoAAPipeline); // Set up optimization remarks if we've been asked to. - c.RemarksFilename = config->optRemarksFilename; - c.RemarksPasses = config->optRemarksPasses; + c.RemarksFilename = std::string(config->optRemarksFilename); + c.RemarksPasses = std::string(config->optRemarksPasses); c.RemarksWithHotness = config->optRemarksWithHotness; - c.RemarksFormat = config->optRemarksFormat; + c.RemarksFormat = std::string(config->optRemarksFormat); - c.SampleProfile = config->ltoSampleProfile; + c.SampleProfile = std::string(config->ltoSampleProfile); c.UseNewPM = config->ltoNewPassManager; c.DebugPassManager = config->ltoDebugPassManager; - c.DwoDir = config->dwoDir; + c.DwoDir = std::string(config->dwoDir); - c.CSIRProfile = config->ltoCSProfileFile; + c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; + c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty(); + + for (const llvm::StringRef &name : config->thinLTOModulesToCompile) + c.ThinLTOModulesToCompile.emplace_back(name); + + c.TimeTraceEnabled = config->timeTraceEnabled; + c.TimeTraceGranularity = config->timeTraceGranularity; + + c.CSIRProfile = std::string(config->ltoCSProfileFile); c.RunCSIRInstr = config->ltoCSProfileGenerate; if (config->emitLLVM) { @@ -122,6 +157,9 @@ static lto::Config createConfig() { }; } + if (config->ltoEmitAsm) + c.CGFileType = CGFT_AssemblyFile; + if (config->saveTemps) checkError(c.addSaveTemps(config->outputFile.str() + ".", /*UseInputModulePath*/ true)); @@ -138,10 +176,12 @@ BitcodeCompiler::BitcodeCompiler() { if (config->thinLTOIndexOnly) { auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); }; backend = lto::createWriteIndexesThinBackend( - config->thinLTOPrefixReplace.first, config->thinLTOPrefixReplace.second, + std::string(config->thinLTOPrefixReplace.first), + std::string(config->thinLTOPrefixReplace.second), config->thinLTOEmitImportsFiles, indexFile.get(), onIndexWrite); - } else if (config->thinLTOJobs != -1U) { - backend = lto::createInProcessThinBackend(config->thinLTOJobs); + } else { + backend = lto::createInProcessThinBackend( + llvm::heavyweight_hardware_concurrency(config->thinLTOJobs)); } ltoObj = std::make_unique<lto::LTO>(createConfig(), backend, @@ -218,7 +258,7 @@ void BitcodeCompiler::add(BitcodeFile &f) { // distributed build 
system that depends on that behavior. static void thinLTOCreateEmptyIndexFiles() { for (LazyObjFile *f : lazyObjFiles) { - if (!isBitcode(f->mb)) + if (f->fetched || !isBitcode(f->mb)) continue; std::string path = replaceThinLTOSuffix(getThinLTOOutputFile(f->getName())); std::unique_ptr<raw_fd_ostream> os = openFile(path + ".thinlto.bc"); @@ -259,12 +299,14 @@ std::vector<InputFile *> BitcodeCompiler::compile() { }, cache)); - // Emit empty index files for non-indexed files - for (StringRef s : thinIndices) { - std::string path = getThinLTOOutputFile(s); - openFile(path + ".thinlto.bc"); - if (config->thinLTOEmitImportsFiles) - openFile(path + ".imports"); + // Emit empty index files for non-indexed files but not in single-module mode. + if (config->thinLTOModulesToCompile.empty()) { + for (StringRef s : thinIndices) { + std::string path = getThinLTOOutputFile(s); + openFile(path + ".thinlto.bc"); + if (config->thinLTOEmitImportsFiles) + openFile(path + ".imports"); + } } if (config->thinLTOIndexOnly) { @@ -291,11 +333,19 @@ std::vector<InputFile *> BitcodeCompiler::compile() { } if (config->saveTemps) { - saveBuffer(buf[0], config->outputFile + ".lto.o"); + if (!buf[0].empty()) + saveBuffer(buf[0], config->outputFile + ".lto.o"); for (unsigned i = 1; i != maxTasks; ++i) saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o"); } + if (config->ltoEmitAsm) { + saveBuffer(buf[0], config->outputFile); + for (unsigned i = 1; i != maxTasks; ++i) + saveBuffer(buf[i], config->outputFile + Twine(i)); + return {}; + } + std::vector<InputFile *> ret; for (unsigned i = 0; i != maxTasks; ++i) if (!buf[i].empty()) @@ -306,6 +356,3 @@ std::vector<InputFile *> BitcodeCompiler::compile() { ret.push_back(createObjectFile(*file)); return ret; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 57e0e1e8acbf..72e2ebff9b8c 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -21,7 +21,6 @@ #include "Writer.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" @@ -29,6 +28,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include <algorithm> #include <cassert> @@ -43,10 +43,10 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { -LinkerScript *script; +LinkerScript *elf::script; static uint64_t getOutputSectionVA(SectionBase *sec) { OutputSection *os = sec->getOutputSection(); @@ -88,7 +88,7 @@ OutputSection *LinkerScript::createOutputSection(StringRef name, if (!secRef) secRef = sec; } - sec->location = location; + sec->location = std::string(location); return sec; } @@ -103,10 +103,11 @@ OutputSection *LinkerScript::getOrCreateOutputSection(StringRef name) { static void expandMemoryRegion(MemoryRegion *memRegion, uint64_t size, StringRef regionName, StringRef secName) { memRegion->curPos += size; - uint64_t newSize = memRegion->curPos - memRegion->origin; - if (newSize > memRegion->length) + uint64_t newSize = memRegion->curPos - (memRegion->origin)().getValue(); + uint64_t length = (memRegion->length)().getValue(); + if (newSize > length) error("section '" + secName + "' will not fit in region '" + 
regionName + - "': overflowed by " + Twine(newSize - memRegion->length) + " bytes"); + "': overflowed by " + Twine(newSize - length) + " bytes"); } void LinkerScript::expandMemoryRegions(uint64_t size) { @@ -246,32 +247,30 @@ getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { return changed; } -// This method is used to handle INSERT AFTER statement. Here we rebuild -// the list of script commands to mix sections inserted into. +// Process INSERT [AFTER|BEFORE] commands. For each command, we move the +// specified output section to the designated place. void LinkerScript::processInsertCommands() { - std::vector<BaseCommand *> v; - auto insert = [&](std::vector<BaseCommand *> &from) { - v.insert(v.end(), from.begin(), from.end()); - from.clear(); - }; - - for (BaseCommand *base : sectionCommands) { - if (auto *os = dyn_cast<OutputSection>(base)) { - insert(insertBeforeCommands[os->name]); - v.push_back(base); - insert(insertAfterCommands[os->name]); + for (const InsertCommand &cmd : insertCommands) { + // If cmd.os is empty, it may have been discarded by + // adjustSectionsBeforeSorting(). We do not handle such output sections. + auto from = llvm::find(sectionCommands, cmd.os); + if (from == sectionCommands.end()) continue; + sectionCommands.erase(from); + + auto insertPos = llvm::find_if(sectionCommands, [&cmd](BaseCommand *base) { + auto *to = dyn_cast<OutputSection>(base); + return to != nullptr && to->name == cmd.where; + }); + if (insertPos == sectionCommands.end()) { + error("unable to insert " + cmd.os->name + + (cmd.isAfter ? " after " : " before ") + cmd.where); + } else { + if (cmd.isAfter) + ++insertPos; + sectionCommands.insert(insertPos, cmd.os); } - v.push_back(base); } - - for (auto &cmds : {insertBeforeCommands, insertAfterCommands}) - for (const std::pair<StringRef, std::vector<BaseCommand *>> &p : cmds) - if (!p.second.empty()) - error("unable to INSERT AFTER/BEFORE " + p.first + - ": section not defined"); - - sectionCommands = std::move(v); } // Symbols defined in script should not be inlined by LTO. At the same time @@ -324,8 +323,8 @@ static std::string getFilename(InputFile *file) { if (!file) return ""; if (file->archiveName.empty()) - return file->getName(); - return (file->archiveName + "(" + file->getName() + ")").str(); + return std::string(file->getName()); + return (file->archiveName + ':' + file->getName()).str(); } bool LinkerScript::shouldKeep(InputSectionBase *s) { @@ -335,7 +334,9 @@ bool LinkerScript::shouldKeep(InputSectionBase *s) { for (InputSectionDescription *id : keptSections) if (id->filePat.match(filename)) for (SectionPattern &p : id->sectionPatterns) - if (p.sectionPat.match(s->name)) + if (p.sectionPat.match(s->name) && + (s->flags & id->withFlags) == id->withFlags && + (s->flags & id->withoutFlags) == 0) return true; return false; } @@ -406,14 +407,15 @@ static void sortInputSections(MutableArrayRef<InputSectionBase *> vec, // Compute and remember which sections the InputSectionDescription matches. std::vector<InputSectionBase *> -LinkerScript::computeInputSections(const InputSectionDescription *cmd) { +LinkerScript::computeInputSections(const InputSectionDescription *cmd, + ArrayRef<InputSectionBase *> sections) { std::vector<InputSectionBase *> ret; // Collects all sections that satisfy constraints of Cmd. 
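processInsertCommands() above now implements INSERT [AFTER|BEFORE] by physically moving the named output section next to its anchor in sectionCommands, skipping commands whose section was discarded and erroring when the anchor is missing. The same move-relative-to logic on a plain vector of names, as a sketch:

#include <algorithm>
#include <string>
#include <vector>

// Move `name` so it sits immediately after (or before) `where`. Returns false
// if either entry is missing (the real code skips a missing section and
// errors on a missing anchor).
bool insertRelative(std::vector<std::string> &cmds, const std::string &name,
                    const std::string &where, bool isAfter) {
  auto from = std::find(cmds.begin(), cmds.end(), name);
  if (from == cmds.end())
    return false;
  std::string moved = *from;
  cmds.erase(from);
  auto pos = std::find(cmds.begin(), cmds.end(), where);
  if (pos == cmds.end())
    return false;
  if (isAfter)
    ++pos;
  cmds.insert(pos, moved);
  return true;
}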
for (const SectionPattern &pat : cmd->sectionPatterns) { size_t sizeBefore = ret.size(); - for (InputSectionBase *sec : inputSections) { + for (InputSectionBase *sec : sections) { if (!sec->isLive() || sec->parent) continue; @@ -426,10 +428,15 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) { cast<InputSection>(sec)->getRelocatedSection()) continue; + // Check the name early to improve performance in the common case. + if (!pat.sectionPat.match(sec->name)) + continue; + std::string filename = getFilename(sec->file); if (!cmd->filePat.match(filename) || pat.excludedFilePat.match(filename) || - !pat.sectionPat.match(sec->name)) + (sec->flags & cmd->withFlags) != cmd->withFlags || + (sec->flags & cmd->withoutFlags) != 0) continue; ret.push_back(sec); @@ -459,13 +466,29 @@ void LinkerScript::discard(InputSectionBase *s) { discard(ds); } +void LinkerScript::discardSynthetic(OutputSection &outCmd) { + for (Partition &part : partitions) { + if (!part.armExidx || !part.armExidx->isLive()) + continue; + std::vector<InputSectionBase *> secs(part.armExidx->exidxSections.begin(), + part.armExidx->exidxSections.end()); + for (BaseCommand *base : outCmd.sectionCommands) + if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { + std::vector<InputSectionBase *> matches = + computeInputSections(cmd, secs); + for (InputSectionBase *s : matches) + discard(s); + } + } +} + std::vector<InputSectionBase *> LinkerScript::createInputSectionList(OutputSection &outCmd) { std::vector<InputSectionBase *> ret; for (BaseCommand *base : outCmd.sectionCommands) { if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { - cmd->sectionBases = computeInputSections(cmd); + cmd->sectionBases = computeInputSections(cmd, inputSections); for (InputSectionBase *s : cmd->sectionBases) s->parent = &outCmd; ret.insert(ret.end(), cmd->sectionBases.begin(), cmd->sectionBases.end()); @@ -486,6 +509,7 @@ void LinkerScript::processSectionCommands() { if (sec->name == "/DISCARD/") { for (InputSectionBase *s : v) discard(s); + discardSynthetic(*sec); sec->sectionCommands.clear(); continue; } @@ -676,14 +700,12 @@ void LinkerScript::addOrphanSections() { std::function<void(InputSectionBase *)> add; add = [&](InputSectionBase *s) { if (s->isLive() && !s->parent) { - StringRef name = getOutputSectionName(s); - - if (config->orphanHandling == OrphanHandlingPolicy::Error) - error(toString(s) + " is being placed in '" + name + "'"); - else if (config->orphanHandling == OrphanHandlingPolicy::Warn) - warn(toString(s) + " is being placed in '" + name + "'"); + orphanSections.push_back(s); - if (OutputSection *sec = findByName(sectionCommands, name)) { + StringRef name = getOutputSectionName(s); + if (config->unique) { + v.push_back(createSection(s, name)); + } else if (OutputSection *sec = findByName(sectionCommands, name)) { sec->recordSection(s); } else { if (OutputSection *os = addInputSec(map, s, name)) @@ -727,6 +749,22 @@ void LinkerScript::addOrphanSections() { sectionCommands.insert(sectionCommands.begin(), v.begin(), v.end()); } +void LinkerScript::diagnoseOrphanHandling() const { + for (const InputSectionBase *sec : orphanSections) { + // Input SHT_REL[A] retained by --emit-relocs are ignored by + // computeInputSections(). Don't warn/error. 
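The shouldKeep() and computeInputSections() changes in this hunk apply the new INPUT_SECTION_FLAGS filter the same way: every bit in withFlags must be set and no bit in withoutFlags may be set. As a one-line predicate:

#include <cstdint>

// True if `flags` satisfies an INPUT_SECTION_FLAGS-style constraint.
inline bool matchesFlags(uint64_t flags, uint64_t withFlags,
                         uint64_t withoutFlags) {
  return (flags & withFlags) == withFlags && (flags & withoutFlags) == 0;
}

For example, matchesFlags(SHF_ALLOC | SHF_WRITE, SHF_ALLOC, SHF_EXECINSTR) is true, while adding SHF_EXECINSTR to the section's flags would make it false.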
+ if (isa<InputSection>(sec) && + cast<InputSection>(sec)->getRelocatedSection()) + continue; + + StringRef name = getOutputSectionName(sec); + if (config->orphanHandling == OrphanHandlingPolicy::Error) + error(toString(sec) + " is being placed in '" + name + "'"); + else if (config->orphanHandling == OrphanHandlingPolicy::Warn) + warn(toString(sec) + " is being placed in '" + name + "'"); + } +} + uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) { bool isTbss = (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS; @@ -756,9 +794,16 @@ void LinkerScript::output(InputSection *s) { void LinkerScript::switchTo(OutputSection *sec) { ctx->outSec = sec; - uint64_t before = advance(0, 1); - ctx->outSec->addr = advance(0, ctx->outSec->alignment); - expandMemoryRegions(ctx->outSec->addr - before); + uint64_t pos = advance(0, 1); + if (sec->addrExpr && script->hasSectionsCommand) { + // The alignment is ignored. + ctx->outSec->addr = pos; + } else { + // ctx->outSec->alignment is the max of ALIGN and the maximum of input + // section alignments. + ctx->outSec->addr = advance(0, ctx->outSec->alignment); + expandMemoryRegions(ctx->outSec->addr - pos); + } } // This function searches for a memory region to place the given output @@ -806,6 +851,8 @@ void LinkerScript::assignOffsets(OutputSection *sec) { if (!(sec->flags & SHF_ALLOC)) dot = 0; + const bool sameMemRegion = ctx->memRegion == sec->memRegion; + const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr; ctx->memRegion = sec->memRegion; ctx->lmaRegion = sec->lmaRegion; if (ctx->memRegion) @@ -824,18 +871,20 @@ void LinkerScript::assignOffsets(OutputSection *sec) { switchTo(sec); + // ctx->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT() or + // AT>, recompute ctx->lmaOffset; otherwise, if both previous/current LMA + // region is the default, and the two sections are in the same memory region, + // reuse previous lmaOffset; otherwise, reset lmaOffset to 0. This emulates + // heuristics described in + // https://sourceware.org/binutils/docs/ld/Output-Section-LMA.html if (sec->lmaExpr) ctx->lmaOffset = sec->lmaExpr().getValue() - dot; + else if (MemoryRegion *mr = sec->lmaRegion) + ctx->lmaOffset = alignTo(mr->curPos, sec->alignment) - dot; + else if (!sameMemRegion || !prevLMARegionIsDefault) + ctx->lmaOffset = 0; - if (MemoryRegion *mr = sec->lmaRegion) - ctx->lmaOffset = mr->curPos - dot; - - // If neither AT nor AT> is specified for an allocatable section, the linker - // will set the LMA such that the difference between VMA and LMA for the - // section is the same as the preceding output section in the same region - // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html - // This, however, should only be done by the first "non-header" section - // in the segment. + // Propagate ctx->lmaOffset to the first "non-header" section. if (PhdrEntry *l = ctx->outSec->ptLoad) if (sec == findFirstSection(l)) l->lmaOffset = ctx->lmaOffset; @@ -946,7 +995,7 @@ void LinkerScript::adjustSectionsBeforeSorting() { // We do not want to keep any special flags for output section // in case it is empty. - bool isEmpty = getInputSections(sec).empty(); + bool isEmpty = (getFirstInputSection(sec) == nullptr); if (isEmpty) sec->flags = flags & ((sec->nonAlloc ? 
0 : (uint64_t)SHF_ALLOC) | SHF_WRITE | SHF_EXECINSTR); @@ -1068,7 +1117,7 @@ void LinkerScript::allocateHeaders(std::vector<PhdrEntry *> &phdrs) { LinkerScript::AddressState::AddressState() { for (auto &mri : script->memoryRegions) { MemoryRegion *mr = mri.second; - mr->curPos = mr->origin; + mr->curPos = (mr->origin)().getValue(); } } @@ -1195,11 +1244,8 @@ std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *cmd) { if (Optional<size_t> idx = getPhdrIndex(phdrsCommands, s)) ret.push_back(*idx); else if (s != "NONE") - error(cmd->location + ": section header '" + s + + error(cmd->location + ": program header '" + s + "' is not listed in PHDRS"); } return ret; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 25a14e08dade..ec4fc22db486 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -109,11 +109,11 @@ struct SymbolAssignment : BaseCommand { std::string commandString; // Address of this assignment command. - unsigned addr; + uint64_t addr; // Size of this assignment command. This is usually 0, but if // you move '.' this may be greater than 0. - unsigned size; + uint64_t size; }; // Linker scripts allow additional constraints to be put on output sections. @@ -126,14 +126,14 @@ enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; // target memory. Instances of the struct are created by parsing the // MEMORY command. struct MemoryRegion { - MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags, + MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, uint32_t negFlags) - : name(name), origin(origin), length(length), flags(flags), + : name(std::string(name)), origin(origin), length(length), flags(flags), negFlags(negFlags) {} std::string name; - uint64_t origin; - uint64_t length; + Expr origin; + Expr length; uint32_t flags; uint32_t negFlags; uint64_t curPos = 0; @@ -155,14 +155,16 @@ struct SectionPattern { }; struct InputSectionDescription : BaseCommand { - InputSectionDescription(StringRef filePattern) - : BaseCommand(InputSectionKind), filePat(filePattern) {} + InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, + uint64_t withoutFlags = 0) + : BaseCommand(InputSectionKind), filePat(filePattern), + withFlags(withFlags), withoutFlags(withoutFlags) {} static bool classof(const BaseCommand *c) { return c->kind == InputSectionKind; } - StringMatcher filePat; + SingleStringMatcher filePat; // Input sections that matches at least one of SectionPatterns // will be associated with this InputSectionDescription. @@ -180,6 +182,10 @@ struct InputSectionDescription : BaseCommand { // they were created in. This is used to insert newly created ThunkSections // into Sections at the end of a createThunks() pass. std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; + + // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. + uint64_t withFlags; + uint64_t withoutFlags; }; // Represents BYTE(), SHORT(), LONG(), or QUAD(). 
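assignOffsets() earlier in this hunk computes ctx->lmaOffset as LMA minus VMA: it is recomputed for AT() and AT>, carried over from the previous section only when both sections stay in the same default regions, and reset to 0 otherwise, per the GNU ld heuristic it cites. A condensed sketch of just that decision, with simplified inputs:

#include <cstdint>
#include <optional>

// Decide the LMA-minus-VMA offset for an output section. Parameters are
// simplified stand-ins for the linker-script state used above.
uint64_t computeLmaOffset(std::optional<uint64_t> lmaExpr,      // AT(<expr>)
                          std::optional<uint64_t> lmaRegionPos, // AT><region> cursor
                          uint64_t dot, uint64_t prevLmaOffset,
                          bool sameMemRegion, bool prevLmaRegionIsDefault) {
  if (lmaExpr)
    return *lmaExpr - dot;
  if (lmaRegionPos)
    return *lmaRegionPos - dot; // the real code also aligns to sec->alignment
  if (sameMemRegion && prevLmaRegionIsDefault)
    return prevLmaOffset;       // keep the previous section's VMA/LMA distance
  return 0;
}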
@@ -202,6 +208,12 @@ struct ByteCommand : BaseCommand { unsigned size; }; +struct InsertCommand { + OutputSection *os; + bool isAfter; + StringRef where; +}; + struct PhdrsCommand { StringRef name; unsigned type = llvm::ELF::PT_NULL; @@ -233,10 +245,13 @@ class LinkerScript final { void expandMemoryRegions(uint64_t size); std::vector<InputSectionBase *> - computeInputSections(const InputSectionDescription *); + computeInputSections(const InputSectionDescription *, + ArrayRef<InputSectionBase *>); std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd); + void discardSynthetic(OutputSection &); + std::vector<size_t> getPhdrIndices(OutputSection *sec); MemoryRegion *findMemoryRegion(OutputSection *sec); @@ -270,6 +285,7 @@ public: ExprValue getSymbolValue(StringRef name, const Twine &loc); void addOrphanSections(); + void diagnoseOrphanHandling() const; void adjustSectionsBeforeSorting(); void adjustSectionsAfterSorting(); @@ -305,10 +321,12 @@ public: // A list of symbols referenced by the script. std::vector<llvm::StringRef> referencedSymbols; - // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need - // to be inserted into SECTIONS commands list. - llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands; - llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands; + // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need + // to be reordered. + std::vector<InsertCommand> insertCommands; + + // Sections that will be warned/errored by --orphan-handling. + std::vector<const InputSectionBase *> orphanSections; }; extern LinkerScript *script; diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index e5f5c4f4ff23..12cffead1f80 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -26,16 +26,16 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::object; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { using SymbolMapTy = DenseMap<const SectionBase *, SmallVector<Defined *, 4>>; static constexpr char indent8[] = " "; // 8 spaces @@ -138,7 +138,7 @@ static void printEhFrame(raw_ostream &os, const EhFrameSection *sec) { } } -void writeMapFile() { +void elf::writeMapFile() { if (config->mapFile.empty()) return; @@ -227,7 +227,7 @@ static void print(StringRef a, StringRef b) { // // In this case, strlen is defined by libc.so.6 and used by other two // files. 
-void writeCrossReferenceTable() { +void elf::writeCrossReferenceTable() { if (!config->cref) return; @@ -259,5 +259,20 @@ void writeCrossReferenceTable() { } } -} // namespace elf -} // namespace lld +void elf::writeArchiveStats() { + if (config->printArchiveStats.empty()) + return; + + std::error_code ec; + raw_fd_ostream os(config->printArchiveStats, ec, sys::fs::OF_None); + if (ec) { + error("--print-archive-stats=: cannot open " + config->printArchiveStats + + ": " + ec.message()); + return; + } + + os << "members\tfetched\tarchive\n"; + for (const ArchiveFile *f : archiveFiles) + os << f->getMemberCount() << '\t' << f->getFetchedMemberCount() << '\t' + << f->getName() << '\n'; +} diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h index 7e7938919edf..c4da18f8ad7f 100644 --- a/lld/ELF/MapFile.h +++ b/lld/ELF/MapFile.h @@ -13,6 +13,7 @@ namespace lld { namespace elf { void writeMapFile(); void writeCrossReferenceTable(); +void writeArchiveStats(); } // namespace elf } // namespace lld diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index bb0105c28928..28e13e8c1234 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -31,17 +31,17 @@ #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include <functional> #include <vector> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; -namespace endian = llvm::support::endian; - -namespace lld { -namespace elf { namespace { template <class ELFT> class MarkLive { public: @@ -141,7 +141,7 @@ void MarkLive<ELFT>::scanEhFrameSection(EhInputSection &eh, if (firstRelI == (unsigned)-1) continue; - if (endian::read32<ELFT::TargetEndianness>(piece.data().data() + 4) == 0) { + if (read32<ELFT::TargetEndianness>(piece.data().data() + 4) == 0) { // This is a CIE, we only need to worry about the first relocation. It is // known to point to the personality function. resolveReloc(eh, rels[firstRelI], false); @@ -322,7 +322,8 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() { // Before calling this function, Live bits are off for all // input sections. This function make some or all of them on // so that they are emitted to the output file. -template <class ELFT> void markLive() { +template <class ELFT> void elf::markLive() { + llvm::TimeTraceScope timeScope("markLive"); // If -gc-sections is not given, no sections are removed. if (!config->gcSections) { for (InputSectionBase *sec : inputSections) @@ -390,10 +391,7 @@ template <class ELFT> void markLive() { message("removing unused section " + toString(sec)); } -template void markLive<ELF32LE>(); -template void markLive<ELF32BE>(); -template void markLive<ELF64LE>(); -template void markLive<ELF64BE>(); - -} // namespace elf -} // namespace lld +template void elf::markLive<ELF32LE>(); +template void elf::markLive<ELF32BE>(); +template void elf::markLive<ELF64LE>(); +template void elf::markLive<ELF64BE>(); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index ea78a3526211..c3c1309aca1a 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -1,5 +1,23 @@ include "llvm/Option/OptParser.td" +// Convenience classes for long options which only accept two dashes. For lld +// specific or newer long options, we prefer two dashes to avoid collision with +// short options. For many others, we have to accept both forms to be compatible +// with GNU ld. 
+class FF<string name> : Flag<["--"], name>; +class JJ<string name>: Joined<["--"], name>; + +multiclass EEq<string name, string help> { + def NAME: Separate<["--"], name>; + def NAME # _eq: Joined<["--"], name # "=">, Alias<!cast<Separate>(NAME)>, + HelpText<help>; +} + +multiclass BB<string name, string help1, string help2> { + def NAME: Flag<["--"], name>, HelpText<help1>; + def no_ # NAME: Flag<["--"], "no-" # name>, HelpText<help2>; +} + // For options whose names are multiple letters, either one dash or // two can precede the option name except those that start with 'o'. class F<string name>: Flag<["--", "-"], name>; @@ -42,6 +60,10 @@ defm compress_debug_sections: defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">; +defm optimize_bb_jumps: BB<"optimize-bb-jumps", + "Remove direct jumps at the end to the next basic block", + "Do not remove any direct jumps at the end to the next basic block (default)">; + defm split_stack_adjust_size : Eq<"split-stack-adjust-size", "Specify adjustment to stack size when a split-stack function calls a " @@ -69,11 +91,11 @@ defm allow_shlib_undefined: B<"allow-shlib-undefined", "Allow unresolved references in shared libraries (default when linking a shared library)", "Do not allow unresolved references in shared libraries (default when linking an executable)">; -defm apply_dynamic_relocs: B<"apply-dynamic-relocs", +defm apply_dynamic_relocs: BB<"apply-dynamic-relocs", "Apply link-time values for dynamic relocations", "Do not apply link-time values for dynamic relocations (default)">; -defm dependent_libraries: B<"dependent-libraries", +defm dependent_libraries: BB<"dependent-libraries", "Process dependent library specifiers from input files (default)", "Ignore dependent library specifiers from input files">; @@ -84,7 +106,7 @@ defm as_needed: B<"as-needed", defm call_graph_ordering_file: Eq<"call-graph-ordering-file", "Layout sections to optimize the given callgraph">; -defm call_graph_profile_sort: B<"call-graph-profile-sort", +defm call_graph_profile_sort: BB<"call-graph-profile-sort", "Reorder sections with call graph profile (default)", "Do not reorder sections with call graph profile">; @@ -123,7 +145,12 @@ def discard_none: F<"discard-none">, defm dynamic_linker: Eq<"dynamic-linker", "Which dynamic linker to use">; -defm dynamic_list: Eq<"dynamic-list", "Read a list of dynamic symbols">; +defm dynamic_list : Eq<"dynamic-list", + "Read a list of dynamic symbols. (executable) Put matched non-local defined" + "symbols to the dynamic symbol table. (shared object) References to matched" + "non-local STV_DEFAULT symbols shouldn't be bound to definitions within the " + "shared object. 
Implies -Bsymbolic but does not set DF_SYMBOLIC">, + MetaVarName<"<file>">; defm eh_frame_hdr: B<"eh-frame-hdr", "Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header", @@ -151,7 +178,7 @@ def error_unresolved_symbols: F<"error-unresolved-symbols">, defm exclude_libs: Eq<"exclude-libs", "Exclude static libraries from automatic export">; -defm execute_only: B<"execute-only", +defm execute_only: BB<"execute-only", "Mark executable sections unreadable", "Mark executable sections readable (default)">; @@ -159,8 +186,12 @@ defm export_dynamic: B<"export-dynamic", "Put symbols in the dynamic symbol table", "Do not put symbols in the dynamic symbol table (default)">; -defm export_dynamic_symbol: - Eq<"export-dynamic-symbol", "Put a symbol in the dynamic symbol table">; +defm export_dynamic_symbol : EEq<"export-dynamic-symbol", + "(executable) Put matched symbols in the dynamic symbol table. " + "(shared object) References to matched non-local STV_DEFAULT symbols " + "shouldn't be bound to definitions within the shared object. " + "Does not imply -Bsymbolic.">, + MetaVarName<"glob">; defm fatal_warnings: B<"fatal-warnings", "Treat warnings as errors", @@ -183,11 +214,11 @@ defm gc_sections: B<"gc-sections", "Enable garbage collection of unused sections", "Disable garbage collection of unused sections (default)">; -defm gdb_index: B<"gdb-index", +defm gdb_index: BB<"gdb-index", "Generate .gdb_index section", "Do not generate .gdb_index section (default)">; -defm gnu_unique: B<"gnu-unique", +defm gnu_unique: BB<"gnu-unique", "Enable STB_GNU_UNIQUE symbol binding (default)", "Disable STB_GNU_UNIQUE symbol binding">; @@ -227,7 +258,7 @@ defm merge_exidx_entries: B<"merge-exidx-entries", "Enable merging .ARM.exidx entries (default)", "Disable merging .ARM.exidx entries">; -defm mmap_output_file: B<"mmap-output-file", +defm mmap_output_file: BB<"mmap-output-file", "Mmap the output file for writing (default)", "Do not mmap the output file for writing">; @@ -252,9 +283,6 @@ def no_nmagic: F<"no-nmagic">, MetaVarName<"<magic>">, def no_omagic: F<"no-omagic">, MetaVarName<"<magic>">, HelpText<"Do not set the text data sections to be writable, page align sections (default)">; -def no_rosegment: F<"no-rosegment">, - HelpText<"Do not put read-only non-executable sections in their own segment">; - def no_undefined: F<"no-undefined">, HelpText<"Report unresolved symbols even if the linker is creating a shared library">; @@ -264,7 +292,7 @@ def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">, HelpText<"Specify the binary format for the output object file">; -def omagic: Flag<["--"], "omagic">, MetaVarName<"<magic>">, +def omagic: FF<"omagic">, MetaVarName<"<magic>">, HelpText<"Set the text and data sections to be readable and writable, do not page align sections, link against static libraries">; defm orphan_handling: @@ -274,7 +302,7 @@ defm pack_dyn_relocs: Eq<"pack-dyn-relocs", "Pack dynamic relocations in the given format">, MetaVarName<"[none,android,relr,android+relr]">; -defm use_android_relr_tags: B<"use-android-relr-tags", +defm use_android_relr_tags: BB<"use-android-relr-tags", "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*", "Use SHT_RELR / DT_RELR* tags (default)">; @@ -293,6 +321,10 @@ defm print_icf_sections: B<"print-icf-sections", "List identical folded sections", "Do not list identical folded sections (default)">; +def print_archive_stats: J<"print-archive-stats=">, + 
HelpText<"Write archive usage statistics to the specified file. " + "Print the numbers of members and fetched members for each archive">; + defm print_symbol_order: Eq<"print-symbol-order", "Print a symbol order specified by --call-graph-ordering-file into the specified file">; @@ -307,6 +339,10 @@ def print_map: F<"print-map">, defm reproduce: Eq<"reproduce", "Write a tar file containing input files and command line options to reproduce link">; +defm rosegment: BB<"rosegment", + "Put read-only non-executable sections in their own segment (default)", + "Do not put read-only non-executable sections in their own segment">; + defm rpath: Eq<"rpath", "Add a DT_RUNPATH to the output">; def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; @@ -350,9 +386,16 @@ defm target2: Eq<"target2", "Interpret R_ARM_TARGET2 as <type>, where <type> is one of rel, abs, or got-rel">, MetaVarName<"<type>">; -defm threads: B<"threads", - "Run the linker multi-threaded (default)", - "Do not run the linker multi-threaded">; +defm threads + : Eq<"threads", + "Number of threads. '1' disables multi-threading. By default all " + "available hardware threads are used">; + +def time_trace: F<"time-trace">, HelpText<"Record time trace">; +def time_trace_file_eq: J<"time-trace-file=">, HelpText<"Specify time trace output file">; + +defm time_trace_granularity: Eq<"time-trace-granularity", + "Minimum time granularity (in microseconds) traced by time profiler">; defm toc_optimize : B<"toc-optimize", "(PowerPC64) Enable TOC related optimizations (default)", @@ -368,6 +411,8 @@ defm undefined: Eq<"undefined", "Force undefined symbol during linking">, defm undefined_glob: Eq<"undefined-glob", "Force undefined symbol during linking">, MetaVarName<"<pattern>">; +def unique: F<"unique">, HelpText<"Creates a separate output section for every orphan input section">; + defm unresolved_symbols: Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">; @@ -386,19 +431,25 @@ def version: F<"version">, HelpText<"Display the version number and exit">; defm version_script: Eq<"version-script", "Read a version script">; -defm warn_backrefs: B<"warn-backrefs", +defm warn_backrefs: BB<"warn-backrefs", "Warn about backward symbol references to fetch archive members", "Do not warn about backward symbol references to fetch archive members (default)">; +defm warn_backrefs_exclude + : EEq<"warn-backrefs-exclude", + "Glob describing an archive (or an object file within --start-lib) " + "which should be ignored for --warn-backrefs.">, + MetaVarName<"<glob>">; + defm warn_common: B<"warn-common", "Warn about duplicate common symbols", "Do not warn about duplicate common symbols (default)">; -defm warn_ifunc_textrel: B<"warn-ifunc-textrel", +defm warn_ifunc_textrel: BB<"warn-ifunc-textrel", "Warn about using ifunc symbols with text relocations", "Do not warn about using ifunc symbols with text relocations (default)">; -defm warn_symbol_ordering: B<"warn-symbol-ordering", +defm warn_symbol_ordering: BB<"warn-symbol-ordering", "Warn about problems with the symbol ordering file (default)", "Do not warn about problems with the symbol ordering file">; @@ -460,84 +511,99 @@ def: JoinedOrSeparate<["-"], "u">, Alias<undefined>, HelpText<"Alias for --undef def: Flag<["-"], "V">, Alias<version>, HelpText<"Alias for --version">; // LTO-related options. -def lto_aa_pipeline: J<"lto-aa-pipeline=">, +def lto_aa_pipeline: JJ<"lto-aa-pipeline=">, HelpText<"AA pipeline to run during LTO. 
Used in conjunction with -lto-newpm-passes">; -def lto_debug_pass_manager: F<"lto-debug-pass-manager">, +def lto_debug_pass_manager: FF<"lto-debug-pass-manager">, HelpText<"Debug new pass manager">; -def lto_new_pass_manager: F<"lto-new-pass-manager">, +def lto_emit_asm: FF<"lto-emit-asm">, + HelpText<"Emit assembly code">; +def lto_new_pass_manager: FF<"lto-new-pass-manager">, HelpText<"Use new pass manager">; -def lto_newpm_passes: J<"lto-newpm-passes=">, +def lto_newpm_passes: JJ<"lto-newpm-passes=">, HelpText<"Passes to run during LTO">; -def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, +def lto_O: JJ<"lto-O">, MetaVarName<"<opt-level>">, HelpText<"Optimization level for LTO">; -def lto_partitions: J<"lto-partitions=">, +def lto_partitions: JJ<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; -def lto_cs_profile_generate: F<"lto-cs-profile-generate">, +def lto_cs_profile_generate: FF<"lto-cs-profile-generate">, HelpText<"Perform context sensitive PGO instrumentation">; -def lto_cs_profile_file: J<"lto-cs-profile-file=">, +def lto_cs_profile_file: JJ<"lto-cs-profile-file=">, HelpText<"Context sensitive profile file path">; -def lto_obj_path_eq: J<"lto-obj-path=">; -def lto_sample_profile: J<"lto-sample-profile=">, +def lto_obj_path_eq: JJ<"lto-obj-path=">; +def lto_sample_profile: JJ<"lto-sample-profile=">, HelpText<"Sample profile file path">; +def lto_whole_program_visibility: FF<"lto-whole-program-visibility">, + HelpText<"Asserts that the LTO link has whole program visibility">; def disable_verify: F<"disable-verify">; defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">; def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">, HelpText<"YAML output file for optimization remarks">; def opt_remarks_passes: Separate<["--"], "opt-remarks-passes">, HelpText<"Regex for the passes that need to be serialized to the output file">; -def opt_remarks_with_hotness: Flag<["--"], "opt-remarks-with-hotness">, +def opt_remarks_with_hotness: FF<"opt-remarks-with-hotness">, HelpText<"Include hotness information in the optimization remarks file">; def opt_remarks_format: Separate<["--"], "opt-remarks-format">, HelpText<"The format used for serializing remarks (default: YAML)">; -defm plugin_opt: Eq<"plugin-opt", "specifies LTO options for compatibility with GNU linkers">; def save_temps: F<"save-temps">; -def thinlto_cache_dir: J<"thinlto-cache-dir=">, +def lto_basicblock_sections: JJ<"lto-basicblock-sections=">, + HelpText<"Enable basic block sections for LTO">; +defm lto_unique_bb_section_names: BB<"lto-unique-bb-section-names", + "Give unique names to every basic block section for LTO", + "Do not give unique names to every basic block section for LTO (default)">; +def shuffle_sections: JJ<"shuffle-sections=">, MetaVarName<"<seed>">, + HelpText<"Shuffle input sections using the given seed. 
If 0, use a random seed">; +def thinlto_cache_dir: JJ<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; -defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; -def thinlto_emit_imports_files: F<"thinlto-emit-imports-files">; -def thinlto_index_only: F<"thinlto-index-only">; -def thinlto_index_only_eq: J<"thinlto-index-only=">; -def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; -def thinlto_object_suffix_replace_eq: J<"thinlto-object-suffix-replace=">; -def thinlto_prefix_replace_eq: J<"thinlto-prefix-replace=">; - -def: J<"plugin-opt=O">, Alias<lto_O>, HelpText<"Alias for -lto-O">; +defm thinlto_cache_policy: EEq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; +def thinlto_emit_imports_files: FF<"thinlto-emit-imports-files">; +def thinlto_index_only: FF<"thinlto-index-only">; +def thinlto_index_only_eq: JJ<"thinlto-index-only=">; +def thinlto_jobs: JJ<"thinlto-jobs=">, + HelpText<"Number of ThinLTO jobs. Default to --threads=">; +def thinlto_object_suffix_replace_eq: JJ<"thinlto-object-suffix-replace=">; +def thinlto_prefix_replace_eq: JJ<"thinlto-prefix-replace=">; +def thinlto_single_module_eq: JJ<"thinlto-single-module=">, + HelpText<"Specific a single module to compile in ThinLTO mode, for debugging only">; + +def: J<"plugin-opt=O">, Alias<lto_O>, HelpText<"Alias for --lto-O">; def: F<"plugin-opt=debug-pass-manager">, - Alias<lto_debug_pass_manager>, HelpText<"Alias for -lto-debug-pass-manager">; -def: F<"plugin-opt=disable-verify">, Alias<disable_verify>, HelpText<"Alias for -disable-verify">; + Alias<lto_debug_pass_manager>, HelpText<"Alias for --lto-debug-pass-manager">; +def: F<"plugin-opt=disable-verify">, Alias<disable_verify>, HelpText<"Alias for --disable-verify">; def plugin_opt_dwo_dir_eq: J<"plugin-opt=dwo_dir=">, HelpText<"Directory to store .dwo files when LTO and debug fission are used">; +def plugin_opt_emit_asm: F<"plugin-opt=emit-asm">, + Alias<lto_emit_asm>, HelpText<"Alias for --lto-emit-asm">; def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">; -def: J<"plugin-opt=jobs=">, Alias<thinlto_jobs>, HelpText<"Alias for -thinlto-jobs">; -def: J<"plugin-opt=lto-partitions=">, Alias<lto_partitions>, HelpText<"Alias for -lto-partitions">; +def: J<"plugin-opt=jobs=">, Alias<thinlto_jobs>, HelpText<"Alias for --thinlto-jobs">; +def: J<"plugin-opt=lto-partitions=">, Alias<lto_partitions>, HelpText<"Alias for --lto-partitions">; def plugin_opt_mcpu_eq: J<"plugin-opt=mcpu=">; def: F<"plugin-opt=new-pass-manager">, - Alias<lto_new_pass_manager>, HelpText<"Alias for -lto-new-pass-manager">; + Alias<lto_new_pass_manager>, HelpText<"Alias for --lto-new-pass-manager">; def: F<"plugin-opt=cs-profile-generate">, - Alias<lto_cs_profile_generate>, HelpText<"Alias for -lto-cs-profile-generate">; + Alias<lto_cs_profile_generate>, HelpText<"Alias for --lto-cs-profile-generate">; def: J<"plugin-opt=cs-profile-path=">, - Alias<lto_cs_profile_file>, HelpText<"Alias for -lto-cs-profile-file">; + Alias<lto_cs_profile_file>, HelpText<"Alias for --lto-cs-profile-file">; def: J<"plugin-opt=obj-path=">, Alias<lto_obj_path_eq>, - HelpText<"Alias for -lto-obj-path=">; + HelpText<"Alias for --lto-obj-path=">; def: J<"plugin-opt=sample-profile=">, - Alias<lto_sample_profile>, HelpText<"Alias for -lto-sample-profile">; -def: F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for -save-temps">; + Alias<lto_sample_profile>, HelpText<"Alias for --lto-sample-profile">; +def: 
F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for --save-temps">; def: F<"plugin-opt=thinlto-emit-imports-files">, Alias<thinlto_emit_imports_files>, - HelpText<"Alias for -thinlto-emit-imports-files">; + HelpText<"Alias for --thinlto-emit-imports-files">; def: F<"plugin-opt=thinlto-index-only">, Alias<thinlto_index_only>, - HelpText<"Alias for -thinlto-index-only">; + HelpText<"Alias for --thinlto-index-only">; def: J<"plugin-opt=thinlto-index-only=">, Alias<thinlto_index_only_eq>, - HelpText<"Alias for -thinlto-index-only=">; + HelpText<"Alias for --thinlto-index-only=">; def: J<"plugin-opt=thinlto-object-suffix-replace=">, Alias<thinlto_object_suffix_replace_eq>, - HelpText<"Alias for -thinlto-object-suffix-replace=">; + HelpText<"Alias for --thinlto-object-suffix-replace=">; def: J<"plugin-opt=thinlto-prefix-replace=">, Alias<thinlto_prefix_replace_eq>, - HelpText<"Alias for -thinlto-prefix-replace=">; + HelpText<"Alias for --thinlto-prefix-replace=">; // Ignore LTO plugin-related options. // clang -flto passes -plugin and -plugin-opt to the linker. This is required @@ -548,10 +614,17 @@ def: J<"plugin-opt=thinlto-prefix-replace=">, // --version output. defm plugin: Eq<"plugin", "Ignored for compatibility with GNU linkers">; -def plugin_opt_fresolution_eq: J<"plugin-opt=-fresolution=">; -def plugin_opt_pass_through_eq: J<"plugin-opt=-pass-through=">; -def plugin_opt_thinlto: J<"plugin-opt=thinlto">; -def plugin_opt_slash: J<"plugin-opt=/">; +def plugin_opt_eq_minus: J<"plugin-opt=-">, + HelpText<"Specify an LLVM option for compatibility with LLVMgold.so">; +def: J<"plugin-opt=thinlto">; + +// Ignore GCC collect2 LTO plugin related options. Note that we don't support +// GCC LTO, but GCC collect2 passes these options even in non-LTO mode. +def: J<"plugin-opt=-fresolution=">; +def: J<"plugin-opt=-pass-through=">; +// This may be either an unhandled LLVMgold.so feature or GCC passed +// -plugin-opt=path/to/{liblto_plugin.so,lto-wrapper} +def plugin_opt_eq : J<"plugin-opt=">; // Options listed below are silently ignored for now for compatibility. 
def: F<"detect-odr-violations">; @@ -562,6 +635,7 @@ def: F<"no-copy-dt-needed-entries">; def: F<"no-ctors-in-init-array">; def: F<"no-keep-memory">; def: F<"no-pipeline-knowledge">; +def: F<"no-relax">; def: F<"no-warn-mismatch">; def: Flag<["-"], "p">; def: Separate<["--", "-"], "rpath-link">; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 6142cb0783ce..7e9e76b070ec 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -14,11 +14,11 @@ #include "Target.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Compression.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/SHA1.h" #include <regex> @@ -27,9 +27,9 @@ using namespace llvm::dwarf; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { uint8_t *Out::bufferStart; uint8_t Out::first; PhdrEntry *Out::tlsPhdr; @@ -39,7 +39,7 @@ OutputSection *Out::preinitArray; OutputSection *Out::initArray; OutputSection *Out::finiArray; -std::vector<OutputSection *> outputSections; +std::vector<OutputSection *> elf::outputSections; uint32_t OutputSection::getPhdrFlags() const { uint32_t ret = 0; @@ -114,8 +114,7 @@ void OutputSection::commitSection(InputSection *isec) { flags = isec->flags; } else { // Otherwise, check if new type or flags are compatible with existing ones. - unsigned mask = SHF_TLS | SHF_LINK_ORDER; - if ((flags & mask) != (isec->flags & mask)) + if ((flags ^ isec->flags) & SHF_TLS) error("incompatible section flags for " + name + "\n>>> " + toString(isec) + ": 0x" + utohexstr(isec->flags) + "\n>>> output section " + name + ": 0x" + utohexstr(flags)); @@ -226,7 +225,7 @@ static void sortByOrder(MutableArrayRef<InputSection *> in, in[i] = v[i].second; } -uint64_t getHeaderSize() { +uint64_t elf::getHeaderSize() { if (config->oFormatBinary) return 0; return Out::elfHeader->size + Out::programHeaders->size; @@ -243,6 +242,25 @@ void OutputSection::sort(llvm::function_ref<int(InputSectionBase *s)> order) { sortByOrder(isd->sections, order); } +static void nopInstrFill(uint8_t *buf, size_t size) { + if (size == 0) + return; + unsigned i = 0; + if (size == 0) + return; + std::vector<std::vector<uint8_t>> nopFiller = *target->nopInstrs; + unsigned num = size / nopFiller.back().size(); + for (unsigned c = 0; c < num; ++c) { + memcpy(buf + i, nopFiller.back().data(), nopFiller.back().size()); + i += nopFiller.back().size(); + } + unsigned remaining = size - i; + if (!remaining) + return; + assert(nopFiller[remaining - 1].size() == remaining); + memcpy(buf + i, nopFiller[remaining - 1].data(), remaining); +} + // Fill [Buf, Buf + Size) with Filler. // This is used for linker script "=fillexp" command. static void fill(uint8_t *buf, size_t size, @@ -331,7 +349,11 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { end = buf + size; else end = buf + sections[i + 1]->outSecOff; - fill(start, end - start, filler); + if (isec->nopFiller) { + assert(target->nopInstrs); + nopInstrFill(start, end - start); + } else + fill(start, end - start, filler); } }); @@ -357,8 +379,7 @@ static void finalizeShtGroup(OutputSection *os, } void OutputSection::finalize() { - std::vector<InputSection *> v = getInputSections(this); - InputSection *first = v.empty() ? 
nullptr : v[0]; + InputSection *first = getFirstInputSection(this); if (flags & SHF_LINK_ORDER) { // We must preserve the link order dependency of sections with the @@ -367,8 +388,9 @@ void OutputSection::finalize() { // all InputSections in the OutputSection have the same dependency. if (auto *ex = dyn_cast<ARMExidxSyntheticSection>(first)) link = ex->getLinkOrderDep()->getParent()->sectionIndex; - else if (auto *d = first->getLinkOrderDep()) - link = d->getParent()->sectionIndex; + else if (first->flags & SHF_LINK_ORDER) + if (auto *d = first->getLinkOrderDep()) + link = d->getParent()->sectionIndex; } if (type == SHT_GROUP) { @@ -456,7 +478,7 @@ void OutputSection::sortCtorsDtors() { // If an input string is in the form of "foo.N" where N is a number, // return N. Otherwise, returns 65536, which is one greater than the // lowest priority. -int getPriority(StringRef s) { +int elf::getPriority(StringRef s) { size_t pos = s.rfind('.'); if (pos == StringRef::npos) return 65536; @@ -466,7 +488,15 @@ int getPriority(StringRef s) { return v; } -std::vector<InputSection *> getInputSections(OutputSection *os) { +InputSection *elf::getFirstInputSection(const OutputSection *os) { + for (BaseCommand *base : os->sectionCommands) + if (auto *isd = dyn_cast<InputSectionDescription>(base)) + if (!isd->sections.empty()) + return isd->sections[0]; + return nullptr; +} + +std::vector<InputSection *> elf::getInputSections(const OutputSection *os) { std::vector<InputSection *> ret; for (BaseCommand *base : os->sectionCommands) if (auto *isd = dyn_cast<InputSectionDescription>(base)) @@ -507,6 +537,3 @@ template void OutputSection::maybeCompress<ELF32LE>(); template void OutputSection::maybeCompress<ELF32BE>(); template void OutputSection::maybeCompress<ELF64LE>(); template void OutputSection::maybeCompress<ELF64BE>(); - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index a24294eedf35..d5686f11ec8e 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -118,7 +118,8 @@ private: int getPriority(StringRef s); -std::vector<InputSection *> getInputSections(OutputSection* os); +InputSection *getFirstInputSection(const OutputSection *os); +std::vector<InputSection *> getInputSections(const OutputSection *os); // All output sections that are handled by the linker specially are // globally accessible. Writer initializes them, so don't use them diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 4731554e0c0d..751ded397768 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -62,9 +62,9 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { static Optional<std::string> getLinkerScriptLocation(const Symbol &sym) { for (BaseCommand *base : script->sectionCommands) if (auto *cmd = dyn_cast<SymbolAssignment>(base)) @@ -73,6 +73,15 @@ static Optional<std::string> getLinkerScriptLocation(const Symbol &sym) { return None; } +static std::string getDefinedLocation(const Symbol &sym) { + std::string msg = "\n>>> defined in "; + if (sym.file) + msg += toString(sym.file); + else if (Optional<std::string> loc = getLinkerScriptLocation(sym)) + msg += *loc; + return msg; +} + // Construct a message in the following format. 
// // >>> defined in /home/alice/src/foo.o @@ -80,19 +89,30 @@ static Optional<std::string> getLinkerScriptLocation(const Symbol &sym) { // >>> /home/alice/src/bar.o:(.text+0x1) static std::string getLocation(InputSectionBase &s, const Symbol &sym, uint64_t off) { - std::string msg = "\n>>> defined in "; - if (sym.file) - msg += toString(sym.file); - else if (Optional<std::string> loc = getLinkerScriptLocation(sym)) - msg += *loc; - - msg += "\n>>> referenced by "; + std::string msg = getDefinedLocation(sym) + "\n>>> referenced by "; std::string src = s.getSrcMsg(sym, off); if (!src.empty()) msg += src + "\n>>> "; return msg + s.getObjMsg(off); } +void elf::reportRangeError(uint8_t *loc, const Relocation &rel, const Twine &v, + int64_t min, uint64_t max) { + ErrorPlace errPlace = getErrorPlace(loc); + std::string hint; + if (rel.sym && !rel.sym->isLocal()) + hint = "; references " + lld::toString(*rel.sym) + + getDefinedLocation(*rel.sym); + + if (errPlace.isec && errPlace.isec->name.startswith(".debug")) + hint += "; consider recompiling with -fdebug-types-section to reduce size " + "of debug sections"; + + errorOrWarn(errPlace.loc + "relocation " + lld::toString(rel.type) + + " out of range: " + v.str() + " is not in [" + Twine(min).str() + + ", " + Twine(max).str() + "]" + hint); +} + namespace { // Build a bitmask with one bit set for each RelExpr. // @@ -177,9 +197,9 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, return 1; } - bool canRelax = config->emachine != EM_ARM && - config->emachine != EM_HEXAGON && - config->emachine != EM_RISCV; + bool toExecRelax = !config->shared && config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && + config->emachine != EM_RISCV; // If we are producing an executable and the symbol is non-preemptable, it // must be defined and the code sequence can be relaxed to use Local-Exec. @@ -197,7 +217,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>( expr)) { // Local-Dynamic relocs can be relaxed to Local-Exec. - if (canRelax && !config->shared) { + if (toExecRelax) { c.relocations.push_back( {target->adjustRelaxExpr(type, nullptr, R_RELAX_TLS_LD_TO_LE), type, offset, addend, &sym}); @@ -218,7 +238,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, } // Local-Dynamic relocs can be relaxed to Local-Exec. - if (expr == R_DTPREL && !config->shared) { + if (expr == R_DTPREL && toExecRelax) { c.relocations.push_back( {target->adjustRelaxExpr(type, nullptr, R_RELAX_TLS_LD_TO_LE), type, offset, addend, &sym}); @@ -240,7 +260,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC>(expr)) { - if (!canRelax || config->shared) { + if (!toExecRelax) { if (in.got->addDynTlsEntry(sym)) { uint64_t off = in.got->getGlobalDynOffset(sym); @@ -288,7 +308,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // defined. 
if (oneof<R_GOT, R_GOTPLT, R_GOT_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr) && - canRelax && isLocalInExecutable) { + toExecRelax && isLocalInExecutable) { c.relocations.push_back({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); return 1; } @@ -408,6 +428,14 @@ static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, assert(absVal && relE); + // Allow R_PLT_PC (optimized to R_PC here) to a hidden undefined weak symbol + // in PIC mode. This is a little strange, but it allows us to link function + // calls to such symbols (e.g. glibc/stdlib/exit.c:__run_exit_handlers). + // Normally such a call will be guarded with a comparison, which will load a + // zero from the GOT. + if (sym.isUndefWeak()) + return true; + // We set the final symbols values for linker script defined symbols later. // They always can be computed as a link time constant. if (sym.scriptDefined) @@ -753,7 +781,7 @@ static const Symbol *getAlternativeSpelling(const Undefined &sym, break; // Substitute name[i]. - newName = name; + newName = std::string(name); for (char c = '0'; c <= 'z'; ++c) { newName[i] = c; if (const Symbol *s = suggest(newName)) @@ -842,7 +870,7 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, if (msg.empty()) msg = "undefined " + visibility() + "symbol: " + toString(sym); - const size_t maxUndefReferences = 10; + const size_t maxUndefReferences = 3; size_t i = 0; for (UndefinedDiag::Loc l : undef.locs) { if (i >= maxUndefReferences) @@ -873,8 +901,9 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, } if (sym.getName().startswith("_ZTV")) - msg += "\nthe vtable symbol may be undefined because the class is missing " - "its key function (see https://lld.llvm.org/missingkeyfunction)"; + msg += + "\n>>> the vtable symbol may be undefined because the class is missing " + "its key function (see https://lld.llvm.org/missingkeyfunction)"; if (undef.isWarning) warn(msg); @@ -882,7 +911,7 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, error(msg); } -template <class ELFT> void reportUndefinedSymbols() { +template <class ELFT> void elf::reportUndefinedSymbols() { // Find the first "undefined symbol" diagnostic for each diagnostic, and // collect all "referenced from" lines at the first diagnostic. DenseMap<Symbol *, UndefinedDiag *> firstRef; @@ -918,8 +947,12 @@ static bool maybeReportUndefined(Symbol &sym, InputSectionBase &sec, // .toc and the .rela.toc are incorrectly not placed in the comdat. The ELF // spec says references from outside the group to a STB_LOCAL symbol are not // allowed. Work around the bug. - if (config->emachine == EM_PPC64 && - cast<Undefined>(sym).discardedSecIdx != 0 && sec.name == ".toc") + // + // PPC32 .got2 is similar but cannot be fixed. 
Multiple .got2 is infeasible + // because .LC0-.LTOC is not representable if the two labels are in different + // .got2 + if (cast<Undefined>(sym).discardedSecIdx != 0 && + (sec.name == ".got2" || sec.name == ".toc")) return false; bool isWarning = @@ -1190,10 +1223,17 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, getLocation(sec, sym, offset)); if (!sym.isInPlt()) addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); - if (!sym.isDefined()) + if (!sym.isDefined()) { replaceWithDefined( sym, in.plt, target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0); + if (config->emachine == EM_PPC) { + // PPC32 canonical PLT entries are at the beginning of .glink + cast<Defined>(sym).value = in.plt->headerSize; + in.plt->headerSize += 16; + cast<PPC32GlinkSection>(in.plt)->canonical_plts.push_back(&sym); + } + } sym.needsPltAddr = true; sec.relocations.push_back({expr, type, offset, addend, &sym}); return; @@ -1254,17 +1294,6 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (expr == R_NONE) return; - // We can separate the small code model relocations into 2 categories: - // 1) Those that access the compiler generated .toc sections. - // 2) Those that access the linker allocated got entries. - // lld allocates got entries to symbols on demand. Since we don't try to sort - // the got entries in any way, we don't have to track which objects have - // got-based small code model relocs. The .toc sections get placed after the - // end of the linker allocated .got section and we do sort those so sections - // addressed with small code model relocations come first. - if (config->emachine == EM_PPC64 && isPPC64SmallCodeModelTocReloc(type)) - sec.file->ppc64SmallCodeModelTocRelocs = true; - if (sym.isGnuIFunc() && !config->zText && config->warnIfuncTextrel) { warn("using ifunc symbols when text relocations are allowed may produce " "a binary that will segfault, if the object file is linked with " @@ -1278,6 +1307,25 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // Read an addend. int64_t addend = computeAddend<ELFT>(rel, end, sec, expr, sym.isLocal()); + if (config->emachine == EM_PPC64) { + // We can separate the small code model relocations into 2 categories: + // 1) Those that access the compiler generated .toc sections. + // 2) Those that access the linker allocated got entries. + // lld allocates got entries to symbols on demand. Since we don't try to + // sort the got entries in any way, we don't have to track which objects + // have got-based small code model relocs. The .toc sections get placed + // after the end of the linker allocated .got section and we do sort those + // so sections addressed with small code model relocations come first. + if (isPPC64SmallCodeModelTocReloc(type)) + sec.file->ppc64SmallCodeModelTocRelocs = true; + + // Record the TOC entry (.toc + addend) as not relaxable. See the comment in + // InputSectionBase::relocateAlloc(). + if (type == R_PPC64_TOC16_LO && sym.isSection() && isa<Defined>(sym) && + cast<Defined>(sym).section->name == ".toc") + ppc64noTocRelax.insert({&sym, addend}); + } + // Relax relocations. 
// // If we know that a PLT entry will be resolved within the same ELF module, we @@ -1290,10 +1338,16 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (expr == R_GOT_PC && !isAbsoluteValue(sym)) { expr = target->adjustRelaxExpr(type, relocatedAddr, expr); } else { - // Addend of R_PPC_PLTREL24 is used to choose call stub type. It should be - // ignored if optimized to R_PC. + // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call + // stub type. It should be ignored if optimized to R_PC. if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL) - addend = 0; + addend &= ~0x8000; + // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into + // call __tls_get_addr even if the symbol is non-preemptible. + if (!(config->emachine == EM_HEXAGON && + (type == R_HEX_GD_PLT_B22_PCREL || + type == R_HEX_GD_PLT_B22_PCREL_X || + type == R_HEX_GD_PLT_B32_PCREL_X))) expr = fromPlt(expr); } } @@ -1460,7 +1514,7 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef<RelTy> rels) { }); } -template <class ELFT> void scanRelocations(InputSectionBase &s) { +template <class ELFT> void elf::scanRelocations(InputSectionBase &s) { if (s.areRelocsRela) scanRelocs<ELFT>(s, s.relas<ELFT>()); else @@ -1744,6 +1798,37 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os, uint64_t off) { auto *ts = make<ThunkSection>(os, off); ts->partition = os->partition; + if ((config->fixCortexA53Errata843419 || config->fixCortexA8) && + !isd->sections.empty()) { + // The errata fixes are sensitive to addresses modulo 4 KiB. When we add + // thunks we disturb the base addresses of sections placed after the thunks + // this makes patches we have generated redundant, and may cause us to + // generate more patches as different instructions are now in sensitive + // locations. When we generate more patches we may force more branches to + // go out of range, causing more thunks to be generated. In pathological + // cases this can cause the address dependent content pass not to converge. + // We fix this by rounding up the size of the ThunkSection to 4KiB, this + // limits the insertion of a ThunkSection on the addresses modulo 4 KiB, + // which means that adding Thunks to the section does not invalidate + // errata patches for following code. + // Rounding up the size to 4KiB has consequences for code-size and can + // trip up linker script defined assertions. For example the linux kernel + // has an assertion that what LLD represents as an InputSectionDescription + // does not exceed 4 KiB even if the overall OutputSection is > 128 Mib. + // We use the heuristic of rounding up the size when both of the following + // conditions are true: + // 1.) The OutputSection is larger than the ThunkSectionSpacing. This + // accounts for the case where no single InputSectionDescription is + // larger than the OutputSection size. This is conservative but simple. + // 2.) The InputSectionDescription is larger than 4 KiB. This will prevent + // any assertion failures that an InputSectionDescription is < 4 KiB + // in size. 
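A worked instance of the rounding heuristic described above, using made-up numbers purely for illustration:

  // Suppose sections.front()->outSecOff == 0x0, sections.back()->outSecOff == 0x11f0
  // and sections.back()->getSize() == 0x20. Then the isdSize computed in the code
  // that follows is 0x11f0 + 0x20 - 0x0 = 0x1210 (4624 bytes), which exceeds 4096.
  // If the OutputSection is also larger than target->getThunkSectionSpacing(),
  // roundUpSizeForErrata is set and the ThunkSection is padded up to a 4 KiB
  // boundary so the errata patches generated earlier remain valid.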
+ uint64_t isdSize = isd->sections.back()->outSecOff + + isd->sections.back()->getSize() - + isd->sections.front()->outSecOff; + if (os->size > target->getThunkSectionSpacing() && isdSize > 4096) + ts->roundUpSizeForErrata = true; + } isd->thunkSections.push_back({ts, pass}); return ts; } @@ -1812,9 +1897,7 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; - // TODO Restore addend on all targets. - if (config->emachine == EM_AARCH64 || config->emachine == EM_PPC64) - rel.addend = t->addend; + rel.addend = t->addend; if (rel.sym->isInPlt()) rel.expr = toPlt(rel.expr); } @@ -1892,16 +1975,11 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { rel.sym = t->getThunkTargetSym(); rel.expr = fromPlt(rel.expr); - // On AArch64 and PPC64, a jump/call relocation may be encoded as + // On AArch64 and PPC, a jump/call relocation may be encoded as // STT_SECTION + non-zero addend, clear the addend after // redirection. - // - // The addend of R_PPC_PLTREL24 should be ignored after changing to - // R_PC. - if (config->emachine == EM_AARCH64 || - config->emachine == EM_PPC64 || - (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24)) - rel.addend = 0; + if (config->emachine != EM_MIPS) + rel.addend = -getPCBias(rel.type); } for (auto &p : isd->thunkSections) @@ -1917,14 +1995,49 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { return addressesChanged; } -template void scanRelocations<ELF32LE>(InputSectionBase &); -template void scanRelocations<ELF32BE>(InputSectionBase &); -template void scanRelocations<ELF64LE>(InputSectionBase &); -template void scanRelocations<ELF64BE>(InputSectionBase &); -template void reportUndefinedSymbols<ELF32LE>(); -template void reportUndefinedSymbols<ELF32BE>(); -template void reportUndefinedSymbols<ELF64LE>(); -template void reportUndefinedSymbols<ELF64BE>(); - -} // namespace elf -} // namespace lld +// The following aid in the conversion of call x@GDPLT to call __tls_get_addr +// hexagonNeedsTLSSymbol scans for relocations would require a call to +// __tls_get_addr. +// hexagonTLSSymbolUpdate rebinds the relocation to __tls_get_addr. 
+bool elf::hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections) { + bool needTlsSymbol = false; + forEachInputSectionDescription( + outputSections, [&](OutputSection *os, InputSectionDescription *isd) { + for (InputSection *isec : isd->sections) + for (Relocation &rel : isec->relocations) + if (rel.sym->type == llvm::ELF::STT_TLS && rel.expr == R_PLT_PC) { + needTlsSymbol = true; + return; + } + }); + return needTlsSymbol; +} + +void elf::hexagonTLSSymbolUpdate(ArrayRef<OutputSection *> outputSections) { + Symbol *sym = symtab->find("__tls_get_addr"); + if (!sym) + return; + bool needEntry = true; + forEachInputSectionDescription( + outputSections, [&](OutputSection *os, InputSectionDescription *isd) { + for (InputSection *isec : isd->sections) + for (Relocation &rel : isec->relocations) + if (rel.sym->type == llvm::ELF::STT_TLS && rel.expr == R_PLT_PC) { + if (needEntry) { + addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, + *sym); + needEntry = false; + } + rel.sym = sym; + } + }); +} + +template void elf::scanRelocations<ELF32LE>(InputSectionBase &); +template void elf::scanRelocations<ELF32BE>(InputSectionBase &); +template void elf::scanRelocations<ELF64LE>(InputSectionBase &); +template void elf::scanRelocations<ELF64BE>(InputSectionBase &); +template void elf::reportUndefinedSymbols<ELF32LE>(); +template void elf::reportUndefinedSymbols<ELF32BE>(); +template void elf::reportUndefinedSymbols<ELF64LE>(); +template void elf::reportUndefinedSymbols<ELF64BE>(); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index bfec1e628851..ec59c63410d0 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -24,6 +24,7 @@ class SectionBase; // Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL. using RelType = uint32_t; +using JumpModType = uint32_t; // List of target-independent relocation types. Relocations read // from files are converted to these types so that the main code @@ -80,6 +81,7 @@ enum RelExpr { R_AARCH64_PAGE_PC, R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, R_AARCH64_TLSDESC_PAGE, + R_ARM_PCA, R_ARM_SBREL, R_MIPS_GOTREL, R_MIPS_GOT_GP, @@ -107,6 +109,15 @@ struct Relocation { Symbol *sym; }; +// Manipulate jump instructions with these modifiers. These are used to relax +// jump instruction opcodes at basic block boundaries and are particularly +// useful when basic block sections are enabled. +struct JumpInstrMod { + JumpModType original; + uint64_t offset; + unsigned size; +}; + // This function writes undefined symbol diagnostics to an internal buffer. // Call reportUndefinedSymbols() after calling scanRelocations() to emit // the diagnostics. @@ -114,6 +125,9 @@ template <class ELFT> void scanRelocations(InputSectionBase &); template <class ELFT> void reportUndefinedSymbols(); +void hexagonTLSSymbolUpdate(ArrayRef<OutputSection *> outputSections); +bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections); + class ThunkSection; class Thunk; struct InputSectionDescription; diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp index e0ff56fec3f3..9ac8447eef0e 100644 --- a/lld/ELF/ScriptLexer.cpp +++ b/lld/ELF/ScriptLexer.cpp @@ -36,9 +36,9 @@ #include "llvm/ADT/Twine.h" using namespace llvm; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { // Returns a whole line containing the current token. StringRef ScriptLexer::getLine() { StringRef s = getCurrentMB().getBuffer(); @@ -52,6 +52,8 @@ StringRef ScriptLexer::getLine() { // Returns 1-based line number of the current token. 
size_t ScriptLexer::getLineNumber() { + if (pos == 0) + return 1; StringRef s = getCurrentMB().getBuffer(); StringRef tok = tokens[pos - 1]; return s.substr(0, tok.data() - s.data()).count('\n') + 1; @@ -64,7 +66,7 @@ size_t ScriptLexer::getColumnNumber() { } std::string ScriptLexer::getCurrentLocation() { - std::string filename = getCurrentMB().getBufferIdentifier(); + std::string filename = std::string(getCurrentMB().getBufferIdentifier()); return (filename + ":" + Twine(getLineNumber())).str(); } @@ -187,7 +189,7 @@ static std::vector<StringRef> tokenizeExpr(StringRef s) { break; } - // Get a token before the opreator. + // Get a token before the operator. if (e != 0) ret.push_back(s.substr(0, e)); @@ -292,12 +294,11 @@ static bool encloses(StringRef s, StringRef t) { MemoryBufferRef ScriptLexer::getCurrentMB() { // Find input buffer containing the current token. - assert(!mbs.empty() && pos > 0); + assert(!mbs.empty()); + if (pos == 0) + return mbs.back(); for (MemoryBufferRef mb : mbs) if (encloses(mb.getBuffer(), tokens[pos - 1])) return mb; llvm_unreachable("getCurrentMB: failed to find a token"); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h index 98e4cac95a73..306d428e98fe 100644 --- a/lld/ELF/ScriptLexer.h +++ b/lld/ELF/ScriptLexer.h @@ -40,13 +40,14 @@ public: bool inExpr = false; size_t pos = 0; +protected: + MemoryBufferRef getCurrentMB(); + private: void maybeSplitExpr(); StringRef getLine(); size_t getLineNumber(); size_t getColumnNumber(); - - MemoryBufferRef getCurrentMB(); }; } // namespace elf diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index fd8de3b54bd7..fea6b7a274e7 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ScopedPrinter.h" #include <cassert> #include <limits> #include <vector> @@ -37,9 +38,9 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { class ScriptParser final : ScriptLexer { public: @@ -91,10 +92,13 @@ private: OutputSection *readOutputSectionDescription(StringRef outSec); std::vector<BaseCommand *> readOverlay(); std::vector<StringRef> readOutputSectionPhdrs(); + std::pair<uint64_t, uint64_t> readInputSectionFlags(); InputSectionDescription *readInputSectionDescription(StringRef tok); StringMatcher readFilePatterns(); std::vector<SectionPattern> readInputSectionsList(); - InputSectionDescription *readInputSectionRules(StringRef filePattern); + InputSectionDescription *readInputSectionRules(StringRef filePattern, + uint64_t withFlags, + uint64_t withoutFlags); unsigned readPhdrType(); SortSectionPolicy readSortKind(); SymbolAssignment *readProvideHidden(bool provide, bool hidden); @@ -104,7 +108,7 @@ private: Expr readConstant(); Expr getPageSize(); - uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); + Expr readMemoryAssignment(StringRef, StringRef, StringRef); std::pair<uint32_t, uint32_t> readMemoryAttributes(); Expr combine(StringRef op, Expr l, Expr r); @@ -171,7 +175,6 @@ static ExprValue bitOr(ExprValue a, ExprValue b) { } void ScriptParser::readDynamicList() { - config->hasDynamicList = true; expect("{"); std::vector<SymbolVersion> locals; std::vector<SymbolVersion> globals; @@ -286,22 +289,40 @@ void ScriptParser::addFile(StringRef s) { } if 
(s.startswith("/")) { + // Case 1: s is an absolute path. Just open it. driver->addFile(s, /*withLOption=*/false); } else if (s.startswith("=")) { + // Case 2: relative to the sysroot. if (config->sysroot.empty()) driver->addFile(s.substr(1), /*withLOption=*/false); else driver->addFile(saver.save(config->sysroot + "/" + s.substr(1)), /*withLOption=*/false); } else if (s.startswith("-l")) { + // Case 3: search in the list of library paths. driver->addLibrary(s.substr(2)); - } else if (sys::fs::exists(s)) { - driver->addFile(s, /*withLOption=*/false); } else { - if (Optional<std::string> path = findFromSearchPaths(s)) - driver->addFile(saver.save(*path), /*withLOption=*/true); - else - setError("unable to find " + s); + // Case 4: s is a relative path. Search in the directory of the script file. + std::string filename = std::string(getCurrentMB().getBufferIdentifier()); + StringRef directory = sys::path::parent_path(filename); + if (!directory.empty()) { + SmallString<0> path(directory); + sys::path::append(path, s); + if (sys::fs::exists(path)) { + driver->addFile(path, /*withLOption=*/false); + return; + } + } + // Then search in the current working directory. + if (sys::fs::exists(s)) { + driver->addFile(s, /*withLOption=*/false); + } else { + // Finally, search in the list of library paths. + if (Optional<std::string> path = findFromSearchPaths(s)) + driver->addFile(saver.save(*path), /*withLOption=*/true); + else + setError("unable to find " + s); + } } } @@ -400,6 +421,7 @@ static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) { .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS}) .Case("elf32-littleriscv", {ELF32LEKind, EM_RISCV}) .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) + .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) .Default({ELFNoneKind, EM_NONE}); } @@ -408,14 +430,14 @@ static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) { void ScriptParser::readOutputFormat() { expect("("); - StringRef name = unquote(next()); - StringRef s = name; + config->bfdname = unquote(next()); + StringRef s = config->bfdname; if (s.consume_back("-freebsd")) config->osabi = ELFOSABI_FREEBSD; std::tie(config->ekind, config->emachine) = parseBfdName(s); if (config->emachine == EM_NONE) - setError("unknown output format name: " + name); + setError("unknown output format name: " + config->bfdname); if (s == "elf32-ntradlittlemips" || s == "elf32-ntradbigmips") config->mipsN32Abi = true; @@ -519,13 +541,6 @@ std::vector<BaseCommand *> ScriptParser::readOverlay() { } void ScriptParser::readSections() { - script->hasSectionsCommand = true; - - // -no-rosegment is used to avoid placing read only non-executable sections in - // their own segment. We do the same if SECTIONS command is present in linker - // script. See comment for computeFlags(). 
- config->singleRoRx = true; - expect("{"); std::vector<BaseCommand *> v; while (!errorCount() && !consume("}")) { @@ -544,22 +559,23 @@ void ScriptParser::readSections() { else v.push_back(readOutputSectionDescription(tok)); } + script->sectionCommands.insert(script->sectionCommands.end(), v.begin(), + v.end()); - if (!atEOF() && consume("INSERT")) { - std::vector<BaseCommand *> *dest = nullptr; - if (consume("AFTER")) - dest = &script->insertAfterCommands[next()]; - else if (consume("BEFORE")) - dest = &script->insertBeforeCommands[next()]; - else - setError("expected AFTER/BEFORE, but got '" + next() + "'"); - if (dest) - dest->insert(dest->end(), v.begin(), v.end()); + if (atEOF() || !consume("INSERT")) { + script->hasSectionsCommand = true; return; } - script->sectionCommands.insert(script->sectionCommands.end(), v.begin(), - v.end()); + bool isAfter = false; + if (consume("AFTER")) + isAfter = true; + else if (!consume("BEFORE")) + setError("expected AFTER/BEFORE, but got '" + next() + "'"); + StringRef where = next(); + for (BaseCommand *cmd : v) + if (auto *os = dyn_cast<OutputSection>(cmd)) + script->insertCommands.push_back({os, isAfter, where}); } void ScriptParser::readTarget() { @@ -593,10 +609,11 @@ static int precedence(StringRef op) { } StringMatcher ScriptParser::readFilePatterns() { - std::vector<StringRef> v; + StringMatcher Matcher; + while (!errorCount() && !consume(")")) - v.push_back(next()); - return StringMatcher(v); + Matcher.addPattern(SingleStringMatcher(next())); + return Matcher; } SortSectionPolicy ScriptParser::readSortKind() { @@ -633,12 +650,12 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() { excludeFilePat = readFilePatterns(); } - std::vector<StringRef> v; + StringMatcher SectionMatcher; while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE") - v.push_back(unquote(next())); + SectionMatcher.addPattern(unquote(next())); - if (!v.empty()) - ret.push_back({std::move(excludeFilePat), StringMatcher(v)}); + if (!SectionMatcher.empty()) + ret.push_back({std::move(excludeFilePat), std::move(SectionMatcher)}); else setError("section pattern is expected"); } @@ -657,8 +674,10 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() { // // <section-list> is parsed by readInputSectionsList(). InputSectionDescription * -ScriptParser::readInputSectionRules(StringRef filePattern) { - auto *cmd = make<InputSectionDescription>(filePattern); +ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags, + uint64_t withoutFlags) { + auto *cmd = + make<InputSectionDescription>(filePattern, withFlags, withoutFlags); expect("("); while (!errorCount() && !consume(")")) { @@ -694,15 +713,23 @@ InputSectionDescription * ScriptParser::readInputSectionDescription(StringRef tok) { // Input section wildcard can be surrounded by KEEP. 
// https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep + uint64_t withFlags = 0; + uint64_t withoutFlags = 0; if (tok == "KEEP") { expect("("); - StringRef filePattern = next(); - InputSectionDescription *cmd = readInputSectionRules(filePattern); + if (consume("INPUT_SECTION_FLAGS")) + std::tie(withFlags, withoutFlags) = readInputSectionFlags(); + InputSectionDescription *cmd = + readInputSectionRules(next(), withFlags, withoutFlags); expect(")"); script->keptSections.push_back(cmd); return cmd; } - return readInputSectionRules(tok); + if (tok == "INPUT_SECTION_FLAGS") { + std::tie(withFlags, withoutFlags) = readInputSectionFlags(); + tok = next(); + } + return readInputSectionRules(tok, withFlags, withoutFlags); } void ScriptParser::readSort() { @@ -737,6 +764,7 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, Stri expect("("); if (consume("NOLOAD")) { cmd->noload = true; + cmd->type = SHT_NOBITS; } else { skip(); // This is "COPY", "INFO" or "OVERLAY". cmd->nonAlloc = true; @@ -781,9 +809,14 @@ OutputSection *ScriptParser::readOverlaySectionDescription() { script->createOutputSection(next(), getCurrentLocation()); cmd->inOverlay = true; expect("{"); - while (!errorCount() && !consume("}")) - cmd->sectionCommands.push_back(readInputSectionRules(next())); - cmd->phdrs = readOutputSectionPhdrs(); + while (!errorCount() && !consume("}")) { + uint64_t withFlags = 0; + uint64_t withoutFlags = 0; + if (consume("INPUT_SECTION_FLAGS")) + std::tie(withFlags, withoutFlags) = readInputSectionFlags(); + cmd->sectionCommands.push_back( + readInputSectionRules(next(), withFlags, withoutFlags)); + } return cmd; } @@ -828,9 +861,9 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) { // We handle the FILL command as an alias for =fillexp section attribute, // which is different from what GNU linkers do. // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html - expect("("); + if (peek() != "(") + setError("( expected, but got " + peek()); cmd->filler = readFill(); - expect(")"); } else if (tok == "SORT") { readSort(); } else if (tok == "INCLUDE") { @@ -841,18 +874,21 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) { // We have a file name and no input sections description. It is not a // commonly used syntax, but still acceptable. In that case, all sections // from the file will be included. + // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not + // handle this case here as it will already have been matched by the + // case above. auto *isd = make<InputSectionDescription>(tok); - isd->sectionPatterns.push_back({{}, StringMatcher({"*"})}); + isd->sectionPatterns.push_back({{}, StringMatcher("*")}); cmd->sectionCommands.push_back(isd); } } if (consume(">")) - cmd->memoryRegionName = next(); + cmd->memoryRegionName = std::string(next()); if (consume("AT")) { expect(">"); - cmd->lmaRegionName = next(); + cmd->lmaRegionName = std::string(next()); } if (cmd->lmaExpr && !cmd->lmaRegionName.empty()) @@ -882,8 +918,11 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) { // When reading a hexstring, ld.bfd handles it as a blob of arbitrary // size, while ld.gold always handles it as a 32-bit big-endian number. // We are compatible with ld.gold because it's easier to implement. +// Also, we require that expressions with operators must be wrapped into +// round brackets. 
We did it to resolve the ambiguity when parsing scripts like: +// SECTIONS { .foo : { ... } =120+3 /DISCARD/ : { ... } } std::array<uint8_t, 4> ScriptParser::readFill() { - uint64_t value = readExpr()().val; + uint64_t value = readPrimary()().val; if (value > UINT32_MAX) setError("filler expression result does not fit 32-bit: 0x" + Twine::utohexstr(value)); @@ -1102,6 +1141,63 @@ ByteCommand *ScriptParser::readByteCommand(StringRef tok) { return make<ByteCommand>(e, size, commandString); } +static llvm::Optional<uint64_t> parseFlag(StringRef tok) { + if (llvm::Optional<uint64_t> asInt = parseInt(tok)) + return asInt; +#define CASE_ENT(enum) #enum, ELF::enum + return StringSwitch<llvm::Optional<uint64_t>>(tok) + .Case(CASE_ENT(SHF_WRITE)) + .Case(CASE_ENT(SHF_ALLOC)) + .Case(CASE_ENT(SHF_EXECINSTR)) + .Case(CASE_ENT(SHF_MERGE)) + .Case(CASE_ENT(SHF_STRINGS)) + .Case(CASE_ENT(SHF_INFO_LINK)) + .Case(CASE_ENT(SHF_LINK_ORDER)) + .Case(CASE_ENT(SHF_OS_NONCONFORMING)) + .Case(CASE_ENT(SHF_GROUP)) + .Case(CASE_ENT(SHF_TLS)) + .Case(CASE_ENT(SHF_COMPRESSED)) + .Case(CASE_ENT(SHF_EXCLUDE)) + .Case(CASE_ENT(SHF_ARM_PURECODE)) + .Default(None); +#undef CASE_ENT +} + +// Reads the '(' <flags> ')' list of section flags in +// INPUT_SECTION_FLAGS '(' <flags> ')' in the +// following form: +// <flags> ::= <flag> +// | <flags> & flag +// <flag> ::= Recognized Flag Name, or Integer value of flag. +// If the first character of <flag> is a ! then this means without flag, +// otherwise with flag. +// Example: SHF_EXECINSTR & !SHF_WRITE means with flag SHF_EXECINSTR and +// without flag SHF_WRITE. +std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() { + uint64_t withFlags = 0; + uint64_t withoutFlags = 0; + expect("("); + while (!errorCount()) { + StringRef tok = unquote(next()); + bool without = tok.consume_front("!"); + if (llvm::Optional<uint64_t> flag = parseFlag(tok)) { + if (without) + withoutFlags |= *flag; + else + withFlags |= *flag; + } else { + setError("unrecognised flag: " + tok); + } + if (consume(")")) + break; + if (!consume("&")) { + next(); + setError("expected & or )"); + } + } + return std::make_pair(withFlags, withoutFlags); +} + StringRef ScriptParser::readParenLiteral() { expect("("); bool orig = inExpr; @@ -1222,7 +1318,7 @@ Expr ScriptParser::readPrimary() { setError("memory region not defined: " + name); return [] { return 0; }; } - return [=] { return script->memoryRegions[name]->length; }; + return script->memoryRegions[name]->length; } if (tok == "LOADADDR") { StringRef name = readParenLiteral(); @@ -1249,7 +1345,7 @@ Expr ScriptParser::readPrimary() { setError("memory region not defined: " + name); return [] { return 0; }; } - return [=] { return script->memoryRegions[name]->origin; }; + return script->memoryRegions[name]->origin; } if (tok == "SEGMENT_START") { expect("("); @@ -1268,7 +1364,7 @@ Expr ScriptParser::readPrimary() { return [=] { return cmd->size; }; } if (tok == "SIZEOF_HEADERS") - return [=] { return getHeaderSize(); }; + return [=] { return elf::getHeaderSize(); }; // Tok is the dot. if (tok == ".") @@ -1374,12 +1470,11 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) { // as a parent. This version hierarchy is, probably against your // instinct, purely for hint; the runtime doesn't care about it // at all. In LLD, we simply ignore it. 
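The INPUT_SECTION_FLAGS grammar documented in readInputSectionFlags() above amounts to accumulating two bit masks, one for flags that must be present and one for flags that must be absent. A minimal standalone sketch of that decomposition follows; it is not the LLD implementation, the splitFlags helper and its tiny flag table are assumptions for illustration, with the constants taken from the ELF specification:

    #include <cstdint>
    #include <map>
    #include <sstream>
    #include <string>
    #include <utility>

    // Decompose "FLAG & !FLAG & ..." into (withFlags, withoutFlags).
    static std::pair<uint64_t, uint64_t> splitFlags(const std::string &expr) {
      static const std::map<std::string, uint64_t> known = {
          {"SHF_WRITE", 0x1}, {"SHF_ALLOC", 0x2}, {"SHF_EXECINSTR", 0x4}};
      uint64_t with = 0, without = 0;
      std::istringstream in(expr);
      std::string tok;
      while (in >> tok) {
        if (tok == "&")
          continue;
        bool negate = !tok.empty() && tok[0] == '!';
        if (negate)
          tok.erase(0, 1);
        auto it = known.find(tok);
        if (it != known.end())
          (negate ? without : with) |= it->second;
        // The real parser reports "unrecognised flag" instead of ignoring it.
      }
      return {with, without};
    }

Here splitFlags("SHF_ALLOC & !SHF_WRITE") yields {0x2, 0x1}: keep input sections whose sh_flags have SHF_ALLOC set and SHF_WRITE clear, analogous to the SHF_EXECINSTR & !SHF_WRITE example in the comment above.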
- if (peek() != ";") - skip(); - expect(";"); + if (next() != ";") + expect(";"); } -static bool hasWildcard(StringRef s) { +bool elf::hasWildcard(StringRef s) { return s.find_first_of("?*[") != StringRef::npos; } @@ -1440,14 +1535,14 @@ std::vector<SymbolVersion> ScriptParser::readVersionExtern() { return ret; } -uint64_t ScriptParser::readMemoryAssignment(StringRef s1, StringRef s2, - StringRef s3) { +Expr ScriptParser::readMemoryAssignment(StringRef s1, StringRef s2, + StringRef s3) { if (!consume(s1) && !consume(s2) && !consume(s3)) { setError("expected one of: " + s1 + ", " + s2 + ", or " + s3); - return 0; + return [] { return 0; }; } expect("="); - return readExpr()().getValue(); + return readExpr(); } // Parse the MEMORY command as specified in: @@ -1471,9 +1566,9 @@ void ScriptParser::readMemory() { } expect(":"); - uint64_t origin = readMemoryAssignment("ORIGIN", "org", "o"); + Expr origin = readMemoryAssignment("ORIGIN", "org", "o"); expect(","); - uint64_t length = readMemoryAssignment("LENGTH", "len", "l"); + Expr length = readMemoryAssignment("LENGTH", "len", "l"); // Add the memory region to the region map. MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, negFlags); @@ -1511,19 +1606,18 @@ std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { return {flags, negFlags}; } -void readLinkerScript(MemoryBufferRef mb) { +void elf::readLinkerScript(MemoryBufferRef mb) { ScriptParser(mb).readLinkerScript(); } -void readVersionScript(MemoryBufferRef mb) { +void elf::readVersionScript(MemoryBufferRef mb) { ScriptParser(mb).readVersionScript(); } -void readDynamicList(MemoryBufferRef mb) { ScriptParser(mb).readDynamicList(); } +void elf::readDynamicList(MemoryBufferRef mb) { + ScriptParser(mb).readDynamicList(); +} -void readDefsym(StringRef name, MemoryBufferRef mb) { +void elf::readDefsym(StringRef name, MemoryBufferRef mb) { ScriptParser(mb).readDefsym(name); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/ScriptParser.h b/lld/ELF/ScriptParser.h index c953fb302b9a..eed1958647f8 100644 --- a/lld/ELF/ScriptParser.h +++ b/lld/ELF/ScriptParser.h @@ -27,6 +27,8 @@ void readDynamicList(MemoryBufferRef mb); // Parses the defsym expression. void readDefsym(StringRef name, MemoryBufferRef mb); +bool hasWildcard(StringRef s); + } // namespace elf } // namespace lld diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index f7a8a99cf8f9..afc8b05f8767 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -26,10 +26,10 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { -SymbolTable *symtab; +SymbolTable *elf::symtab; void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { // Swap symbols as instructed by -wrap. @@ -40,12 +40,18 @@ void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { idx2 = idx1; idx1 = idx3; - // Now renaming is complete. No one refers Real symbol. We could leave - // Real as-is, but if Real is written to the symbol table, that may - // contain irrelevant values. So, we copy all values from Sym to Real. - StringRef s = real->getName(); + if (real->exportDynamic) + sym->exportDynamic = true; + + // Now renaming is complete, and no one refers to real. We drop real from + // .symtab and .dynsym. If real is undefined, it is important that we don't + // leave it in .dynsym, because otherwise it might lead to an undefined symbol + // error in a subsequent link. 
If real is defined, we could emit real as an + // alias for sym, but that could degrade the user experience of some tools + // that can print out only one symbol for each location: sym is a preferred + // name than real, but they might print out real instead. memcpy(real, sym, sizeof(SymbolUnion)); - real->setName(s); + real->isUsedInRegularObj = false; } // Find an existing symbol or create a new one. @@ -88,7 +94,7 @@ Symbol *SymbolTable::insert(StringRef name) { } Symbol *SymbolTable::addSymbol(const Symbol &newSym) { - Symbol *sym = symtab->insert(newSym.getName()); + Symbol *sym = insert(newSym.getName()); sym->resolve(newSym); return sym; } @@ -103,6 +109,13 @@ Symbol *SymbolTable::find(StringRef name) { return sym; } +// A version script/dynamic list is only meaningful for a Defined symbol. +// A CommonSymbol will be converted to a Defined in replaceCommonSymbols(). +// A lazy symbol may be made Defined if an LTO libcall fetches it. +static bool canBeVersioned(const Symbol &sym) { + return sym.isDefined() || sym.isCommon() || sym.isLazy(); +} + // Initialize demangledSyms with a map from demangled symbols to symbol // objects. Used to handle "extern C++" directive in version scripts. // @@ -119,11 +132,9 @@ Symbol *SymbolTable::find(StringRef name) { StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() { if (!demangledSyms) { demangledSyms.emplace(); - for (Symbol *sym : symVector) { - if (!sym->isDefined() && !sym->isCommon()) - continue; - (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym); - } + for (Symbol *sym : symVector) + if (canBeVersioned(*sym)) + (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym); } return *demangledSyms; } @@ -131,15 +142,15 @@ StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() { std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) { if (ver.isExternCpp) return getDemangledSyms().lookup(ver.name); - if (Symbol *b = find(ver.name)) - if (b->isDefined() || b->isCommon()) - return {b}; + if (Symbol *sym = find(ver.name)) + if (canBeVersioned(*sym)) + return {sym}; return {}; } std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) { std::vector<Symbol *> res; - StringMatcher m(ver.name); + SingleStringMatcher m(ver.name); if (ver.isExternCpp) { for (auto &p : getDemangledSyms()) @@ -149,7 +160,7 @@ std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) { } for (Symbol *sym : symVector) - if ((sym->isDefined() || sym->isCommon()) && m.match(sym->getName())) + if (canBeVersioned(*sym) && m.match(sym->getName())) res.push_back(sym); return res; } @@ -264,6 +275,3 @@ void SymbolTable::scanVersionScript() { // --dynamic-list. handleDynamicList(); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index f0f6121009a5..8f2f55418df5 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -16,27 +16,39 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include <cstring> using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { // Returns a symbol for an error message. 
static std::string demangle(StringRef symName) { if (elf::config->demangle) return demangleItanium(symName); - return symName; + return std::string(symName); } -std::string toString(const elf::Symbol &b) { return demangle(b.getName()); } -std::string toELFString(const Archive::Symbol &b) { +std::string lld::toString(const elf::Symbol &sym) { + StringRef name = sym.getName(); + std::string ret = demangle(name); + + // If sym has a non-default version, its name may have been truncated at '@' + // by Symbol::parseSymbolVersion(). Add the trailing part. This check is safe + // because every symbol name ends with '\0'. + if (name.data()[name.size()] == '@') + ret += name.data() + name.size(); + return ret; +} + +std::string lld::toELFString(const Archive::Symbol &b) { return demangle(b.getName()); } -namespace elf { Defined *ElfSym::bss; Defined *ElfSym::etext1; Defined *ElfSym::etext2; @@ -52,6 +64,7 @@ Defined *ElfSym::relaIpltStart; Defined *ElfSym::relaIpltEnd; Defined *ElfSym::riscvGlobalPointer; Defined *ElfSym::tlsModuleBase; +DenseMap<const Symbol *, const InputFile *> elf::backwardReferences; static uint64_t getSymVA(const Symbol &sym, int64_t &addend) { switch (sym.kind()) { @@ -99,7 +112,7 @@ static uint64_t getSymVA(const Symbol &sym, int64_t &addend) { // MIPS relocatable files can mix regular and microMIPS code. // Linker needs to distinguish such code. To do so microMIPS // symbols has the `STO_MIPS_MICROMIPS` flag in the `st_other` - // field. Unfortunately, the `MIPS::relocateOne()` method has + // field. Unfortunately, the `MIPS::relocate()` method has // a symbol value only. To pass type of the symbol (regular/microMIPS) // to that routine as well as other places where we write // a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry` @@ -265,7 +278,7 @@ uint8_t Symbol::computeBinding() const { if (config->relocatable) return binding; if ((visibility != STV_DEFAULT && visibility != STV_PROTECTED) || - versionId == VER_NDX_LOCAL) + (versionId == VER_NDX_LOCAL && isDefined())) return STB_LOCAL; if (!config->gnuUnique && binding == STB_GNU_UNIQUE) return STB_GLOBAL; @@ -278,13 +291,17 @@ bool Symbol::includeInDynsym() const { if (computeBinding() == STB_LOCAL) return false; if (!isDefined() && !isCommon()) - return true; + // This should unconditionally return true, unfortunately glibc -static-pie + // expects undefined weak symbols not to exist in .dynsym, e.g. + // __pthread_mutex_lock reference in _dl_add_to_namespace_list, + // __pthread_initialize_minimal reference in csu/libc-start.c. + return !(config->noDynamicLinker && isUndefWeak()); return exportDynamic || inDynamicList; } // Print out a log message for --trace-symbol. -void printTraceSymbol(const Symbol *sym) { +void elf::printTraceSymbol(const Symbol *sym) { std::string s; if (sym->isUndefined()) s = ": reference to "; @@ -300,7 +317,7 @@ void printTraceSymbol(const Symbol *sym) { message(toString(sym->file) + s + sym->getName()); } -void maybeWarnUnorderableSymbol(const Symbol *sym) { +void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { if (!config->warnSymbolOrdering) return; @@ -332,7 +349,7 @@ void maybeWarnUnorderableSymbol(const Symbol *sym) { // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. 
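A note on the lld::toString() change near the top of this Symbols.cpp hunk: it depends on the guarantee stated in its comment that every symbol name is NUL-terminated, which makes peeking one character past the StringRef safe and recovers the '@' version suffix that Symbol::parseSymbolVersion() cut off. A small self-contained illustration, with a made-up symbol name:

    #include "llvm/ADT/StringRef.h"
    #include <string>

    static std::string prettyName() {
      const char *stored = "foo@@GLIBC_2.2.5"; // full name in the string table
      llvm::StringRef name(stored, 3);         // truncated at '@': "foo"
      std::string pretty = name.str();
      if (name.data()[name.size()] == '@')     // suffix was cut off, not absent
        pretty += name.data() + name.size();   // back to "foo@@GLIBC_2.2.5"
      return pretty;
    }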
-bool computeIsPreemptible(const Symbol &sym) { +bool elf::computeIsPreemptible(const Symbol &sym) { assert(!sym.isLocal()); // Only symbols with default visibility that appear in dynsym can be @@ -348,16 +365,22 @@ bool computeIsPreemptible(const Symbol &sym) { if (!config->shared) return false; - // If the dynamic list is present, it specifies preemptable symbols in a DSO. - if (config->hasDynamicList) + // If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is + // specified and the symbol is STT_FUNC, the symbol is preemptible iff it is + // in the dynamic list. + if (config->symbolic || (config->bsymbolicFunctions && sym.isFunc())) return sym.inDynamicList; - - // -Bsymbolic means that definitions are not preempted. - if (config->bsymbolic || (config->bsymbolicFunctions && sym.isFunc())) - return false; return true; } +void elf::reportBackrefs() { + for (auto &it : backwardReferences) { + const Symbol &sym = *it.first; + warn("backward reference detected: " + sym.getName() + " in " + + toString(it.second) + " refers to " + toString(sym.file)); + } +} + static uint8_t getMinVisibility(uint8_t va, uint8_t vb) { if (va == STV_DEFAULT) return vb; @@ -490,13 +513,28 @@ void Symbol::resolveUndefined(const Undefined &other) { // group assignment rule simulates the traditional linker's semantics. bool backref = config->warnBackrefs && other.file && file->groupId < other.file->groupId; + if (backref) { + // Some libraries have known problems and can cause noise. Filter them out + // with --warn-backrefs-exclude=. + StringRef name = + !file->archiveName.empty() ? file->archiveName : file->getName(); + for (const llvm::GlobPattern &pat : config->warnBackrefsExclude) + if (pat.match(name)) { + backref = false; + break; + } + } fetch(); // We don't report backward references to weak symbols as they can be // overridden later. + // + // A traditional linker does not error for -ldef1 -lref -ldef2 (linking + // sandwich), where def2 may or may not be the same as def1. We don't want + // to warn for this case, so dismiss the warning if we see a subsequent lazy + // definition. if (backref && !isWeak()) - warn("backward reference detected: " + other.getName() + " in " + - toString(other.file) + " refers to " + toString(file)); + backwardReferences.try_emplace(this, other.file); return; } @@ -510,7 +548,6 @@ void Symbol::resolveUndefined(const Undefined &other) { // reference is weak. if (other.binding != STB_WEAK || !referenced) binding = other.binding; - referenced = true; } } @@ -654,8 +691,12 @@ void Symbol::resolveDefined(const Defined &other) { } template <class LazyT> void Symbol::resolveLazy(const LazyT &other) { - if (!isUndefined()) + if (!isUndefined()) { + // See the comment in resolveUndefined(). + if (isDefined()) + backwardReferences.erase(this); return; + } // An undefined weak will not fetch archive members. See comment on Lazy in // Symbols.h for the details. 
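For the --warn-backrefs-exclude filtering added in resolveUndefined() above, the exclusion list holds plain llvm::GlobPattern objects matched against the name of the archive that supplies the definition (or the object file name when there is no archive name). A minimal sketch of that check in isolation; the isExcluded helper and its parameters are illustrative, not LLD's interface:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/GlobPattern.h"
    #include <vector>

    // True if the named file matches any exclude pattern, in which case the
    // backward-reference warning for it is suppressed.
    static bool isExcluded(llvm::StringRef fileName,
                           const std::vector<llvm::GlobPattern> &excludes) {
      for (const llvm::GlobPattern &pat : excludes)
        if (pat.match(fileName)) // e.g. a pattern such as "*/libgcc*.a"
          return true;
      return false;
    }

Each pattern would be built up front with llvm::GlobPattern::create(), which rejects malformed globs, and stored in config->warnBackrefsExclude as the loop above expects.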
@@ -683,9 +724,6 @@ void Symbol::resolveShared(const SharedSymbol &other) { uint8_t bind = binding; replace(other); binding = bind; - referenced = true; - } + } else if (traced) + printTraceSymbol(&other); } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index ac606198afd8..b69d263153d2 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -17,10 +17,12 @@ #include "InputSection.h" #include "lld/Common/LLVM.h" #include "lld/Common/Strings.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" namespace lld { +// Returns a string representation for a symbol for diagnostics. std::string toString(const elf::Symbol &); // There are two different ways to convert an Archive::Symbol to a string: @@ -256,6 +258,9 @@ public: uint8_t isPreemptible : 1; // True if an undefined or shared symbol is used from a live section. + // + // NOTE: In Writer.cpp the field is used to mark local defined symbols + // which are referenced by relocations when -r or --emit-relocs is given. uint8_t used : 1; // True if a call to this symbol needs to be followed by a restore of the @@ -515,13 +520,16 @@ size_t Symbol::getSymbolSize() const { void Symbol::replace(const Symbol &newSym) { using llvm::ELF::STT_TLS; - // Symbols representing thread-local variables must be referenced by - // TLS-aware relocations, and non-TLS symbols must be reference by - // non-TLS relocations, so there's a clear distinction between TLS - // and non-TLS symbols. It is an error if the same symbol is defined - // as a TLS symbol in one file and as a non-TLS symbol in other file. - if (symbolKind != PlaceholderKind && !isLazy() && !newSym.isLazy() && - (type == STT_TLS) != (newSym.type == STT_TLS)) + // st_value of STT_TLS represents the assigned offset, not the actual address + // which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can only be + // referenced by special TLS relocations. It is usually an error if a STT_TLS + // symbol is replaced by a non-STT_TLS symbol, vice versa. There are two + // exceptions: (a) a STT_NOTYPE lazy/undefined symbol can be replaced by a + // STT_TLS symbol, (b) a STT_TLS undefined symbol can be replaced by a + // STT_NOTYPE lazy symbol. + if (symbolKind != PlaceholderKind && !newSym.isLazy() && + (type == STT_TLS) != (newSym.type == STT_TLS) && + type != llvm::ELF::STT_NOTYPE) error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " + toString(newSym.file) + "\n>>> defined in " + toString(file)); @@ -555,6 +563,11 @@ void Symbol::replace(const Symbol &newSym) { void maybeWarnUnorderableSymbol(const Symbol *sym); bool computeIsPreemptible(const Symbol &sym); +void reportBackrefs(); + +// A mapping from a symbol to an InputFile referencing it backward. Used by +// --warn-backrefs. 
+extern llvm::DenseMap<const Symbol *, const InputFile *> backwardReferences; } // namespace elf } // namespace lld diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 550a5b38b89b..731b9f658060 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -22,10 +22,10 @@ #include "Symbols.h" #include "Target.h" #include "Writer.h" +#include "lld/Common/DWARF.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" @@ -36,6 +36,8 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/Parallel.h" +#include "llvm/Support/TimeProfiler.h" #include <cstdlib> #include <thread> @@ -44,13 +46,13 @@ using namespace llvm::dwarf; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; +using namespace lld; +using namespace lld::elf; using llvm::support::endian::read32le; using llvm::support::endian::write32le; using llvm::support::endian::write64le; -namespace lld { -namespace elf { constexpr size_t MergeNoTailSection::numShards; static uint64_t readUint(uint8_t *buf) { @@ -81,7 +83,7 @@ static ArrayRef<uint8_t> getVersion() { // With this feature, you can identify LLD-generated binaries easily // by "readelf --string-dump .comment <file>". // The returned object is a mergeable string section. -MergeInputSection *createCommentSection() { +MergeInputSection *elf::createCommentSection() { return make<MergeInputSection>(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, getVersion(), ".comment"); } @@ -137,7 +139,7 @@ MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { flags.ases |= s->ases; flags.flags1 |= s->flags1; flags.flags2 |= s->flags2; - flags.fp_abi = getMipsFpAbiFlag(flags.fp_abi, s->fp_abi, filename); + flags.fp_abi = elf::getMipsFpAbiFlag(flags.fp_abi, s->fp_abi, filename); }; if (create) @@ -251,7 +253,7 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { return make<MipsReginfoSection<ELFT>>(reginfo); } -InputSection *createInterpSection() { +InputSection *elf::createInterpSection() { // StringSaver guarantees that the returned string ends with '\0'. StringRef s = saver.save(config->dynamicLinker); ArrayRef<uint8_t> contents = {(const uint8_t *)s.data(), s.size() + 1}; @@ -260,8 +262,8 @@ InputSection *createInterpSection() { ".interp"); } -Defined *addSyntheticLocal(StringRef name, uint8_t type, uint64_t value, - uint64_t size, InputSectionBase §ion) { +Defined *elf::addSyntheticLocal(StringRef name, uint8_t type, uint64_t value, + uint64_t size, InputSectionBase §ion) { auto *s = make<Defined>(section.file, name, STB_LOCAL, STV_DEFAULT, type, value, size, §ion); if (in.symTab) @@ -1273,7 +1275,7 @@ static uint64_t addPltRelSz() { // Add remaining entries to complete .dynamic contents. 
template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { - Partition &part = getPartition(); + elf::Partition &part = getPartition(); bool isMain = part.name.empty(); for (StringRef s : config->filterList) @@ -1315,6 +1317,8 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { dtFlags1 |= DF_1_NODELETE; if (config->zNodlopen) dtFlags1 |= DF_1_NOOPEN; + if (config->pie) + dtFlags1 |= DF_1_PIE; if (config->zNow) { dtFlags |= DF_BIND_NOW; dtFlags1 |= DF_1_NOW; @@ -1400,7 +1404,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { if (config->emachine == EM_AARCH64) { if (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) addInt(DT_AARCH64_BTI_PLT, 0); - if (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_PAC) + if (config->zPacPlt) addInt(DT_AARCH64_PAC_PLT, 0); } @@ -2149,7 +2153,7 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *buf) { eSym->st_size = sym->getSize(); // st_value is usually an address of a symbol, but that has a - // special meaining for uninstantiated common symbols (this can + // special meaning for uninstantiated common symbols (this can // occur if -r is given). if (BssSection *commonSec = getCommonSec(ent.sym)) eSym->st_value = commonSec->alignment; @@ -2176,7 +2180,7 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *buf) { // We already set the less-significant bit for symbols // marked by the `STO_MIPS_MICROMIPS` flag and for microMIPS PLT // records. That allows us to distinguish such symbols in - // the `MIPS<ELFT>::relocateOne()` routine. Now we should + // the `MIPS<ELFT>::relocate()` routine. Now we should // clear that bit for non-dynamic symbol table, so tools // like `objdump` will be able to deal with a correct // symbol position. @@ -2248,7 +2252,7 @@ size_t SymtabShndxSection::getSize() const { // DSOs. That means resolving all dynamic symbols takes O(m)*O(n) // where m is the number of DSOs and n is the number of dynamic // symbols. For modern large programs, both m and n are large. So -// making each step faster by using hash tables substiantially +// making each step faster by using hash tables substantially // improves time to load programs. // // (Note that this is not the only way to design the shared library. @@ -2446,7 +2450,7 @@ PltSection::PltSection() : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"), headerSize(target->pltHeaderSize) { // On PowerPC, this section contains lazy symbol resolvers. - if (config->emachine == EM_PPC || config->emachine == EM_PPC64) { + if (config->emachine == EM_PPC64) { name = ".glink"; alignment = 4; } @@ -2464,11 +2468,6 @@ PltSection::PltSection() } void PltSection::writeTo(uint8_t *buf) { - if (config->emachine == EM_PPC) { - writePPC32GlinkSection(buf, entries.size()); - return; - } - // At beginning of PLT, we have code to call the dynamic // linker to resolve dynsyms at runtime. Write such code. target->writePltHeader(buf); @@ -2540,6 +2539,19 @@ void IpltSection::addSymbols() { } } +PPC32GlinkSection::PPC32GlinkSection() { + name = ".glink"; + alignment = 4; +} + +void PPC32GlinkSection::writeTo(uint8_t *buf) { + writePPC32GlinkSection(buf, entries.size()); +} + +size_t PPC32GlinkSection::getSize() const { + return headerSize + entries.size() * target->pltEntrySize + footerSize; +} + // This is an x86-only extra PLT section and used only when a security // enhancement feature called CET is enabled. 
In this comment, I'll explain what // the feature is and why we have two PLT sections if CET is enabled. @@ -2664,12 +2676,12 @@ readAddressAreas(DWARFContext &dwarf, InputSection *sec) { uint32_t cuIdx = 0; for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units()) { if (Error e = cu->tryExtractDIEsIfNeeded(false)) { - error(toString(sec) + ": " + toString(std::move(e))); + warn(toString(sec) + ": " + toString(std::move(e))); return {}; } Expected<DWARFAddressRangesVector> ranges = cu->collectAddressRanges(); if (!ranges) { - error(toString(sec) + ": " + toString(ranges.takeError())); + warn(toString(sec) + ": " + toString(ranges.takeError())); return {}; } @@ -2677,15 +2689,11 @@ readAddressAreas(DWARFContext &dwarf, InputSection *sec) { for (DWARFAddressRange &r : *ranges) { if (r.SectionIndex == -1ULL) continue; - InputSectionBase *s = sections[r.SectionIndex]; - if (!s || s == &InputSection::discarded || !s->isLive()) - continue; // Range list with zero size has no effect. - if (r.LowPC == r.HighPC) - continue; - auto *isec = cast<InputSection>(s); - uint64_t offset = isec->getOffsetInFile(); - ret.push_back({isec, r.LowPC - offset, r.HighPC - offset, cuIdx}); + InputSectionBase *s = sections[r.SectionIndex]; + if (s && s != &InputSection::discarded && s->isLive()) + if (r.LowPC != r.HighPC) + ret.push_back({cast<InputSection>(s), r.LowPC, r.HighPC, cuIdx}); } ++cuIdx; } @@ -2697,12 +2705,16 @@ template <class ELFT> static std::vector<GdbIndexSection::NameAttrEntry> readPubNamesAndTypes(const LLDDwarfObj<ELFT> &obj, const std::vector<GdbIndexSection::CuEntry> &cus) { - const DWARFSection &pubNames = obj.getGnuPubnamesSection(); - const DWARFSection &pubTypes = obj.getGnuPubtypesSection(); + const LLDDWARFSection &pubNames = obj.getGnuPubnamesSection(); + const LLDDWARFSection &pubTypes = obj.getGnuPubtypesSection(); std::vector<GdbIndexSection::NameAttrEntry> ret; - for (const DWARFSection *pub : {&pubNames, &pubTypes}) { - DWARFDebugPubTable table(obj, *pub, config->isLE, true); + for (const LLDDWARFSection *pub : {&pubNames, &pubTypes}) { + DWARFDataExtractor data(obj, *pub, config->isLE, config->wordsize); + DWARFDebugPubTable table; + table.extract(data, /*GnuStyle=*/true, [&](Error e) { + warn(toString(pub->sec) + ": " + toString(std::move(e))); + }); for (const DWARFDebugPubTable::Set &set : table.getData()) { // The value written into the constant pool is kind << 24 | cuIndex. As we // don't know how many compilation units precede this object to compute @@ -2740,11 +2752,11 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs, // The number of symbols we will handle in this function is of the order // of millions for very large executables, so we use multi-threading to // speed it up. - size_t numShards = 32; - size_t concurrency = 1; - if (threadsEnabled) - concurrency = - std::min<size_t>(PowerOf2Floor(hardware_concurrency()), numShards); + constexpr size_t numShards = 32; + size_t concurrency = PowerOf2Floor( + std::min<size_t>(hardware_concurrency(parallel::strategy.ThreadsRequested) + .compute_thread_count(), + numShards)); // A sharded map to uniquify symbols by name. std::vector<DenseMap<CachedHashStringRef, size_t>> map(numShards); @@ -2816,6 +2828,8 @@ template <class ELFT> GdbIndexSection *GdbIndexSection::create() { std::vector<std::vector<NameAttrEntry>> nameAttrs(sections.size()); parallelForEachN(0, sections.size(), [&](size_t i) { + // To keep memory usage low, we don't want to keep cached DWARFContext, so + // avoid getDwarf() here. 
ObjFile<ELFT> *file = sections[i]->getFile<ELFT>(); DWARFContext dwarf(std::make_unique<LLDDwarfObj<ELFT>>(file)); @@ -3041,7 +3055,7 @@ bool VersionTableSection::isNeeded() const { (getPartition().verDef || getPartition().verNeed->isNeeded()); } -void addVerneed(Symbol *ss) { +void elf::addVerneed(Symbol *ss) { auto &file = cast<SharedFile>(*ss->file); if (ss->verdefIndex == VER_NDX_GLOBAL) { ss->versionId = VER_NDX_GLOBAL; @@ -3185,10 +3199,10 @@ void MergeNoTailSection::finalizeContents() { // Concurrency level. Must be a power of 2 to avoid expensive modulo // operations in the following tight loop. - size_t concurrency = 1; - if (threadsEnabled) - concurrency = - std::min<size_t>(PowerOf2Floor(hardware_concurrency()), numShards); + size_t concurrency = PowerOf2Floor( + std::min<size_t>(hardware_concurrency(parallel::strategy.ThreadsRequested) + .compute_thread_count(), + numShards)); // Add section pieces to the builders. parallelForEachN(0, concurrency, [&](size_t threadId) { @@ -3224,16 +3238,17 @@ void MergeNoTailSection::finalizeContents() { }); } -MergeSyntheticSection *createMergeSynthetic(StringRef name, uint32_t type, - uint64_t flags, - uint32_t alignment) { +MergeSyntheticSection *elf::createMergeSynthetic(StringRef name, uint32_t type, + uint64_t flags, + uint32_t alignment) { bool shouldTailMerge = (flags & SHF_STRINGS) && config->optimize >= 2; if (shouldTailMerge) return make<MergeTailSection>(name, type, flags, alignment); return make<MergeNoTailSection>(name, type, flags, alignment); } -template <class ELFT> void splitSections() { +template <class ELFT> void elf::splitSections() { + llvm::TimeTraceScope timeScope("Split sections"); // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). parallelForEach(inputSections, [](InputSectionBase *sec) { @@ -3254,7 +3269,7 @@ ARMExidxSyntheticSection::ARMExidxSyntheticSection() static InputSection *findExidxSection(InputSection *isec) { for (InputSection *d : isec->dependentSections) - if (d->type == SHT_ARM_EXIDX) + if (d->type == SHT_ARM_EXIDX && d->isLive()) return d; return nullptr; } @@ -3267,8 +3282,13 @@ static bool isValidExidxSectionDep(InputSection *isec) { bool ARMExidxSyntheticSection::addSection(InputSection *isec) { if (isec->type == SHT_ARM_EXIDX) { if (InputSection *dep = isec->getLinkOrderDep()) - if (isValidExidxSectionDep(dep)) + if (isValidExidxSectionDep(dep)) { exidxSections.push_back(isec); + // Every exidxSection is 8 bytes, we need an estimate of + // size before assignAddresses can be called. Final size + // will only be known after finalize is called. + size += 8; + } return true; } @@ -3349,19 +3369,30 @@ void ARMExidxSyntheticSection::finalizeContents() { // ICF may remove executable InputSections and their dependent .ARM.exidx // section that we recorded earlier. auto isDiscarded = [](const InputSection *isec) { return !isec->isLive(); }; - llvm::erase_if(executableSections, isDiscarded); llvm::erase_if(exidxSections, isDiscarded); + // We need to remove discarded InputSections and InputSections without + // .ARM.exidx sections that if we generated the .ARM.exidx it would be out + // of range. 
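The "out of range" case mentioned in the comment above comes from R_ARM_PREL31: each generated .ARM.exidx entry stores a place-relative offset in a 31-bit signed field, so a section with no exidx data of its own can only receive a synthesized can't-unwind entry if its distance from the table fits in [-2^30, 2^30 - 1]. A small sketch of that reachability test, logically equivalent to the SignExtend64(off, 31) check in the code that follows; the helper name and parameters are illustrative:

    #include <cstdint>

    // R_ARM_PREL31 encodes a signed 31-bit place-relative offset.
    static bool reachableByPrel31(uint64_t sectionVA, uint64_t tableVA) {
      int64_t off = static_cast<int64_t>(sectionVA - tableVA);
      return off >= -(int64_t(1) << 30) && off < (int64_t(1) << 30);
    }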
+ auto isDiscardedOrOutOfRange = [this](InputSection *isec) { + if (!isec->isLive()) + return true; + if (findExidxSection(isec)) + return false; + int64_t off = static_cast<int64_t>(isec->getVA() - getVA()); + return off != llvm::SignExtend64(off, 31); + }; + llvm::erase_if(executableSections, isDiscardedOrOutOfRange); // Sort the executable sections that may or may not have associated // .ARM.exidx sections by order of ascending address. This requires the - // relative positions of InputSections to be known. + // relative positions of InputSections and OutputSections to be known. auto compareByFilePosition = [](const InputSection *a, const InputSection *b) { OutputSection *aOut = a->getParent(); OutputSection *bOut = b->getParent(); if (aOut != bOut) - return aOut->sectionIndex < bOut->sectionIndex; + return aOut->addr < bOut->addr; return a->outSecOff < b->outSecOff; }; llvm::stable_sort(executableSections, compareByFilePosition); @@ -3428,7 +3459,7 @@ void ARMExidxSyntheticSection::writeTo(uint8_t *buf) { memcpy(buf + offset, cantUnwindData, sizeof(cantUnwindData)); uint64_t s = isec->getVA(); uint64_t p = getVA() + offset; - target->relocateOne(buf + offset, R_ARM_PREL31, s - p); + target->relocateNoSym(buf + offset, R_ARM_PREL31, s - p); offset += 8; } } @@ -3436,7 +3467,7 @@ void ARMExidxSyntheticSection::writeTo(uint8_t *buf) { memcpy(buf + offset, cantUnwindData, sizeof(cantUnwindData)); uint64_t s = sentinel->getVA(sentinel->getSize()); uint64_t p = getVA() + offset; - target->relocateOne(buf + offset, R_ARM_PREL31, s - p); + target->relocateNoSym(buf + offset, R_ARM_PREL31, s - p); assert(size == offset + 8); } @@ -3451,19 +3482,14 @@ bool ARMExidxSyntheticSection::classof(const SectionBase *d) { } ThunkSection::ThunkSection(OutputSection *os, uint64_t off) - : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, - config->wordsize, ".text.thunk") { + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, + ".text.thunk") { this->parent = os; this->outSecOff = off; } -// When the errata patching is on, we round the size up to a 4 KiB -// boundary. This limits the effect that adding Thunks has on the addresses -// of the program modulo 4 KiB. As the errata patching is sensitive to address -// modulo 4 KiB this can prevent further patches from being needed due to -// Thunk insertion. size_t ThunkSection::getSize() const { - if (config->fixCortexA53Errata843419 || config->fixCortexA8) + if (roundUpSizeForErrata) return alignTo(size, 4096); return size; } @@ -3607,7 +3633,7 @@ static uint8_t getAbiVersion() { return 0; } -template <typename ELFT> void writeEhdr(uint8_t *buf, Partition &part) { +template <typename ELFT> void elf::writeEhdr(uint8_t *buf, Partition &part) { // For executable segments, the trap instructions are written before writing // the header. Setting Elf header bytes to zero ensures that any unused bytes // in header are zero-cleared, instead of having trap instructions. @@ -3633,7 +3659,7 @@ template <typename ELFT> void writeEhdr(uint8_t *buf, Partition &part) { } } -template <typename ELFT> void writePhdrs(uint8_t *buf, Partition &part) { +template <typename ELFT> void elf::writePhdrs(uint8_t *buf, Partition &part) { // Write the program header table. 
auto *hBuf = reinterpret_cast<typename ELFT::Phdr *>(buf); for (PhdrEntry *p : part.phdrs) { @@ -3708,85 +3734,82 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { } } -InStruct in; +InStruct elf::in; -std::vector<Partition> partitions; -Partition *mainPart; +std::vector<Partition> elf::partitions; +Partition *elf::mainPart; template GdbIndexSection *GdbIndexSection::create<ELF32LE>(); template GdbIndexSection *GdbIndexSection::create<ELF32BE>(); template GdbIndexSection *GdbIndexSection::create<ELF64LE>(); template GdbIndexSection *GdbIndexSection::create<ELF64BE>(); -template void splitSections<ELF32LE>(); -template void splitSections<ELF32BE>(); -template void splitSections<ELF64LE>(); -template void splitSections<ELF64BE>(); - -template class MipsAbiFlagsSection<ELF32LE>; -template class MipsAbiFlagsSection<ELF32BE>; -template class MipsAbiFlagsSection<ELF64LE>; -template class MipsAbiFlagsSection<ELF64BE>; - -template class MipsOptionsSection<ELF32LE>; -template class MipsOptionsSection<ELF32BE>; -template class MipsOptionsSection<ELF64LE>; -template class MipsOptionsSection<ELF64BE>; - -template class MipsReginfoSection<ELF32LE>; -template class MipsReginfoSection<ELF32BE>; -template class MipsReginfoSection<ELF64LE>; -template class MipsReginfoSection<ELF64BE>; - -template class DynamicSection<ELF32LE>; -template class DynamicSection<ELF32BE>; -template class DynamicSection<ELF64LE>; -template class DynamicSection<ELF64BE>; - -template class RelocationSection<ELF32LE>; -template class RelocationSection<ELF32BE>; -template class RelocationSection<ELF64LE>; -template class RelocationSection<ELF64BE>; - -template class AndroidPackedRelocationSection<ELF32LE>; -template class AndroidPackedRelocationSection<ELF32BE>; -template class AndroidPackedRelocationSection<ELF64LE>; -template class AndroidPackedRelocationSection<ELF64BE>; - -template class RelrSection<ELF32LE>; -template class RelrSection<ELF32BE>; -template class RelrSection<ELF64LE>; -template class RelrSection<ELF64BE>; - -template class SymbolTableSection<ELF32LE>; -template class SymbolTableSection<ELF32BE>; -template class SymbolTableSection<ELF64LE>; -template class SymbolTableSection<ELF64BE>; - -template class VersionNeedSection<ELF32LE>; -template class VersionNeedSection<ELF32BE>; -template class VersionNeedSection<ELF64LE>; -template class VersionNeedSection<ELF64BE>; - -template void writeEhdr<ELF32LE>(uint8_t *Buf, Partition &Part); -template void writeEhdr<ELF32BE>(uint8_t *Buf, Partition &Part); -template void writeEhdr<ELF64LE>(uint8_t *Buf, Partition &Part); -template void writeEhdr<ELF64BE>(uint8_t *Buf, Partition &Part); - -template void writePhdrs<ELF32LE>(uint8_t *Buf, Partition &Part); -template void writePhdrs<ELF32BE>(uint8_t *Buf, Partition &Part); -template void writePhdrs<ELF64LE>(uint8_t *Buf, Partition &Part); -template void writePhdrs<ELF64BE>(uint8_t *Buf, Partition &Part); - -template class PartitionElfHeaderSection<ELF32LE>; -template class PartitionElfHeaderSection<ELF32BE>; -template class PartitionElfHeaderSection<ELF64LE>; -template class PartitionElfHeaderSection<ELF64BE>; - -template class PartitionProgramHeadersSection<ELF32LE>; -template class PartitionProgramHeadersSection<ELF32BE>; -template class PartitionProgramHeadersSection<ELF64LE>; -template class PartitionProgramHeadersSection<ELF64BE>; - -} // namespace elf -} // namespace lld +template void elf::splitSections<ELF32LE>(); +template void elf::splitSections<ELF32BE>(); +template void elf::splitSections<ELF64LE>(); +template 
void elf::splitSections<ELF64BE>(); + +template class elf::MipsAbiFlagsSection<ELF32LE>; +template class elf::MipsAbiFlagsSection<ELF32BE>; +template class elf::MipsAbiFlagsSection<ELF64LE>; +template class elf::MipsAbiFlagsSection<ELF64BE>; + +template class elf::MipsOptionsSection<ELF32LE>; +template class elf::MipsOptionsSection<ELF32BE>; +template class elf::MipsOptionsSection<ELF64LE>; +template class elf::MipsOptionsSection<ELF64BE>; + +template class elf::MipsReginfoSection<ELF32LE>; +template class elf::MipsReginfoSection<ELF32BE>; +template class elf::MipsReginfoSection<ELF64LE>; +template class elf::MipsReginfoSection<ELF64BE>; + +template class elf::DynamicSection<ELF32LE>; +template class elf::DynamicSection<ELF32BE>; +template class elf::DynamicSection<ELF64LE>; +template class elf::DynamicSection<ELF64BE>; + +template class elf::RelocationSection<ELF32LE>; +template class elf::RelocationSection<ELF32BE>; +template class elf::RelocationSection<ELF64LE>; +template class elf::RelocationSection<ELF64BE>; + +template class elf::AndroidPackedRelocationSection<ELF32LE>; +template class elf::AndroidPackedRelocationSection<ELF32BE>; +template class elf::AndroidPackedRelocationSection<ELF64LE>; +template class elf::AndroidPackedRelocationSection<ELF64BE>; + +template class elf::RelrSection<ELF32LE>; +template class elf::RelrSection<ELF32BE>; +template class elf::RelrSection<ELF64LE>; +template class elf::RelrSection<ELF64BE>; + +template class elf::SymbolTableSection<ELF32LE>; +template class elf::SymbolTableSection<ELF32BE>; +template class elf::SymbolTableSection<ELF64LE>; +template class elf::SymbolTableSection<ELF64BE>; + +template class elf::VersionNeedSection<ELF32LE>; +template class elf::VersionNeedSection<ELF32BE>; +template class elf::VersionNeedSection<ELF64LE>; +template class elf::VersionNeedSection<ELF64BE>; + +template void elf::writeEhdr<ELF32LE>(uint8_t *Buf, Partition &Part); +template void elf::writeEhdr<ELF32BE>(uint8_t *Buf, Partition &Part); +template void elf::writeEhdr<ELF64LE>(uint8_t *Buf, Partition &Part); +template void elf::writeEhdr<ELF64BE>(uint8_t *Buf, Partition &Part); + +template void elf::writePhdrs<ELF32LE>(uint8_t *Buf, Partition &Part); +template void elf::writePhdrs<ELF32BE>(uint8_t *Buf, Partition &Part); +template void elf::writePhdrs<ELF64LE>(uint8_t *Buf, Partition &Part); +template void elf::writePhdrs<ELF64BE>(uint8_t *Buf, Partition &Part); + +template class elf::PartitionElfHeaderSection<ELF32LE>; +template class elf::PartitionElfHeaderSection<ELF32BE>; +template class elf::PartitionElfHeaderSection<ELF64LE>; +template class elf::PartitionElfHeaderSection<ELF64BE>; + +template class elf::PartitionProgramHeadersSection<ELF32LE>; +template class elf::PartitionProgramHeadersSection<ELF32BE>; +template class elf::PartitionProgramHeadersSection<ELF64LE>; +template class elf::PartitionProgramHeadersSection<ELF64BE>; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index f0a598dda51d..8ed82ba64a6e 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -364,7 +364,7 @@ private: // Try to merge two GOTs. In case of success the `Dst` contains // result of merging and the function returns true. In case of - // ovwerflow the `Dst` is unchanged and the function returns false. + // overflow the `Dst` is unchanged and the function returns false. 
bool tryMergeGots(FileGot & dst, FileGot & src, bool isPrimary); }; @@ -683,9 +683,8 @@ public: void addEntry(Symbol &sym); size_t getNumEntries() const { return entries.size(); } - size_t headerSize = 0; + size_t headerSize; -private: std::vector<const Symbol *> entries; }; @@ -705,6 +704,16 @@ public: void addEntry(Symbol &sym); }; +class PPC32GlinkSection : public PltSection { +public: + PPC32GlinkSection(); + void writeTo(uint8_t *buf) override; + size_t getSize() const override; + + std::vector<const Symbol *> canonical_plts; + static constexpr size_t footerSize = 64; +}; + // This is x86-only. class IBTPltSection : public SyntheticSection { public: @@ -1037,7 +1046,7 @@ public: std::vector<InputSection *> exidxSections; private: - size_t size; + size_t size = 0; // Instead of storing pointers to the .ARM.exidx InputSections from // InputObjects, we store pointers to the executable sections that need @@ -1069,6 +1078,10 @@ public: InputSection *getTargetInputSection() const; bool assignOffsets(); + // When true, round up reported size of section to 4 KiB. See comment + // in addThunkSection() for more details. + bool roundUpSizeForErrata = false; + private: std::vector<Thunk *> thunks; size_t size = 0; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 70a68fd8db9e..6abd8b452e23 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -35,19 +35,19 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { -std::string toString(elf::RelType type) { +const TargetInfo *elf::target; + +std::string lld::toString(RelType type) { StringRef s = getELFRelocationTypeName(elf::config->emachine, type); if (s == "Unknown") return ("Unknown (" + Twine(type) + ")").str(); - return s; + return std::string(s); } -namespace elf { -const TargetInfo *target; - -TargetInfo *getTarget() { +TargetInfo *elf::getTarget() { switch (config->emachine) { case EM_386: case EM_IAMCU: @@ -95,7 +95,7 @@ template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *loc) { assert(loc != nullptr); for (InputSectionBase *d : inputSections) { auto *isec = cast<InputSection>(d); - if (!isec->getParent()) + if (!isec->getParent() || (isec->type & SHT_NOBITS)) continue; const uint8_t *isecLoc = @@ -112,7 +112,7 @@ template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *loc) { return {}; } -ErrorPlace getErrorPlace(const uint8_t *loc) { +ErrorPlace elf::getErrorPlace(const uint8_t *loc) { switch (config->ekind) { case ELF32LEKind: return getErrPlace<ELF32LE>(loc); @@ -155,26 +155,27 @@ RelExpr TargetInfo::adjustRelaxExpr(RelType type, const uint8_t *data, return expr; } -void TargetInfo::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { +void TargetInfo::relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const { llvm_unreachable("Should not have claimed to be relaxable"); } -void TargetInfo::relaxTlsGdToLe(uint8_t *loc, RelType type, +void TargetInfo::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const { llvm_unreachable("Should not have claimed to be relaxable"); } -void TargetInfo::relaxTlsGdToIe(uint8_t *loc, RelType type, +void TargetInfo::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const { llvm_unreachable("Should not have claimed to be relaxable"); } -void TargetInfo::relaxTlsIeToLe(uint8_t *loc, RelType type, +void TargetInfo::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const { llvm_unreachable("Should not have claimed to 
be relaxable"); } -void TargetInfo::relaxTlsLdToLe(uint8_t *loc, RelType type, +void TargetInfo::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const { llvm_unreachable("Should not have claimed to be relaxable"); } @@ -185,6 +186,3 @@ uint64_t TargetInfo::getImageBase() const { return *config->imageBase; return config->isPic ? 0 : defaultImageBase; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 949a7bfdf64b..47905ae64a47 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -82,10 +82,27 @@ public: virtual bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const; - virtual void relocateOne(uint8_t *loc, RelType type, uint64_t val) const = 0; + virtual void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const = 0; + void relocateNoSym(uint8_t *loc, RelType type, uint64_t val) const { + relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val); + } + + virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, + JumpModType val) const {} virtual ~TargetInfo(); + // This deletes a jump insn at the end of the section if it is a fall thru to + // the next section. Further, if there is a conditional jump and a direct + // jump consecutively, it tries to flip the conditional jump to convert the + // direct jump into a fall thru and delete it. Returns true if a jump + // instruction can be deleted. + virtual bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const { + return false; + } + unsigned defaultCommonPageSize = 4096; unsigned defaultMaxPageSize = 4096; @@ -122,6 +139,10 @@ public: // executable OutputSections. std::array<uint8_t, 4> trapInstr; + // Stores the NOP instructions of different sizes for the target and is used + // to pad sections that are relaxed. + llvm::Optional<std::vector<std::vector<uint8_t>>> nopInstrs; + // If a target needs to rewrite calls to __morestack to instead call // __morestack_non_split when a split-stack enabled caller calls a // non-split-stack callee this will return true. Otherwise returns false. @@ -129,11 +150,16 @@ public: virtual RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const; - virtual void relaxGot(uint8_t *loc, RelType type, uint64_t val) const; - virtual void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const; - virtual void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const; - virtual void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const; - virtual void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const; + virtual void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const; + virtual void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const; + virtual void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const; + virtual void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const; + virtual void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const; protected: // On FreeBSD x86_64 the first page cannot be mmaped. @@ -171,8 +197,7 @@ static inline std::string getErrorLocation(const uint8_t *loc) { void writePPC32GlinkSection(uint8_t *buf, size_t numEntries); -bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, - uint8_t *bufLoc); +bool tryRelaxPPC64TocIndirection(const Relocation &rel, uint8_t *bufLoc); unsigned getPPCDFormOp(unsigned secondaryOp); // In the PowerPC64 Elf V2 abi a function can have 2 entry points. 
The first @@ -188,6 +213,7 @@ unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther); // the .toc section. bool isPPC64SmallCodeModelTocReloc(RelType type); +void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); @@ -196,44 +222,36 @@ TargetInfo *getTarget(); template <class ELFT> bool isMipsPIC(const Defined *sym); -static inline void reportRangeError(uint8_t *loc, RelType type, const Twine &v, - int64_t min, uint64_t max) { - ErrorPlace errPlace = getErrorPlace(loc); - StringRef hint; - if (errPlace.isec && errPlace.isec->name.startswith(".debug")) - hint = "; consider recompiling with -fdebug-types-section to reduce size " - "of debug sections"; - - errorOrWarn(errPlace.loc + "relocation " + lld::toString(type) + - " out of range: " + v.str() + " is not in [" + Twine(min).str() + - ", " + Twine(max).str() + "]" + hint); -} +void reportRangeError(uint8_t *loc, const Relocation &rel, const Twine &v, + int64_t min, uint64_t max); // Make sure that V can be represented as an N bit signed integer. -inline void checkInt(uint8_t *loc, int64_t v, int n, RelType type) { +inline void checkInt(uint8_t *loc, int64_t v, int n, const Relocation &rel) { if (v != llvm::SignExtend64(v, n)) - reportRangeError(loc, type, Twine(v), llvm::minIntN(n), llvm::maxIntN(n)); + reportRangeError(loc, rel, Twine(v), llvm::minIntN(n), llvm::maxIntN(n)); } // Make sure that V can be represented as an N bit unsigned integer. -inline void checkUInt(uint8_t *loc, uint64_t v, int n, RelType type) { +inline void checkUInt(uint8_t *loc, uint64_t v, int n, const Relocation &rel) { if ((v >> n) != 0) - reportRangeError(loc, type, Twine(v), 0, llvm::maxUIntN(n)); + reportRangeError(loc, rel, Twine(v), 0, llvm::maxUIntN(n)); } // Make sure that V can be represented as an N bit signed or unsigned integer. -inline void checkIntUInt(uint8_t *loc, uint64_t v, int n, RelType type) { +inline void checkIntUInt(uint8_t *loc, uint64_t v, int n, + const Relocation &rel) { // For the error message we should cast V to a signed integer so that error // messages show a small negative value rather than an extremely large one if (v != (uint64_t)llvm::SignExtend64(v, n) && (v >> n) != 0) - reportRangeError(loc, type, Twine((int64_t)v), llvm::minIntN(n), + reportRangeError(loc, rel, Twine((int64_t)v), llvm::minIntN(n), llvm::maxUIntN(n)); } -inline void checkAlignment(uint8_t *loc, uint64_t v, int n, RelType type) { +inline void checkAlignment(uint8_t *loc, uint64_t v, int n, + const Relocation &rel) { if ((v & (n - 1)) != 0) error(getErrorLocation(loc) + "improper alignment for relocation " + - lld::toString(type) + ": 0x" + llvm::utohexstr(v) + + lld::toString(rel.type) + ": 0x" + llvm::utohexstr(v) + " is not aligned to " + Twine(n) + " bytes"); } diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 7b927a434e36..ea74d343ebb2 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -40,9 +40,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { @@ -245,8 +244,7 @@ public: // decide the offsets in the call stub. PPC32PltCallStub(const InputSection &isec, const Relocation &rel, Symbol &dest) - : Thunk(dest, rel.type == R_PPC_PLTREL24 ? 
rel.addend : 0), - file(isec.file) {} + : Thunk(dest, rel.addend), file(isec.file) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -257,6 +255,14 @@ private: const InputFile *file; }; +class PPC32LongThunk final : public Thunk { +public: + PPC32LongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} + uint32_t size() override { return config->isPic ? 32 : 16; } + void writeTo(uint8_t *buf) override; + void addSymbols(ThunkSection &isec) override; +}; + // PPC64 Plt call stubs. // Any call site that needs to call through a plt entry needs a call stub in // the .text section. The call stub is responsible for: @@ -273,6 +279,20 @@ public: void addSymbols(ThunkSection &isec) override; }; +// PPC64 R2 Save Stub +// When the caller requires a valid R2 TOC pointer but the callee does not +// require a TOC pointer and the callee cannot guarantee that it doesn't +// clobber R2 then we need to save R2. This stub: +// 1) Saves the TOC pointer to the stack. +// 2) Tail calls the callee. +class PPC64R2SaveStub final : public Thunk { +public: + PPC64R2SaveStub(Symbol &dest) : Thunk(dest, 0) {} + uint32_t size() override { return 8; } + void writeTo(uint8_t *buf) override; + void addSymbols(ThunkSection &isec) override; +}; + // A bl instruction uses a signed 24 bit offset, with an implicit 4 byte // alignment. This gives a possible 26 bits of 'reach'. If the call offset is // larger then that we need to emit a long-branch thunk. The target address @@ -345,7 +365,7 @@ void AArch64ABSLongThunk::writeTo(uint8_t *buf) { }; uint64_t s = getAArch64ThunkDestVA(destination, addend); memcpy(buf, data, sizeof(data)); - target->relocateOne(buf + 8, R_AARCH64_ABS64, s); + target->relocateNoSym(buf + 8, R_AARCH64_ABS64, s); } void AArch64ABSLongThunk::addSymbols(ThunkSection &isec) { @@ -369,9 +389,9 @@ void AArch64ADRPThunk::writeTo(uint8_t *buf) { uint64_t s = getAArch64ThunkDestVA(destination, addend); uint64_t p = getThunkTargetSym()->getVA(); memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(s) - getAArch64Page(p)); - target->relocateOne(buf + 4, R_AARCH64_ADD_ABS_LO12_NC, s); + target->relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(s) - getAArch64Page(p)); + target->relocateNoSym(buf + 4, R_AARCH64_ADD_ABS_LO12_NC, s); } void AArch64ADRPThunk::addSymbols(ThunkSection &isec) { @@ -415,7 +435,7 @@ void ARMThunk::writeTo(uint8_t *buf) { 0x00, 0x00, 0x00, 0xea, // b S }; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_ARM_JUMP24, offset); + target->relocateNoSym(buf, R_ARM_JUMP24, offset); } bool ARMThunk::isCompatibleWith(const InputSection &isec, @@ -453,7 +473,7 @@ void ThumbThunk::writeTo(uint8_t *buf) { 0x00, 0xf0, 0x00, 0xb0, // b.w S }; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_ARM_THM_JUMP24, offset); + target->relocateNoSym(buf, R_ARM_THM_JUMP24, offset); } bool ThumbThunk::isCompatibleWith(const InputSection &isec, @@ -470,8 +490,8 @@ void ARMV7ABSLongThunk::writeLong(uint8_t *buf) { }; uint64_t s = getARMThunkDestVA(destination); memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_ARM_MOVW_ABS_NC, s); - target->relocateOne(buf + 4, R_ARM_MOVT_ABS, s); + target->relocateNoSym(buf, R_ARM_MOVW_ABS_NC, s); + target->relocateNoSym(buf + 4, R_ARM_MOVT_ABS, s); } void ARMV7ABSLongThunk::addSymbols(ThunkSection &isec) { @@ -488,8 +508,8 @@ void ThumbV7ABSLongThunk::writeLong(uint8_t *buf) { }; uint64_t s = 
getARMThunkDestVA(destination); memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_ARM_THM_MOVW_ABS_NC, s); - target->relocateOne(buf + 4, R_ARM_THM_MOVT_ABS, s); + target->relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, s); + target->relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, s); } void ThumbV7ABSLongThunk::addSymbols(ThunkSection &isec) { @@ -509,8 +529,8 @@ void ARMV7PILongThunk::writeLong(uint8_t *buf) { uint64_t p = getThunkTargetSym()->getVA(); int64_t offset = s - p - 16; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_ARM_MOVW_PREL_NC, offset); - target->relocateOne(buf + 4, R_ARM_MOVT_PREL, offset); + target->relocateNoSym(buf, R_ARM_MOVW_PREL_NC, offset); + target->relocateNoSym(buf + 4, R_ARM_MOVT_PREL, offset); } void ARMV7PILongThunk::addSymbols(ThunkSection &isec) { @@ -530,8 +550,8 @@ void ThumbV7PILongThunk::writeLong(uint8_t *buf) { uint64_t p = getThunkTargetSym()->getVA() & ~0x1; int64_t offset = s - p - 12; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf, R_ARM_THM_MOVW_PREL_NC, offset); - target->relocateOne(buf + 4, R_ARM_THM_MOVT_PREL, offset); + target->relocateNoSym(buf, R_ARM_THM_MOVW_PREL_NC, offset); + target->relocateNoSym(buf + 4, R_ARM_THM_MOVT_PREL, offset); } void ThumbV7PILongThunk::addSymbols(ThunkSection &isec) { @@ -546,7 +566,7 @@ void ARMV5ABSLongThunk::writeLong(uint8_t *buf) { 0x00, 0x00, 0x00, 0x00, // L1: .word S }; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf + 4, R_ARM_ABS32, getARMThunkDestVA(destination)); + target->relocateNoSym(buf + 4, R_ARM_ABS32, getARMThunkDestVA(destination)); } void ARMV5ABSLongThunk::addSymbols(ThunkSection &isec) { @@ -572,7 +592,7 @@ void ARMV5PILongThunk::writeLong(uint8_t *buf) { uint64_t s = getARMThunkDestVA(destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf + 12, R_ARM_REL32, s - p - 12); + target->relocateNoSym(buf + 12, R_ARM_REL32, s - p - 12); } void ARMV5PILongThunk::addSymbols(ThunkSection &isec) { @@ -602,7 +622,7 @@ void ThumbV6MABSLongThunk::writeLong(uint8_t *buf) { }; uint64_t s = getARMThunkDestVA(destination); memcpy(buf, data, sizeof(data)); - target->relocateOne(buf + 8, R_ARM_ABS32, s); + target->relocateNoSym(buf + 8, R_ARM_ABS32, s); } void ThumbV6MABSLongThunk::addSymbols(ThunkSection &isec) { @@ -628,7 +648,7 @@ void ThumbV6MPILongThunk::writeLong(uint8_t *buf) { uint64_t s = getARMThunkDestVA(destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; memcpy(buf, data, sizeof(data)); - target->relocateOne(buf + 12, R_ARM_REL32, s - p - 12); + target->relocateNoSym(buf + 12, R_ARM_REL32, s - p - 12); } void ThumbV6MPILongThunk::addSymbols(ThunkSection &isec) { @@ -645,8 +665,8 @@ void MipsThunk::writeTo(uint8_t *buf) { write32(buf + 4, 0x08000000 | (s >> 2)); // j func write32(buf + 8, 0x27390000); // addiu $25, $25, %lo(func) write32(buf + 12, 0x00000000); // nop - target->relocateOne(buf, R_MIPS_HI16, s); - target->relocateOne(buf + 8, R_MIPS_LO16, s); + target->relocateNoSym(buf, R_MIPS_HI16, s); + target->relocateNoSym(buf + 8, R_MIPS_LO16, s); } void MipsThunk::addSymbols(ThunkSection &isec) { @@ -667,9 +687,9 @@ void MicroMipsThunk::writeTo(uint8_t *buf) { write16(buf + 4, 0xd400); // j func write16(buf + 8, 0x3339); // addiu $25, $25, %lo(func) write16(buf + 12, 0x0c00); // nop - target->relocateOne(buf, R_MICROMIPS_HI16, s); - target->relocateOne(buf + 4, R_MICROMIPS_26_S1, s); - target->relocateOne(buf + 8, R_MICROMIPS_LO16, s); + target->relocateNoSym(buf, 
R_MICROMIPS_HI16, s); + target->relocateNoSym(buf + 4, R_MICROMIPS_26_S1, s); + target->relocateNoSym(buf + 8, R_MICROMIPS_LO16, s); } void MicroMipsThunk::addSymbols(ThunkSection &isec) { @@ -691,9 +711,9 @@ void MicroMipsR6Thunk::writeTo(uint8_t *buf) { write16(buf, 0x1320); // lui $25, %hi(func) write16(buf + 4, 0x3339); // addiu $25, $25, %lo(func) write16(buf + 8, 0x9400); // bc func - target->relocateOne(buf, R_MICROMIPS_HI16, s); - target->relocateOne(buf + 4, R_MICROMIPS_LO16, s); - target->relocateOne(buf + 8, R_MICROMIPS_PC26_S1, s - p - 12); + target->relocateNoSym(buf, R_MICROMIPS_HI16, s); + target->relocateNoSym(buf + 4, R_MICROMIPS_LO16, s); + target->relocateNoSym(buf + 8, R_MICROMIPS_PC26_S1, s - p - 12); } void MicroMipsR6Thunk::addSymbols(ThunkSection &isec) { @@ -707,8 +727,8 @@ InputSection *MicroMipsR6Thunk::getTargetInputSection() const { return dyn_cast<InputSection>(dr.section); } -void writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA, - const InputFile *file, int64_t addend) { +void elf::writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA, + const InputFile *file, int64_t addend) { if (!config->isPic) { write32(buf + 0, 0x3d600000 | (gotPltVA + 0x8000) >> 16); // lis r11,ha write32(buf + 4, 0x816b0000 | (uint16_t)gotPltVA); // lwz r11,l(r11) @@ -765,7 +785,34 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec, return !config->isPic || (isec.file == file && rel.addend == addend); } -void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) { +void PPC32LongThunk::addSymbols(ThunkSection &isec) { + addSymbol(saver.save("__LongThunk_" + destination.getName()), STT_FUNC, 0, + isec); +} + +void PPC32LongThunk::writeTo(uint8_t *buf) { + auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000) >> 16; }; + auto lo = [](uint32_t v) -> uint16_t { return v; }; + uint32_t d = destination.getVA(addend); + if (config->isPic) { + uint32_t off = d - (getThunkTargetSym()->getVA() + 8); + write32(buf + 0, 0x7c0802a6); // mflr r12,0 + write32(buf + 4, 0x429f0005); // bcl r20,r31,.+4 + write32(buf + 8, 0x7d8802a6); // mtctr r12 + write32(buf + 12, 0x3d8c0000 | ha(off)); // addis r12,r12,off@ha + write32(buf + 16, 0x398c0000 | lo(off)); // addi r12,r12,off@l + write32(buf + 20, 0x7c0803a6); // mtlr r0 + buf += 24; + } else { + write32(buf + 0, 0x3d800000 | ha(d)); // lis r12,d@ha + write32(buf + 4, 0x398c0000 | lo(d)); // addi r12,r12,d@l + buf += 8; + } + write32(buf + 0, 0x7d8903a6); // mtctr r12 + write32(buf + 4, 0x4e800420); // bctr +} + +void elf::writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) { uint16_t offHa = (offset + 0x8000) >> 16; uint16_t offLo = offset & 0xffff; @@ -789,6 +836,21 @@ void PPC64PltCallStub::addSymbols(ThunkSection &isec) { s->file = destination.file; } +void PPC64R2SaveStub::writeTo(uint8_t *buf) { + int64_t offset = destination.getVA() - (getThunkTargetSym()->getVA() + 4); + // The branch offset needs to fit in 26 bits. 
+ if (!isInt<26>(offset)) + fatal("R2 save stub branch offset is too large: " + Twine(offset)); + write32(buf + 0, 0xf8410018); // std r2,24(r1) + write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset> +} + +void PPC64R2SaveStub::addSymbols(ThunkSection &isec) { + Defined *s = addSymbol(saver.save("__toc_save_" + destination.getName()), + STT_FUNC, 0, isec); + s->needsTocRestore = true; +} + void PPC64LongBranchThunk::writeTo(uint8_t *buf) { int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) - getPPC64TocBase(); @@ -805,7 +867,8 @@ Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {} Thunk::~Thunk() = default; static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) { - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) fatal("unrecognized relocation type"); if (config->picThunk) return make<AArch64ADRPThunk>(s, a); @@ -902,23 +965,32 @@ static Thunk *addThunkMips(RelType type, Symbol &s) { static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, Symbol &s) { - assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) && + assert((rel.type == R_PPC_LOCAL24PC || rel.type == R_PPC_REL24 || + rel.type == R_PPC_PLTREL24) && "unexpected relocation type for thunk"); - return make<PPC32PltCallStub>(isec, rel, s); + if (s.isInPlt()) + return make<PPC32PltCallStub>(isec, rel, s); + return make<PPC32LongThunk>(s, rel.addend); } static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) { - assert(type == R_PPC64_REL24 && "unexpected relocation type for thunk"); + assert((type == R_PPC64_REL14 || type == R_PPC64_REL24) && + "unexpected relocation type for thunk"); if (s.isInPlt()) return make<PPC64PltCallStub>(s); + // This check looks at the st_other bits of the callee. If the value is 1 + // then the callee clobbers the TOC and we need an R2 save stub. + if ((s.stOther >> 5) == 1) + return make<PPC64R2SaveStub>(s); + if (config->picThunk) return make<PPC64PILongBranchThunk>(s, a); return make<PPC64PDLongBranchThunk>(s, a); } -Thunk *addThunk(const InputSection &isec, Relocation &rel) { +Thunk *elf::addThunk(const InputSection &isec, Relocation &rel) { Symbol &s = *rel.sym; int64_t a = rel.addend; @@ -939,6 +1011,3 @@ Thunk *addThunk(const InputSection &isec, Relocation &rel) { llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC"); } - -} // end namespace elf -} // end namespace lld diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6373044d8804..b9fd03bc2eda 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -22,22 +22,25 @@ #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include <climits> +#define DEBUG_TYPE "lld" + using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { // The writer writes a SymbolTable result to a file. 
template <class ELFT> class Writer { @@ -56,6 +59,7 @@ private: void sortSections(); void resolveShfLinkOrder(); void finalizeAddressDependentContent(); + void optimizeBasicBlockJumps(); void sortInputSections(); void finalizeSections(); void checkExecuteOnly(); @@ -91,7 +95,7 @@ static bool isSectionPrefix(StringRef prefix, StringRef name) { return name.startswith(prefix) || name == prefix.drop_back(); } -StringRef getOutputSectionName(const InputSectionBase *s) { +StringRef elf::getOutputSectionName(const InputSectionBase *s) { if (config->relocatable) return s->name; @@ -107,15 +111,29 @@ StringRef getOutputSectionName(const InputSectionBase *s) { } } - // This check is for -z keep-text-section-prefix. This option separates text - // sections with prefix ".text.hot", ".text.unlikely", ".text.startup" or - // ".text.exit". - // When enabled, this allows identifying the hot code region (.text.hot) in - // the final binary which can be selectively mapped to huge pages or mlocked, - // for instance. + // A BssSection created for a common symbol is identified as "COMMON" in + // linker scripts. It should go to .bss section. + if (s->name == "COMMON") + return ".bss"; + + if (script->hasSectionsCommand) + return s->name; + + // When no SECTIONS is specified, emulate GNU ld's internal linker scripts + // by grouping sections with certain prefixes. + + // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.", + // ".text.unlikely.", ".text.startup." or ".text.exit." before others. + // We provide an option -z keep-text-section-prefix to group such sections + // into separate output sections. This is more flexible. See also + // sortISDBySectionOrder(). + // ".text.unknown" means the hotness of the section is unknown. When + // SampleFDO is used, if a function doesn't have sample, it could be very + // cold or it could be a new function never being sampled. Those functions + // will be kept in the ".text.unknown" section. if (config->zKeepTextSectionPrefix) - for (StringRef v : - {".text.hot.", ".text.unlikely.", ".text.startup.", ".text.exit."}) + for (StringRef v : {".text.hot.", ".text.unknown.", ".text.unlikely.", + ".text.startup.", ".text.exit."}) if (isSectionPrefix(v, s->name)) return v.drop_back(); @@ -126,33 +144,40 @@ StringRef getOutputSectionName(const InputSectionBase *s) { if (isSectionPrefix(v, s->name)) return v.drop_back(); - // CommonSection is identified as "COMMON" in linker scripts. - // By default, it should go to .bss section. 
- if (s->name == "COMMON") - return ".bss"; - return s->name; } static bool needsInterpSection() { - return !config->shared && !config->dynamicLinker.empty() && - script->needsInterpSection(); + return !config->relocatable && !config->shared && + !config->dynamicLinker.empty() && script->needsInterpSection(); } -template <class ELFT> void writeResult() { Writer<ELFT>().run(); } +template <class ELFT> void elf::writeResult() { + llvm::TimeTraceScope timeScope("Write output file"); + Writer<ELFT>().run(); +} static void removeEmptyPTLoad(std::vector<PhdrEntry *> &phdrs) { - llvm::erase_if(phdrs, [&](const PhdrEntry *p) { - if (p->p_type != PT_LOAD) - return false; - if (!p->firstSec) - return true; - uint64_t size = p->lastSec->addr + p->lastSec->size - p->firstSec->addr; - return size == 0; - }); + auto it = std::stable_partition( + phdrs.begin(), phdrs.end(), [&](const PhdrEntry *p) { + if (p->p_type != PT_LOAD) + return true; + if (!p->firstSec) + return false; + uint64_t size = p->lastSec->addr + p->lastSec->size - p->firstSec->addr; + return size != 0; + }); + + // Clear OutputSection::ptLoad for sections contained in removed + // segments. + DenseSet<PhdrEntry *> removed(it, phdrs.end()); + for (OutputSection *sec : outputSections) + if (removed.count(sec->ptLoad)) + sec->ptLoad = nullptr; + phdrs.erase(it, phdrs.end()); } -void copySectionsIntoPartitions() { +void elf::copySectionsIntoPartitions() { std::vector<InputSectionBase *> newSections; for (unsigned part = 2; part != partitions.size() + 1; ++part) { for (InputSectionBase *s : inputSections) { @@ -174,7 +199,7 @@ void copySectionsIntoPartitions() { newSections.end()); } -void combineEhSections() { +void elf::combineEhSections() { for (InputSectionBase *&s : inputSections) { // Ignore dead sections and the partition end marker (.part.end), // whose partition number is out of bounds. @@ -215,7 +240,7 @@ static Defined *addAbsolute(StringRef name) { // The linker is expected to define some symbols depending on // the linking result. This function defines such symbols. -void addReservedSymbols() { +void elf::addReservedSymbols() { if (config->emachine == EM_MIPS) { // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer // so that it points to an absolute address which by default is relative @@ -239,6 +264,8 @@ void addReservedSymbols() { // glibc *crt1.o has a undefined reference to _SDA_BASE_. Since we don't // support Small Data Area, define it arbitrarily as 0. addOptionalRegular("_SDA_BASE_", nullptr, 0, STV_HIDDEN); + } else if (config->emachine == EM_PPC64) { + addPPC64SaveRestore(); } // The Power Architecture 64-bit v2 ABI defines a TableOfContents (TOC) which @@ -308,7 +335,7 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) { return nullptr; } -template <class ELFT> void createSyntheticSections() { +template <class ELFT> void elf::createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. memset(&Out::first, 0, sizeof(Out)); @@ -521,7 +548,8 @@ template <class ELFT> void createSyntheticSections() { add(in.ibtPlt); } - in.plt = make<PltSection>(); + in.plt = config->emachine == EM_PPC ? make<PPC32GlinkSection>() + : make<PltSection>(); add(in.plt); in.iplt = make<IpltSection>(); add(in.iplt); @@ -548,8 +576,7 @@ template <class ELFT> void createSyntheticSections() { // The main function of the writer. 
template <class ELFT> void Writer<ELFT>::run() { - if (config->discard != DiscardPolicy::All) - copyLocalSymbols(); + copyLocalSymbols(); if (config->copyRelocs) addSectionSymbols(); @@ -590,6 +617,14 @@ template <class ELFT> void Writer<ELFT>::run() { for (OutputSection *sec : outputSections) sec->addr = 0; + // Handle --print-map(-M)/--Map, --cref and --print-archive-stats=. Dump them + // before checkSections() because the files may be useful in case + // checkSections() or openFile() fails, for example, due to an erroneous file + // size. + writeMapFile(); + writeCrossReferenceTable(); + writeArchiveStats(); + if (config->checkSections) checkSections(); @@ -616,27 +651,67 @@ template <class ELFT> void Writer<ELFT>::run() { if (errorCount()) return; - // Handle -Map and -cref options. - writeMapFile(); - writeCrossReferenceTable(); - if (errorCount()) - return; - if (auto e = buffer->commit()) error("failed to write to the output file: " + toString(std::move(e))); } +template <class ELFT, class RelTy> +static void markUsedLocalSymbolsImpl(ObjFile<ELFT> *file, + llvm::ArrayRef<RelTy> rels) { + for (const RelTy &rel : rels) { + Symbol &sym = file->getRelocTargetSym(rel); + if (sym.isLocal()) + sym.used = true; + } +} + +// The function ensures that the "used" field of local symbols reflects the fact +// that the symbol is used in a relocation from a live section. +template <class ELFT> static void markUsedLocalSymbols() { + // With --gc-sections, the field is already filled. + // See MarkLive<ELFT>::resolveReloc(). + if (config->gcSections) + return; + // Without --gc-sections, the field is initialized with "true". + // Drop the flag first and then rise for symbols referenced in relocations. + for (InputFile *file : objectFiles) { + ObjFile<ELFT> *f = cast<ObjFile<ELFT>>(file); + for (Symbol *b : f->getLocalSymbols()) + b->used = false; + for (InputSectionBase *s : f->getSections()) { + InputSection *isec = dyn_cast_or_null<InputSection>(s); + if (!isec) + continue; + if (isec->type == SHT_REL) + markUsedLocalSymbolsImpl(f, isec->getDataAs<typename ELFT::Rel>()); + else if (isec->type == SHT_RELA) + markUsedLocalSymbolsImpl(f, isec->getDataAs<typename ELFT::Rela>()); + } + } +} + static bool shouldKeepInSymtab(const Defined &sym) { if (sym.isSection()) return false; - if (config->discard == DiscardPolicy::None) + // If --emit-reloc or -r is given, preserve symbols referenced by relocations + // from live sections. + if (config->copyRelocs && sym.used) return true; - // If -emit-reloc is given, all symbols including local ones need to be - // copied because they may be referenced by relocations. - if (config->emitRelocs) + // Exclude local symbols pointing to .ARM.exidx sections. + // They are probably mapping symbols "$d", which are optional for these + // sections. After merging the .ARM.exidx sections, some of these symbols + // may become dangling. The easiest way to avoid the issue is not to add + // them to the symbol table from the beginning. + if (config->emachine == EM_ARM && sym.section && + sym.section->type == SHT_ARM_EXIDX) + return false; + + if (config->discard == DiscardPolicy::None) return true; + if (config->discard == DiscardPolicy::All) + return false; // In ELF assembly .L symbols are normally discarded by the assembler. 
// If the assembler fails to do so, the linker discards them if @@ -683,12 +758,12 @@ static bool includeInSymtab(const Symbol &b) { template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { if (!in.symTab) return; + if (config->copyRelocs && config->discard != DiscardPolicy::None) + markUsedLocalSymbols<ELFT>(); for (InputFile *file : objectFiles) { ObjFile<ELFT> *f = cast<ObjFile<ELFT>>(file); for (Symbol *b : f->getLocalSymbols()) { - if (!b->isLocal()) - fatal(toString(f) + - ": broken object: getLocalSymbols returns a non-local symbol"); + assert(b->isLocal() && "should have been caught in initializeSymbols()"); auto *dr = dyn_cast<Defined>(b); // No reason to keep local undefined symbol in symtab. @@ -810,7 +885,8 @@ static bool isRelroSection(const OutputSection *sec) { StringRef s = sec->name; return s == ".data.rel.ro" || s == ".bss.rel.ro" || s == ".ctors" || s == ".dtors" || s == ".jcr" || s == ".eh_frame" || - s == ".openbsd.randomdata"; + s == ".fini_array" || s == ".init_array" || + s == ".openbsd.randomdata" || s == ".preinit_array"; } // We compute a rank for each section. The rank indicates where the @@ -1201,6 +1277,27 @@ findOrphanPos(std::vector<BaseCommand *>::iterator b, return i; } +// Adds random priorities to sections not already in the map. +static void maybeShuffle(DenseMap<const InputSectionBase *, int> &order) { + if (!config->shuffleSectionSeed) + return; + + std::vector<int> priorities(inputSections.size() - order.size()); + // Existing priorities are < 0, so use priorities >= 0 for the missing + // sections. + int curPrio = 0; + for (int &prio : priorities) + prio = curPrio++; + uint32_t seed = *config->shuffleSectionSeed; + std::mt19937 g(seed ? seed : std::random_device()()); + llvm::shuffle(priorities.begin(), priorities.end(), g); + int prioIndex = 0; + for (InputSectionBase *sec : inputSections) { + if (order.try_emplace(sec, priorities[prioIndex]).second) + ++prioIndex; + } +} + // Builds section order for handling --symbol-ordering-file. static DenseMap<const InputSectionBase *, int> buildSectionOrder() { DenseMap<const InputSectionBase *, int> sectionOrder; @@ -1330,6 +1427,19 @@ static void sortSection(OutputSection *sec, const DenseMap<const InputSectionBase *, int> &order) { StringRef name = sec->name; + // Never sort these. + if (name == ".init" || name == ".fini") + return; + + // Sort input sections by priority using the list provided by + // --symbol-ordering-file or --shuffle-sections=. This is a least significant + // digit radix sort. The sections may be sorted stably again by a more + // significant key. + if (!order.empty()) + for (BaseCommand *b : sec->sectionCommands) + if (auto *isd = dyn_cast<InputSectionDescription>(b)) + sortISDBySectionOrder(isd, order); + // Sort input sections by section name suffixes for // __attribute__((init_priority(N))). if (name == ".init_array" || name == ".fini_array") { @@ -1345,10 +1455,6 @@ static void sortSection(OutputSection *sec, return; } - // Never sort these. - if (name == ".init" || name == ".fini") - return; - // .toc is allocated just after .got and is accessed using GOT-relative // relocations. Object files compiled with small code model have an // addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations. @@ -1366,13 +1472,6 @@ static void sortSection(OutputSection *sec, }); return; } - - // Sort input sections by priority using the list provided - // by --symbol-ordering-file. 
- if (!order.empty()) - for (BaseCommand *b : sec->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(b)) - sortISDBySectionOrder(isd, order); } // If no layout was provided by linker script, we want to apply default @@ -1380,6 +1479,7 @@ static void sortSection(OutputSection *sec, template <class ELFT> void Writer<ELFT>::sortInputSections() { // Build the order once since it is expensive. DenseMap<const InputSectionBase *, int> order = buildSectionOrder(); + maybeShuffle(order); for (BaseCommand *base : script->sectionCommands) if (auto *sec = dyn_cast<OutputSection>(base)) sortSection(sec, order); @@ -1416,9 +1516,15 @@ template <class ELFT> void Writer<ELFT>::sortSections() { llvm::find_if(script->sectionCommands, isSection), llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(), compareSections); + + // Process INSERT commands. From this point onwards the order of + // script->sectionCommands is fixed. + script->processInsertCommands(); return; } + script->processInsertCommands(); + // Orphan sections are sections present in the input files which are // not explicitly placed into the output file by the linker script. // @@ -1504,7 +1610,7 @@ static bool compareByFilePosition(InputSection *a, InputSection *b) { OutputSection *bOut = lb->getParent(); if (aOut != bOut) - return aOut->sectionIndex < bOut->sectionIndex; + return aOut->addr < bOut->addr; return la->outSecOff < lb->outSecOff; } @@ -1523,17 +1629,30 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { // but sort must consider them all at once. std::vector<InputSection **> scriptSections; std::vector<InputSection *> sections; + bool started = false, stopped = false; for (BaseCommand *base : sec->sectionCommands) { if (auto *isd = dyn_cast<InputSectionDescription>(base)) { for (InputSection *&isec : isd->sections) { - scriptSections.push_back(&isec); - sections.push_back(isec); - - InputSection *link = isec->getLinkOrderDep(); - if (!link->getParent()) - error(toString(isec) + ": sh_link points to discarded section " + - toString(link)); + if (!(isec->flags & SHF_LINK_ORDER)) { + if (started) + stopped = true; + } else if (stopped) { + error(toString(isec) + ": SHF_LINK_ORDER sections in " + sec->name + + " are not contiguous"); + } else { + started = true; + + scriptSections.push_back(&isec); + sections.push_back(isec); + + InputSection *link = isec->getLinkOrderDep(); + if (!link->getParent()) + error(toString(isec) + ": sh_link points to discarded section " + + toString(link)); + } } + } else if (started) { + stopped = true; } } @@ -1547,6 +1666,11 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { } } +static void finalizeSynthetic(SyntheticSection *sec) { + if (sec && sec->isNeeded() && sec->getParent()) + sec->finalizeContents(); +} + // We need to generate and finalize the content that depends on the address of // InputSections. As the generation of the content may also alter InputSection // addresses we must converge to a fixed point. We do that here. See the comment @@ -1556,6 +1680,17 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { AArch64Err843419Patcher a64p; ARMErr657417Patcher a32p; script->assignAddresses(); + // .ARM.exidx and SHF_LINK_ORDER do not require precise addresses, but they + // do require the relative addresses of OutputSections because linker scripts + // can assign Virtual Addresses to OutputSections that are not monotonically + // increasing. 
+ for (Partition &part : partitions) + finalizeSynthetic(part.armExidx); + resolveShfLinkOrder(); + + // Converts call x@GDPLT to call __tls_get_addr + if (config->emachine == EM_HEXAGON) + hexagonTLSSymbolUpdate(outputSections); int assignPasses = 0; for (;;) { @@ -1602,11 +1737,103 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { } } } + + // If addrExpr is set, the address may not be a multiple of the alignment. + // Warn because this is error-prone. + for (BaseCommand *cmd : script->sectionCommands) + if (auto *os = dyn_cast<OutputSection>(cmd)) + if (os->addr % os->alignment != 0) + warn("address (0x" + Twine::utohexstr(os->addr) + ") of section " + + os->name + " is not a multiple of alignment (" + + Twine(os->alignment) + ")"); +} + +// If Input Sections have been shrinked (basic block sections) then +// update symbol values and sizes associated with these sections. With basic +// block sections, input sections can shrink when the jump instructions at +// the end of the section are relaxed. +static void fixSymbolsAfterShrinking() { + for (InputFile *File : objectFiles) { + parallelForEach(File->getSymbols(), [&](Symbol *Sym) { + auto *def = dyn_cast<Defined>(Sym); + if (!def) + return; + + const SectionBase *sec = def->section; + if (!sec) + return; + + const InputSectionBase *inputSec = dyn_cast<InputSectionBase>(sec->repl); + if (!inputSec || !inputSec->bytesDropped) + return; + + const size_t OldSize = inputSec->data().size(); + const size_t NewSize = OldSize - inputSec->bytesDropped; + + if (def->value > NewSize && def->value <= OldSize) { + LLVM_DEBUG(llvm::dbgs() + << "Moving symbol " << Sym->getName() << " from " + << def->value << " to " + << def->value - inputSec->bytesDropped << " bytes\n"); + def->value -= inputSec->bytesDropped; + return; + } + + if (def->value + def->size > NewSize && def->value <= OldSize && + def->value + def->size <= OldSize) { + LLVM_DEBUG(llvm::dbgs() + << "Shrinking symbol " << Sym->getName() << " from " + << def->size << " to " << def->size - inputSec->bytesDropped + << " bytes\n"); + def->size -= inputSec->bytesDropped; + } + }); + } } -static void finalizeSynthetic(SyntheticSection *sec) { - if (sec && sec->isNeeded() && sec->getParent()) - sec->finalizeContents(); +// If basic block sections exist, there are opportunities to delete fall thru +// jumps and shrink jump instructions after basic block reordering. This +// relaxation pass does that. It is only enabled when --optimize-bb-jumps +// option is used. +template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { + assert(config->optimizeBBJumps); + + script->assignAddresses(); + // For every output section that has executable input sections, this + // does the following: + // 1. Deletes all direct jump instructions in input sections that + // jump to the following section as it is not required. + // 2. If there are two consecutive jump instructions, it checks + // if they can be flipped and one can be deleted. + for (OutputSection *os : outputSections) { + if (!(os->flags & SHF_EXECINSTR)) + continue; + std::vector<InputSection *> sections = getInputSections(os); + std::vector<unsigned> result(sections.size()); + // Delete all fall through jump instructions. Also, check if two + // consecutive jump instructions can be flipped so that a fall + // through jmp instruction can be deleted. + parallelForEachN(0, sections.size(), [&](size_t i) { + InputSection *next = i + 1 < sections.size() ? 
sections[i + 1] : nullptr; + InputSection &is = *sections[i]; + result[i] = + target->deleteFallThruJmpInsn(is, is.getFile<ELFT>(), next) ? 1 : 0; + }); + size_t numDeleted = std::count(result.begin(), result.end(), 1); + if (numDeleted > 0) { + script->assignAddresses(); + LLVM_DEBUG(llvm::dbgs() + << "Removing " << numDeleted << " fall through jumps\n"); + } + } + + fixSymbolsAfterShrinking(); + + for (OutputSection *os : outputSections) { + std::vector<InputSection *> sections = getInputSections(os); + for (InputSection *is : sections) + is->trim(); + } } // In order to allow users to manipulate linker-synthesized sections, @@ -1633,12 +1860,15 @@ static void removeUnusedSyntheticSections() { if (!os || ss->isNeeded()) continue; - // If we reach here, then SS is an unused synthetic section and we want to - // remove it from corresponding input section description of output section. + // If we reach here, then ss is an unused synthetic section and we want to + // remove it from the corresponding input section description, and + // orphanSections. for (BaseCommand *b : os->sectionCommands) if (auto *isd = dyn_cast<InputSectionDescription>(b)) llvm::erase_if(isd->sections, [=](InputSection *isec) { return isec == ss; }); + llvm::erase_if(script->orphanSections, + [=](const InputSectionBase *isec) { return isec == ss; }); } } @@ -1719,6 +1949,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // we can correctly decide if a dynamic relocation is needed. This is called // after processSymbolAssignments() because it needs to know whether a // linker-script-defined symbol is absolute. + ppc64noTocRelax.clear(); if (!config->relocatable) { forEachRelSec(scanRelocations<ELFT>); reportUndefinedSymbols<ELFT>(); @@ -1747,7 +1978,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { if (sym->isUndefined() && !sym->isWeak()) if (auto *f = dyn_cast_or_null<SharedFile>(sym->file)) if (f->allNeededIsKnown) - error(toString(f) + ": undefined reference to " + toString(*sym)); + errorOrWarn(toString(f) + ": undefined reference to " + + toString(*sym) + " [--no-allow-shlib-undefined]"); } // Now that we have defined all possible global symbols including linker- @@ -1785,6 +2017,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { in.mipsGot->build(); removeUnusedSyntheticSections(); + script->diagnoseOrphanHandling(); sortSections(); @@ -1801,6 +2034,15 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { sec->addrExpr = [=] { return i->second; }; } + // With the outputSections available check for GDPLT relocations + // and add __tls_get_addr symbol if needed. + if (config->emachine == EM_HEXAGON && hexagonNeedsTLSSymbol(outputSections)) { + Symbol *sym = symtab->addSymbol(Undefined{ + nullptr, "__tls_get_addr", STB_GLOBAL, STV_DEFAULT, STT_NOTYPE}); + sym->isPreemptible = true; + partitions[0].dynSymTab->addSymbol(sym); + } + // This is a bit of a hack. A value of 0 means undef, so we set it // to 1 to make __ehdr_start defined. The section number is not // particularly relevant. @@ -1864,7 +2106,6 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // Dynamic section must be the last one in this list and dynamic // symbol table section (dynSymTab) must be the first one. 
for (Partition &part : partitions) { - finalizeSynthetic(part.armExidx); finalizeSynthetic(part.dynSymTab); finalizeSynthetic(part.gnuHashTab); finalizeSynthetic(part.hashTab); @@ -1880,12 +2121,6 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { if (!script->hasSectionsCommand && !config->relocatable) fixSectionAlignments(); - // SHFLinkOrder processing must be processed after relative section placements are - // known but before addresses are allocated. - resolveShfLinkOrder(); - if (errorCount()) - return; - // This is used to: // 1) Create "thunks": // Jump instructions in many ISAs have small displacements, and therefore @@ -1908,11 +2143,19 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // sometimes using forward symbol declarations. We want to set the correct // values. They also might change after adding the thunks. finalizeAddressDependentContent(); + if (errorCount()) + return; // finalizeAddressDependentContent may have added local symbols to the static symbol table. finalizeSynthetic(in.symTab); finalizeSynthetic(in.ppc64LongBranchTarget); + // Relaxation to delete inter-basic block jumps created by basic block + // sections. Run after in.symTab is finalized as optimizeBasicBlockJumps + // can relax jump instructions based on symbol offset. + if (config->optimizeBBJumps) + optimizeBasicBlockJumps(); + // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result // of finalizing other sections. @@ -1986,8 +2229,10 @@ void Writer<ELFT>::addStartStopSymbols(OutputSection *sec) { StringRef s = sec->name; if (!isValidCIdentifier(s)) return; - addOptionalRegular(saver.save("__start_" + s), sec, 0, STV_PROTECTED); - addOptionalRegular(saver.save("__stop_" + s), sec, -1, STV_PROTECTED); + addOptionalRegular(saver.save("__start_" + s), sec, 0, + config->zStartStopVisibility); + addOptionalRegular(saver.save("__stop_" + s), sec, -1, + config->zStartStopVisibility); } static bool needsPtLoad(OutputSection *sec) { @@ -2106,12 +2351,11 @@ std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs(Partition &part) { // time, we don't want to create a separate load segment for the headers, // even if the first output section has an AT or AT> attribute. uint64_t newFlags = computeFlags(sec->getPhdrFlags()); - if (!load || - ((sec->lmaExpr || - (sec->lmaRegion && (sec->lmaRegion != load->firstSec->lmaRegion))) && - load->lastSec != Out::programHeaders) || - sec->memRegion != load->firstSec->memRegion || flags != newFlags || - sec == relroEnd) { + bool sameLMARegion = + load && !sec->lmaExpr && sec->lmaRegion == load->firstSec->lmaRegion; + if (!(load && newFlags == flags && sec != relroEnd && + sec->memRegion == load->firstSec->memRegion && + (sameLMARegion || load->lastSec == Out::programHeaders))) { load = addHdr(PT_LOAD, newFlags); flags = newFlags; } @@ -2205,7 +2449,10 @@ template <class ELFT> void Writer<ELFT>::fixSectionAlignments() { const PhdrEntry *prev; auto pageAlign = [&](const PhdrEntry *p) { OutputSection *cmd = p->firstSec; - if (cmd && !cmd->addrExpr) { + if (!cmd) + return; + cmd->alignExpr = [align = cmd->alignment]() { return align; }; + if (!cmd->addrExpr) { // Prefer advancing to align(dot, maxPageSize) + dot%maxPageSize to avoid // padding in the file contents. 
 //
@@ -2716,15 +2963,12 @@ template <class ELFT> void Writer<ELFT>::writeBuildId() {
   part.buildId->writeBuildId(buildId);
 }
 
-template void createSyntheticSections<ELF32LE>();
-template void createSyntheticSections<ELF32BE>();
-template void createSyntheticSections<ELF64LE>();
-template void createSyntheticSections<ELF64BE>();
-
-template void writeResult<ELF32LE>();
-template void writeResult<ELF32BE>();
-template void writeResult<ELF64LE>();
-template void writeResult<ELF64BE>();
+template void elf::createSyntheticSections<ELF32LE>();
+template void elf::createSyntheticSections<ELF32BE>();
+template void elf::createSyntheticSections<ELF64LE>();
+template void elf::createSyntheticSections<ELF64BE>();
 
-} // namespace elf
-} // namespace lld
+template void elf::writeResult<ELF32LE>();
+template void elf::writeResult<ELF32BE>();
+template void elf::writeResult<ELF64LE>();
+template void elf::writeResult<ELF64BE>();
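
The new PPC thunks in this commit rely on two pieces of address arithmetic: the @ha/@l split used by PPC32LongThunk::writeTo (lis/addi pair, where the +0x8000 bias compensates for the sign extension addi performs on its low half), and the signed 26-bit range check plus I-form branch encoding used by PPC64R2SaveStub::writeTo. The following standalone sketch is not part of the commit; only the constants (0x8000, 0x48000000, 0x03fffffc, the 26-bit limit) are taken from the diff, and every other name is illustrative.

// Minimal sketch of the PPC address arithmetic used by the new thunks.
#include <cassert>
#include <cstdint>
#include <cstdio>

// High-adjusted half: the +0x8000 bias compensates for the sign extension
// that the subsequent low-half add (addi) performs.
static uint16_t ha(uint32_t v) { return (v + 0x8000) >> 16; }
static uint16_t lo(uint32_t v) { return v; }

// True if `offset` fits the signed 26-bit reach of an I-form branch
// (24-bit immediate, implicitly shifted left by 2).
static bool fitsBranch26(int64_t offset) {
  return offset >= -(int64_t(1) << 25) && offset < (int64_t(1) << 25);
}

// Encode "b offset" the same way the R2 save stub does: opcode 18 with the
// low two bits of the offset masked off.
static uint32_t encodeB(int64_t offset) {
  assert(fitsBranch26(offset) && (offset & 3) == 0);
  return 0x48000000 | (offset & 0x03fffffc);
}

int main() {
  uint32_t d = 0x10012345; // hypothetical destination VA
  // lis r12,d@ha ; addi r12,r12,d@l reconstructs d exactly, even when the
  // low half is negative after sign extension.
  uint32_t reconstructed = (uint32_t(ha(d)) << 16) + int16_t(lo(d));
  assert(reconstructed == d);
  printf("b insn: 0x%08x\n", encodeB(0x1ffc)); // small in-range offset
}

The same ha()/lo() helpers appear verbatim as lambdas inside PPC32LongThunk::writeTo above; the range check mirrors the isInt<26> test that makes the stub fail with "R2 save stub branch offset is too large" when the callee is out of reach.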
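
The reworked removeEmptyPTLoad() in Writer.cpp switches from a plain erase_if to a partition-then-clear scheme so that OutputSection::ptLoad pointers into removed segments can be nulled before the segments are erased. A minimal standalone analogue follows, assuming invented Segment/Section types and plain std:: containers purely for illustration; it is a sketch of the pattern, not the lld implementation.

// Sketch: keep wanted elements at the front, remember the removed tail,
// clear back-references, then erase.
#include <algorithm>
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Segment { bool load; size_t size; };
struct Section { Segment *ptLoad = nullptr; };

static void removeEmptySegments(std::vector<Segment *> &segs,
                                std::vector<Section *> &sections) {
  // Keep non-load segments and non-empty load segments; empty loadable
  // segments move to the tail without disturbing the survivors' order.
  auto it = std::stable_partition(segs.begin(), segs.end(), [](Segment *s) {
    return !s->load || s->size != 0;
  });

  // Null out dangling pointers from sections that referenced a removed
  // segment, mirroring how the commit clears OutputSection::ptLoad.
  std::unordered_set<Segment *> removed(it, segs.end());
  for (Section *sec : sections)
    if (removed.count(sec->ptLoad))
      sec->ptLoad = nullptr;

  segs.erase(it, segs.end());
}

int main() {
  Segment a{true, 0}, b{true, 64}, c{false, 0};
  Section s1{&a}, s2{&b};
  std::vector<Segment *> segs{&a, &b, &c};
  std::vector<Section *> sections{&s1, &s2};
  removeEmptySegments(segs, sections);
  printf("segments left: %zu, s1.ptLoad cleared: %d\n", segs.size(),
         s1.ptLoad == nullptr); // expect 2 and 1
}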