diff options
Diffstat (limited to 'lld/ELF')
-rw-r--r-- | lld/ELF/Arch/RISCV.cpp | 199 | ||||
-rw-r--r-- | lld/ELF/InputFiles.cpp | 7 | ||||
-rw-r--r-- | lld/ELF/InputSection.cpp | 13 | ||||
-rw-r--r-- | lld/ELF/Relocations.cpp | 50 | ||||
-rw-r--r-- | lld/ELF/Writer.cpp | 12 |
5 files changed, 212 insertions, 69 deletions
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index d7d3d3e47814..8ce92b4badfb 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -61,6 +61,7 @@ enum Op { AUIPC = 0x17, JALR = 0x67, LD = 0x3003, + LUI = 0x37, LW = 0x2003, SRLI = 0x5013, SUB = 0x40000033, @@ -73,6 +74,7 @@ enum Reg { X_T0 = 5, X_T1 = 6, X_T2 = 7, + X_A0 = 10, X_T3 = 28, }; @@ -102,6 +104,26 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) { (extractBits(imm, 4, 0) << 7); } +namespace { +struct SymbolAnchor { + uint64_t offset; + Defined *d; + bool end; // true for the anchor of st_value+st_size +}; +} // namespace + +struct elf::RISCVRelaxAux { + // This records symbol start and end offsets which will be adjusted according + // to the nearest relocDeltas element. + SmallVector<SymbolAnchor, 0> anchors; + // For relocations[i], the actual offset is + // r_offset - (i ? relocDeltas[i-1] : 0). + std::unique_ptr<uint32_t[]> relocDeltas; + // For relocations[i], the actual type is relocTypes[i]. + std::unique_ptr<RelType[]> relocTypes; + SmallVector<uint32_t, 0> writes; +}; + RISCV::RISCV() { copyRel = R_RISCV_COPY; pltRel = R_RISCV_JUMP_SLOT; @@ -119,6 +141,7 @@ RISCV::RISCV() { tlsGotRel = R_RISCV_TLS_TPREL32; } gotRel = symbolicRel; + tlsDescRel = R_RISCV_TLSDESC; // .got[0] = _DYNAMIC gotHeaderEntriesNum = 1; @@ -187,6 +210,8 @@ int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_RISCV_JUMP_SLOT: // These relocations are defined as not having an implicit addend. return 0; + case R_RISCV_TLSDESC: + return config->is64 ? read64le(buf + 8) : read32le(buf + 4); } } @@ -295,6 +320,12 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_PCREL_LO12_I: case R_RISCV_PCREL_LO12_S: return R_RISCV_PC_INDIRECT; + case R_RISCV_TLSDESC_HI20: + case R_RISCV_TLSDESC_LOAD_LO12: + case R_RISCV_TLSDESC_ADD_LO12: + return R_TLSDESC_PC; + case R_RISCV_TLSDESC_CALL: + return R_TLSDESC_CALL; case R_RISCV_TLS_GD_HI20: return R_TLSGD_PC; case R_RISCV_TLS_GOT_HI20: @@ -419,6 +450,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_RISCV_GOT_HI20: case R_RISCV_PCREL_HI20: + case R_RISCV_TLSDESC_HI20: case R_RISCV_TLS_GD_HI20: case R_RISCV_TLS_GOT_HI20: case R_RISCV_TPREL_HI20: @@ -430,6 +462,8 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_PCREL_LO12_I: + case R_RISCV_TLSDESC_LOAD_LO12: + case R_RISCV_TLSDESC_ADD_LO12: case R_RISCV_TPREL_LO12_I: case R_RISCV_LO12_I: { uint64_t hi = (val + 0x800) >> 12; @@ -513,32 +547,133 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { break; case R_RISCV_RELAX: - return; // Ignored (for now) - + return; + case R_RISCV_TLSDESC: + // The addend is stored in the second word. + if (config->is64) + write64le(loc + 8, val); + else + write32le(loc + 4, val); + break; default: llvm_unreachable("unknown relocation"); } } +static bool relaxable(ArrayRef<Relocation> relocs, size_t i) { + return i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX; +} + +static void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) { + switch (rel.type) { + case R_RISCV_TLSDESC_HI20: + case R_RISCV_TLSDESC_LOAD_LO12: + write32le(loc, 0x00000013); // nop + break; + case R_RISCV_TLSDESC_ADD_LO12: + write32le(loc, utype(AUIPC, X_A0, hi20(val))); // auipc a0,<hi20> + break; + case R_RISCV_TLSDESC_CALL: + if (config->is64) + write32le(loc, itype(LD, X_A0, X_A0, lo12(val))); // ld a0,<lo12>(a0) + else + write32le(loc, itype(LW, X_A0, X_A0, lo12(val))); // lw a0,<lo12>(a0) + break; + default: + llvm_unreachable("unsupported relocation for TLSDESC to IE"); + } +} + +static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { + switch (rel.type) { + case R_RISCV_TLSDESC_HI20: + case R_RISCV_TLSDESC_LOAD_LO12: + write32le(loc, 0x00000013); // nop + return; + case R_RISCV_TLSDESC_ADD_LO12: + if (isInt<12>(val)) + write32le(loc, 0x00000013); // nop + else + write32le(loc, utype(LUI, X_A0, hi20(val))); // lui a0,<hi20> + return; + case R_RISCV_TLSDESC_CALL: + if (isInt<12>(val)) + write32le(loc, itype(ADDI, X_A0, 0, val)); // addi a0,zero,<lo12> + else + write32le(loc, itype(ADDI, X_A0, X_A0, lo12(val))); // addi a0,a0,<lo12> + return; + default: + llvm_unreachable("unsupported relocation for TLSDESC to LE"); + } +} + void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast<InputSection>(&sec)) secAddr += s->outSecOff; else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) secAddr += ehIn->getParent()->outSecOff; - for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) { - const Relocation &rel = sec.relocs()[i]; + uint64_t tlsdescVal = 0; + bool tlsdescRelax = false, isToLe = false; + const ArrayRef<Relocation> relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { + const Relocation &rel = relocs[i]; uint8_t *loc = buf + rel.offset; - const uint64_t val = + uint64_t val = sec.getRelocTargetVA(sec.file, rel.type, rel.addend, secAddr + rel.offset, *rel.sym, rel.expr); switch (rel.expr) { case R_RELAX_HINT: + continue; + case R_TLSDESC_PC: + // For R_RISCV_TLSDESC_HI20, store &got(sym)-PC to be used by the + // following two instructions L[DW] and ADDI. + if (rel.type == R_RISCV_TLSDESC_HI20) + tlsdescVal = val; + else + val = tlsdescVal; break; + case R_RELAX_TLS_GD_TO_IE: + // Only R_RISCV_TLSDESC_HI20 reaches here. tlsdescVal will be finalized + // after we see R_RISCV_TLSDESC_ADD_LO12 in the R_RELAX_TLS_GD_TO_LE case. + // The net effect is that tlsdescVal will be smaller than `val` to take + // into account of NOP instructions (in the absence of R_RISCV_RELAX) + // before AUIPC. + tlsdescVal = val + rel.offset; + isToLe = false; + tlsdescRelax = relaxable(relocs, i); + if (!tlsdescRelax) + tlsdescToIe(loc, rel, val); + continue; + case R_RELAX_TLS_GD_TO_LE: + // See the comment in handleTlsRelocation. For TLSDESC=>IE, + // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} also reach here. If isToIe is + // true, this is actually TLSDESC=>IE optimization. + if (rel.type == R_RISCV_TLSDESC_HI20) { + tlsdescVal = val; + isToLe = true; + tlsdescRelax = relaxable(relocs, i); + } else { + if (!isToLe && rel.type == R_RISCV_TLSDESC_ADD_LO12) + tlsdescVal -= rel.offset; + val = tlsdescVal; + } + // When NOP conversion is eligible and relaxation applies, don't write a + // NOP in case an unrelated instruction follows the current instruction. + if (tlsdescRelax && + (rel.type == R_RISCV_TLSDESC_HI20 || + rel.type == R_RISCV_TLSDESC_LOAD_LO12 || + (rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val)))) + continue; + if (isToLe) + tlsdescToLe(loc, rel, val); + else + tlsdescToIe(loc, rel, val); + continue; case R_RISCV_LEB128: if (i + 1 < size) { - const Relocation &rel1 = sec.relocs()[i + 1]; + const Relocation &rel1 = relocs[i + 1]; if (rel.type == R_RISCV_SET_ULEB128 && rel1.type == R_RISCV_SUB_ULEB128 && rel.offset == rel1.offset) { auto val = rel.sym->getVA(rel.addend) - rel1.sym->getVA(rel1.addend); @@ -554,32 +689,12 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128"); return; default: - relocate(loc, rel, val); break; } + relocate(loc, rel, val); } } -namespace { -struct SymbolAnchor { - uint64_t offset; - Defined *d; - bool end; // true for the anchor of st_value+st_size -}; -} // namespace - -struct elf::RISCVRelaxAux { - // This records symbol start and end offsets which will be adjusted according - // to the nearest relocDeltas element. - SmallVector<SymbolAnchor, 0> anchors; - // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : - // 0). - std::unique_ptr<uint32_t[]> relocDeltas; - // For relocations[i], the actual type is relocTypes[i]. - std::unique_ptr<RelType[]> relocTypes; - SmallVector<uint32_t, 0> writes; -}; - static void initSymbolAnchors() { SmallVector<InputSection *, 0> storage; for (OutputSection *osec : outputSections) { @@ -715,14 +830,16 @@ static void relaxHi20Lo12(const InputSection &sec, size_t i, uint64_t loc, static bool relax(InputSection &sec) { const uint64_t secAddr = sec.getVA(); + const MutableArrayRef<Relocation> relocs = sec.relocs(); auto &aux = *sec.relaxAux; bool changed = false; ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors); uint64_t delta = 0; + bool tlsdescRelax = false, toLeShortForm = false; - std::fill_n(aux.relocTypes.get(), sec.relocs().size(), R_RISCV_NONE); + std::fill_n(aux.relocTypes.get(), relocs.size(), R_RISCV_NONE); aux.writes.clear(); - for (auto [i, r] : llvm::enumerate(sec.relocs())) { + for (auto [i, r] : llvm::enumerate(relocs)) { const uint64_t loc = secAddr + r.offset - delta; uint32_t &cur = aux.relocDeltas[i], remove = 0; switch (r.type) { @@ -743,25 +860,37 @@ static bool relax(InputSection &sec) { } case R_RISCV_CALL: case R_RISCV_CALL_PLT: - if (i + 1 != sec.relocs().size() && - sec.relocs()[i + 1].type == R_RISCV_RELAX) + if (relaxable(relocs, i)) relaxCall(sec, i, loc, r, remove); break; case R_RISCV_TPREL_HI20: case R_RISCV_TPREL_ADD: case R_RISCV_TPREL_LO12_I: case R_RISCV_TPREL_LO12_S: - if (i + 1 != sec.relocs().size() && - sec.relocs()[i + 1].type == R_RISCV_RELAX) + if (relaxable(relocs, i)) relaxTlsLe(sec, i, loc, r, remove); break; case R_RISCV_HI20: case R_RISCV_LO12_I: case R_RISCV_LO12_S: - if (i + 1 != sec.relocs().size() && - sec.relocs()[i + 1].type == R_RISCV_RELAX) + if (relaxable(relocs, i)) relaxHi20Lo12(sec, i, loc, r, remove); break; + case R_RISCV_TLSDESC_HI20: + // For TLSDESC=>LE, we can use the short form if hi20 is zero. + tlsdescRelax = relaxable(relocs, i); + toLeShortForm = tlsdescRelax && r.expr == R_RELAX_TLS_GD_TO_LE && + !hi20(r.sym->getVA(r.addend)); + [[fallthrough]]; + case R_RISCV_TLSDESC_LOAD_LO12: + // For TLSDESC=>LE/IE, AUIPC and L[DW] are removed if relaxable. + if (tlsdescRelax && r.expr != R_TLSDESC_PC) + remove = 4; + break; + case R_RISCV_TLSDESC_ADD_LO12: + if (toLeShortForm) + remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 75e5ee1d0da4..a292e873e72f 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1788,7 +1788,12 @@ void BinaryFile::parse() { } InputFile *elf::createInternalFile(StringRef name) { - return make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name)); + auto *file = + make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name)); + // References from an internal file do not lead to --warn-backrefs + // diagnostics. + file->groupId = 0; + return file; } ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index c728dd6c6306..0e0b9783bd88 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -961,12 +961,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { // vector. The computed value is st_value plus a non-negative offset. // Negative values are invalid, so -1 can be used as the tombstone value. // - // If the referenced symbol is discarded (made Undefined), or the - // section defining the referenced symbol is garbage collected, - // sym.getOutputSection() is nullptr. `ds->folded` catches the ICF folded - // case. However, resolving a relocation in .debug_line to -1 would stop - // debugger users from setting breakpoints on the folded-in function, so - // exclude .debug_line. + // If the referenced symbol is relative to a discarded section (due to + // --gc-sections, COMDAT, etc), it has been converted to a Undefined. + // `ds->folded` catches the ICF folded case. However, resolving a + // relocation in .debug_line to -1 would stop debugger users from setting + // breakpoints on the folded-in function, so exclude .debug_line. // // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value // (base address selection entry), use 1 (which is used by GNU ld for @@ -974,7 +973,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { // // TODO To reduce disruption, we use 0 instead of -1 as the tombstone // value. Enable -1 in a future release. - if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) { + if (!ds || (ds->folded && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. uint64_t value = SignExtend64<bits>(*tombstone); // For a 32-bit local TU reference in .debug_names, X86_64::relocate diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index b6a317bc3b6d..79c8230724ad 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1274,29 +1274,34 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, if (config->emachine == EM_MIPS) return handleMipsTlsRelocation(type, sym, c, offset, addend, expr); + bool isRISCV = config->emachine == EM_RISCV; if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC, R_TLSDESC_GOTPLT>(expr) && config->shared) { + // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not + // set NEEDS_TLSDESC on the label. if (expr != R_TLSDESC_CALL) { - sym.setFlags(NEEDS_TLSDESC); + if (!isRISCV || type == R_RISCV_TLSDESC_HI20) + sym.setFlags(NEEDS_TLSDESC); c.addReloc({expr, type, offset, addend, &sym}); } return 1; } // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE - // relaxation. + // optimizations. + // RISC-V supports TLSDESC to IE/LE optimizations. // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable - // relaxation as well. - bool toExecRelax = !config->shared && config->emachine != EM_ARM && - config->emachine != EM_HEXAGON && - config->emachine != EM_LOONGARCH && - config->emachine != EM_RISCV && - !c.file->ppc64DisableTLSRelax; + // optimization as well. + bool execOptimize = + !config->shared && config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && config->emachine != EM_LOONGARCH && + !(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) && + !c.file->ppc64DisableTLSRelax; // If we are producing an executable and the symbol is non-preemptable, it - // must be defined and the code sequence can be relaxed to use Local-Exec. + // must be defined and the code sequence can be optimized to use Local-Exec. // // ARM and RISC-V do not support any relaxations for TLS relocations, however, // we can omit the DTPMOD dynamic relocations and resolve them at link time @@ -1309,8 +1314,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, // module index, with a special value of 0 for the current module. GOT[e1] is // unused. There only needs to be one module index entry. if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(expr)) { - // Local-Dynamic relocs can be relaxed to Local-Exec. - if (toExecRelax) { + // Local-Dynamic relocs can be optimized to Local-Exec. + if (execOptimize) { c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, offset, addend, &sym}); return target->getTlsGdRelaxSkip(type); @@ -1322,16 +1327,17 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, return 1; } - // Local-Dynamic relocs can be relaxed to Local-Exec. + // Local-Dynamic relocs can be optimized to Local-Exec. if (expr == R_DTPREL) { - if (toExecRelax) + if (execOptimize) expr = target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE); c.addReloc({expr, type, offset, addend, &sym}); return 1; } // Local-Dynamic sequence where offset of tls variable relative to dynamic - // thread pointer is stored in the got. This cannot be relaxed to Local-Exec. + // thread pointer is stored in the got. This cannot be optimized to + // Local-Exec. if (expr == R_TLSLD_GOT_OFF) { sym.setFlags(NEEDS_GOT_DTPREL); c.addReloc({expr, type, offset, addend, &sym}); @@ -1341,14 +1347,18 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC, R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_LOONGARCH_TLSGD_PAGE_PC>(expr)) { - if (!toExecRelax) { + if (!execOptimize) { sym.setFlags(NEEDS_TLSGD); c.addReloc({expr, type, offset, addend, &sym}); return 1; } - // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec + // Global-Dynamic/TLSDESC can be optimized to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. + // + // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a non-preemptible + // label, so the LE optimization will be categorized as + // R_RELAX_TLS_GD_TO_LE. We fix the categorization in RISCV::relocateAlloc. if (sym.isPreemptible) { sym.setFlags(NEEDS_TLSGD_TO_IE); c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, @@ -1363,9 +1373,9 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, if (oneof<R_GOT, R_GOTPLT, R_GOT_PC, R_AARCH64_GOT_PAGE_PC, R_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) { ctx.hasTlsIe.store(true, std::memory_order_relaxed); - // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally - // defined. - if (toExecRelax && isLocalInExecutable) { + // Initial-Exec relocs can be optimized to Local-Exec if the symbol is + // locally defined. + if (execOptimize && isLocalInExecutable) { c.addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { sym.setFlags(NEEDS_TLSIE); @@ -1463,7 +1473,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) { in.got->hasGotOffRel.store(true, std::memory_order_relaxed); } - // Process TLS relocations, including relaxing TLS relocations. Note that + // Process TLS relocations, including TLS optimizations. Note that // R_TPREL and R_TPREL_NEG relocations are resolved in processAux. if (sym.isTls()) { if (unsigned processed = diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6f66f3615fa4..501c10f35849 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1518,12 +1518,12 @@ template <class ELFT> void Writer<ELFT>::sortSections() { if (auto *osd = dyn_cast<OutputDesc>(cmd)) osd->osec.sortRank = getSectionRank(osd->osec); if (!script->hasSectionsCommand) { - // We know that all the OutputSections are contiguous in this case. - auto isSection = [](SectionCommand *cmd) { return isa<OutputDesc>(cmd); }; - std::stable_sort( - llvm::find_if(script->sectionCommands, isSection), - llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(), - compareSections); + // OutputDescs are mostly contiguous, but may be interleaved with + // SymbolAssignments in the presence of INSERT commands. + auto mid = std::stable_partition( + script->sectionCommands.begin(), script->sectionCommands.end(), + [](SectionCommand *cmd) { return isa<OutputDesc>(cmd); }); + std::stable_sort(script->sectionCommands.begin(), mid, compareSections); } // Process INSERT commands and update output section attributes. From this |