summary | refs | log | tree | commit | diff
path: root/lld/MachO/SyntheticSections.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2020-07-26 19:36:28 +0000
commit cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree 209fb2a2d68f8f277793fc8df46c753d31bc853b /lld/MachO/SyntheticSections.cpp
parent 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Notes
Diffstat (limited to 'lld/MachO/SyntheticSections.cpp')
-rw-r--r-- lld/MachO/SyntheticSections.cpp 409
1 files changed, 409 insertions, 0 deletions
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
new file mode 100644
index 000000000000..cc0d5a93c40d
--- /dev/null
+++ b/lld/MachO/SyntheticSections.cpp
@@ -0,0 +1,409 @@
+//===- SyntheticSections.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SyntheticSections.h"
+#include "Config.h"
+#include "ExportTrie.h"
+#include "InputFiles.h"
+#include "MachOStructs.h"
+#include "MergedOutputSection.h"
+#include "OutputSegment.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Writer.h"
+
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::macho;
+
+InStruct macho::in; // Shared `in` object; this file reads in.got, in.stubs, etc.
+std::vector<SyntheticSection *> macho::syntheticSections; // Appended to by SyntheticSection's constructor.
+
+// Builds a synthetic output section called `name` that belongs to the segment
+// called `segname`, and records the new object in the global
+// `syntheticSections` list.
+SyntheticSection::SyntheticSection(const char *segname, const char *name)
+    : OutputSection(SyntheticKind, name), segname(segname) {
+  macho::syntheticSections.push_back(this);
+}
+
+// The header section is registered under the text segment because dyld3's
+// MachOLoaded::getSlide() assumes that the __TEXT segment starts from the
+// beginning of the file (i.e. the header).
+MachHeaderSection::MachHeaderSection()
+    : SyntheticSection(segment_names::text, section_names::header) {
+}
+
+// Registers `lc` to be written after the Mach-O header and adds its size to
+// the running total of load-command bytes.
+void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
+  sizeOfCmds += lc->getSize();
+  loadCommands.push_back(lc);
+}
+
+// Size of this section: the fixed 64-bit Mach-O header plus the accumulated
+// size of every load command added through addLoadCommand().
+uint64_t MachHeaderSection::getSize() const {
+  const size_t headerSize = sizeof(MachO::mach_header_64);
+  return headerSize + sizeOfCmds;
+}
+
+// Writes the mach_header_64 at `buf`, immediately followed by every registered
+// load command in the order it was added.
+void MachHeaderSection::writeTo(uint8_t *buf) const {
+  // Work out the header flags first; dylibs without re-exports get an
+  // additional flag.
+  uint32_t headerFlags =
+      MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL;
+  if (config->outputType == MachO::MH_DYLIB && !config->hasReexports)
+    headerFlags |= MachO::MH_NO_REEXPORTED_DYLIBS;
+
+  auto *header = reinterpret_cast<MachO::mach_header_64 *>(buf);
+  header->magic = MachO::MH_MAGIC_64;
+  header->cputype = MachO::CPU_TYPE_X86_64;
+  header->cpusubtype =
+      MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64;
+  header->filetype = config->outputType;
+  header->ncmds = loadCommands.size();
+  header->sizeofcmds = sizeOfCmds;
+  header->flags = headerFlags;
+
+  // The load commands are laid out back to back right after the header.
+  uint8_t *cursor = buf + sizeof(MachO::mach_header_64);
+  for (LoadCommand *cmd : loadCommands) {
+    cmd->writeTo(cursor);
+    cursor += cmd->getSize();
+  }
+}
+
+// Registers the section under the page-zero segment and section names.
+PageZeroSection::PageZeroSection()
+    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {
+}
+
+// Creates the GOT in the data-const segment, flagged as a section of non-lazy
+// symbol pointers and aligned to 8 bytes.
+GotSection::GotSection()
+    : SyntheticSection(segment_names::dataConst, section_names::got) {
+  // TODO: section_64::reserved1 should be an index into the indirect symbol
+  // table, which we do not currently emit
+  flags = MachO::S_NON_LAZY_SYMBOL_POINTERS;
+  align = 8;
+}
+
+// Gives `sym` a GOT slot unless it already has one. A newly inserted symbol
+// receives the index of the last slot as its gotIndex.
+void GotSection::addEntry(Symbol &sym) {
+  if (!entries.insert(&sym))
+    return; // already present; keep the existing index
+  sym.gotIndex = entries.size() - 1;
+}
+
+// Writes the virtual address of every Defined symbol into its WordSize-wide
+// GOT slot. Slots that belong to other kinds of symbols are not written here.
+void GotSection::writeTo(uint8_t *buf) const {
+  uint8_t *slot = buf;
+  for (size_t idx = 0; idx != entries.size(); ++idx, slot += WordSize) {
+    auto *defined = dyn_cast<Defined>(entries[idx]);
+    if (!defined)
+      continue;
+    write64le(slot, defined->getVA());
+  }
+}
+
+// Registers the binding section under the link-edit segment.
+BindingSection::BindingSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::binding) {
+}
+
+// The section is emitted when at least one explicit binding was recorded, or
+// when the GOT section itself is needed.
+bool BindingSection::isNeeded() const {
+  if (bindings.size() != 0)
+    return true;
+  return in.got->isNeeded();
+}
+
+namespace {
+// Snapshot of the bind-opcode interpreter state after the most recently
+// encoded binding. encodeBinding() compares against it so that only the
+// fields that changed need to be emitted.
+struct Binding {
+  OutputSegment *segment{nullptr}; // segment selected by the last binding
+  uint64_t offset{0};              // segment offset of the next write
+  int64_t addend{0};               // addend currently in effect
+  uint8_t ordinal{0};              // dylib ordinal currently in effect
+};
+} // namespace
+
+// Encode a sequence of opcodes that tell dyld to write the address of dysym +
+// addend at osec->addr + outSecOff.
+//
+// The bind opcode "interpreter" remembers the values of each binding field, so
+// we only need to encode the differences between bindings. `lastBinding`
+// holds the interpreter state left behind by the previous call and is updated
+// to the state after this binding.
+//
+// The caller encodes the GOT bindings before the address-sorted relocation
+// bindings, so nothing guarantees that offsets within one segment only grow.
+// BIND_OPCODE_ADD_ADDR_ULEB takes an unsigned delta, so when the target lies
+// before the current offset the absolute segment/offset pair is emitted again
+// rather than a wrapped-around delta.
+//
+// If the dylib ordinal does not fit into the immediate field of
+// BIND_OPCODE_SET_DYLIB_ORDINAL_IMM, an error is reported and the remaining
+// opcodes for this binding are not emitted.
+static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec,
+                          uint64_t outSecOff, int64_t addend,
+                          Binding &lastBinding, raw_svector_ostream &os) {
+  using namespace llvm::MachO;
+  OutputSegment *seg = osec->parent;
+  uint64_t offset = osec->getSegmentOffset() + outSecOff;
+  if (lastBinding.segment != seg || offset < lastBinding.offset) {
+    // New segment, or a step backwards inside the current one: set the
+    // absolute position.
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                               seg->index);
+    encodeULEB128(offset, os);
+    lastBinding.segment = seg;
+    lastBinding.offset = offset;
+  } else if (lastBinding.offset != offset) {
+    // Same segment, further ahead: a forward delta is enough.
+    os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
+    encodeULEB128(offset - lastBinding.offset, os);
+    lastBinding.offset = offset;
+  }
+
+  if (lastBinding.ordinal != dysym.file->ordinal) {
+    if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) {
+      os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                 dysym.file->ordinal);
+    } else {
+      error("TODO: Support larger dylib symbol ordinals");
+      return;
+    }
+    lastBinding.ordinal = dysym.file->ordinal;
+  }
+
+  if (lastBinding.addend != addend) {
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
+    encodeSLEB128(addend, os);
+    lastBinding.addend = addend;
+  }
+
+  os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+     << dysym.getName() << '\0'
+     << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+     << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+  // DO_BIND causes dyld to both perform the binding and increment the offset
+  lastBinding.offset += WordSize;
+}
+
+// Builds the non-lazy bind opcode stream in `contents`.
+//
+// Bind opcodes are byte-sized instructions that dyld interprets to update a
+// record made of:
+//   * segment index (of the segment to write the symbol addresses to,
+//     typically the __DATA_CONST segment which contains the GOT)
+//   * offset within the segment, indicating the next location to bind
+//   * symbol type
+//   * symbol library ordinal (the index of its library's LC_LOAD_DYLIB
+//     command)
+//   * symbol name
+//   * addend
+// On BIND_OPCODE_DO_BIND dyld binds using the current record and advances the
+// segment offset to the next entry. The record is *not* cleared afterwards,
+// so each binding only has to encode what differs from the previous one.
+//
+// GOT entries that refer to dylib symbols are encoded first, followed by the
+// explicitly recorded bindings. A terminating BIND_OPCODE_DONE is appended
+// only if something was encoded.
+void BindingSection::finalizeContents() {
+  raw_svector_ostream os{contents};
+  Binding lastBinding;
+  bool emittedAny = false;
+
+  // One GOT slot per entry; only dylib symbols need a binding.
+  uint64_t gotOff = 0;
+  for (const Symbol *sym : in.got->getEntries()) {
+    if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
+      encodeBinding(*dysym, in.got, gotOff, 0, lastBinding, os);
+      emittedAny = true;
+    }
+    gotOff += WordSize;
+  }
+
+  // Sorting the relocations by segment and address allows us to encode them
+  // more compactly.
+  const auto byLocation = [](const BindingEntry &a, const BindingEntry &b) {
+    OutputSection *osecA = a.isec->parent;
+    OutputSection *osecB = b.isec->parent;
+    if (osecA->parent != osecB->parent)
+      return osecA->parent->fileOff < osecB->parent->fileOff;
+    if (osecA != osecB)
+      return osecA->addr < osecB->addr;
+    if (a.isec != b.isec)
+      return a.isec->outSecOff < b.isec->outSecOff;
+    return a.offset < b.offset;
+  };
+  llvm::sort(bindings, byLocation);
+
+  for (const BindingEntry &entry : bindings) {
+    encodeBinding(*entry.dysym, entry.isec->parent,
+                  entry.isec->outSecOff + entry.offset, entry.addend,
+                  lastBinding, os);
+    emittedAny = true;
+  }
+
+  if (emittedAny)
+    os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
+}
+
+// Copies the opcode stream built by finalizeContents() into `buf`.
+void BindingSection::writeTo(uint8_t *buf) const {
+  const size_t numBytes = contents.size();
+  memcpy(buf, contents.data(), numBytes);
+}
+
+// Registers the "__stubs" section under the text segment.
+StubsSection::StubsSection()
+    : SyntheticSection(segment_names::text, "__stubs") {
+}
+
+// One target-sized stub per registered entry.
+uint64_t StubsSection::getSize() const {
+  const auto numStubs = entries.size();
+  return numStubs * target->stubSize;
+}
+
+// Asks the target to emit one stub per entry, packed back to back starting at
+// `buf`.
+void StubsSection::writeTo(uint8_t *buf) const {
+  uint8_t *out = buf;
+  for (const DylibSymbol *sym : in.stubs->getEntries()) {
+    target->writeStub(out, *sym);
+    out += target->stubSize;
+  }
+}
+
+// Gives `sym` a stub unless it already has one. A newly inserted symbol
+// receives the index of the last stub as its stubsIndex.
+void StubsSection::addEntry(DylibSymbol &sym) {
+  if (!entries.insert(&sym))
+    return; // already present; keep the existing index
+  sym.stubsIndex = entries.size() - 1;
+}
+
+// Registers the "__stub_helper" section under the text segment.
+StubHelperSection::StubHelperSection()
+    : SyntheticSection(segment_names::text, "__stub_helper") {
+}
+
+// A fixed-size header followed by one helper entry per stub.
+uint64_t StubHelperSection::getSize() const {
+  const auto numStubs = in.stubs->getEntries().size();
+  return target->stubHelperHeaderSize +
+         numStubs * target->stubHelperEntrySize;
+}
+
+// Only needed when at least one stub exists.
+bool StubHelperSection::isNeeded() const {
+  return in.stubs->getEntries().size() != 0;
+}
+
+// Emits the target's stub-helper header, then one helper entry per stub. Each
+// entry is also given its own address (section address + offset within the
+// section).
+void StubHelperSection::writeTo(uint8_t *buf) const {
+  target->writeStubHelperHeader(buf);
+
+  size_t entryOff = target->stubHelperHeaderSize;
+  for (const DylibSymbol *sym : in.stubs->getEntries()) {
+    uint8_t *entryBuf = buf + entryOff;
+    target->writeStubHelperEntry(entryBuf, *sym, addr + entryOff);
+    entryOff += target->stubHelperEntrySize;
+  }
+}
+
+// Prepares the pieces the stub helper relies on:
+//   * looks up dyld_stub_binder, which must be a dylib symbol, and gives it a
+//     GOT entry (an error is reported and setup stops if it is missing);
+//   * adds the image loader cache to the input sections and defines
+//     __dyld_private at its start.
+void StubHelperSection::setup() {
+  auto *binder = symtab->find("dyld_stub_binder");
+  stubBinder = dyn_cast_or_null<DylibSymbol>(binder);
+  if (!stubBinder) {
+    error("symbol dyld_stub_binder not found (normally in libSystem.dylib). "
+          "Needed to perform lazy binding.");
+    return;
+  }
+
+  in.got->addEntry(*stubBinder);
+
+  inputSections.push_back(in.imageLoaderCache);
+  symtab->addDefined("__dyld_private", in.imageLoaderCache, 0);
+}
+
+// Initializes the section as "__data" in the data segment, with contents
+// consisting of WordSize zero bytes allocated from bAlloc.
+ImageLoaderCacheSection::ImageLoaderCacheSection() {
+  uint8_t *zeroed = bAlloc.Allocate<uint8_t>(WordSize);
+  memset(zeroed, 0, WordSize);
+  data = {zeroed, WordSize};
+
+  segname = segment_names::data;
+  name = "__data";
+}
+
+// Creates "__la_symbol_ptr" in the data segment, flagged as a section of lazy
+// symbol pointers and aligned to 8 bytes.
+LazyPointerSection::LazyPointerSection()
+    : SyntheticSection(segment_names::data, "__la_symbol_ptr") {
+  flags = MachO::S_LAZY_SYMBOL_POINTERS;
+  align = 8;
+}
+
+// One WordSize-wide pointer per stub.
+uint64_t LazyPointerSection::getSize() const {
+  const auto numStubs = in.stubs->getEntries().size();
+  return numStubs * WordSize;
+}
+
+// Only needed when at least one stub exists.
+bool LazyPointerSection::isNeeded() const {
+  return in.stubs->getEntries().size() != 0;
+}
+
+// Points every lazy pointer at the stub-helper entry of its symbol: the stub
+// helper's address plus the header size plus stubsIndex helper entries.
+void LazyPointerSection::writeTo(uint8_t *buf) const {
+  uint8_t *slot = buf;
+  for (const DylibSymbol *sym : in.stubs->getEntries()) {
+    const uint64_t stubHelperOffset =
+        target->stubHelperHeaderSize +
+        sym->stubsIndex * target->stubHelperEntrySize;
+    write64le(slot, in.stubHelper->addr + stubHelperOffset);
+    slot += WordSize;
+  }
+}
+
+// Registers the lazy-binding section under the link-edit segment.
+LazyBindingSection::LazyBindingSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::lazyBinding) {
+}
+
+// Lazy bindings are only needed when the stubs section is.
+bool LazyBindingSection::isNeeded() const {
+  return in.stubs->isNeeded();
+}
+
+// Encodes the lazy bind opcodes of every stub symbol and stores, on each
+// symbol, the offset at which its opcodes start.
+void LazyBindingSection::finalizeContents() {
+  // TODO: Just precompute output size here instead of writing to a temporary
+  // buffer
+  for (DylibSymbol *sym : in.stubs->getEntries()) {
+    const uint32_t streamOffset = encode(*sym);
+    sym->lazyBindOffset = streamOffset;
+  }
+}
+
+// Copies the opcode stream built by finalizeContents() into `buf`.
+void LazyBindingSection::writeTo(uint8_t *buf) const {
+  const size_t numBytes = contents.size();
+  memcpy(buf, contents.data(), numBytes);
+}
+
+// Appends the complete bind information for `sym` to the opcode stream and
+// returns the offset at which it starts.
+//
+// Unlike the non-lazy binding section, the bind opcodes in this section
+// aren't interpreted all at once. Rather, dyld will start interpreting
+// opcodes at a given offset, typically only binding a single symbol before it
+// finds a BIND_OPCODE_DONE terminator. As such we cannot encode just the
+// differences between symbols; every symbol gets its segment, offset, dylib
+// ordinal and name spelled out, followed by DO_BIND and DONE.
+uint32_t LazyBindingSection::encode(const DylibSymbol &sym) {
+  const uint32_t opstreamOffset = contents.size();
+
+  // Location of the symbol's lazy pointer, relative to the start of the
+  // segment that holds the lazy pointer section.
+  OutputSegment *dataSeg = in.lazyPointers->parent;
+  os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                             dataSeg->index);
+  const uint64_t offset = in.lazyPointers->addr -
+                          dataSeg->firstSection()->addr +
+                          sym.stubsIndex * WordSize;
+  encodeULEB128(offset, os);
+
+  // Only ordinals that fit the opcode's immediate field are supported so far.
+  if (sym.file->ordinal > MachO::BIND_IMMEDIATE_MASK)
+    fatal("TODO: Support larger dylib symbol ordinals");
+  os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                             sym.file->ordinal);
+
+  os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
+  os << sym.getName() << '\0';
+  os << static_cast<uint8_t>(MachO::BIND_OPCODE_DO_BIND);
+  os << static_cast<uint8_t>(MachO::BIND_OPCODE_DONE);
+  return opstreamOffset;
+}
+
+// Registers the export section under the link-edit segment.
+ExportSection::ExportSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::export_) {
+}
+
+// Feeds every Defined symbol from the symbol table to the trie builder, then
+// builds the trie and records the result of build() as the section size.
+void ExportSection::finalizeContents() {
+  // TODO: We should check symbol visibility.
+  for (const Symbol *sym : symtab->getSymbols()) {
+    const auto *defined = dyn_cast<Defined>(sym);
+    if (defined == nullptr)
+      continue;
+    trieBuilder.addSymbol(*defined);
+  }
+  size = trieBuilder.build();
+}
+
+// Delegates to the trie builder, which writes the built trie into `buf`.
+void ExportSection::writeTo(uint8_t *buf) const {
+  trieBuilder.writeTo(buf);
+}
+
+// Registers the symbol table under the link-edit segment, aligned to
+// WordSize. `stringTableSection` is kept so that symbol names can be added to
+// it when the contents are finalized.
+SymtabSection::SymtabSection(StringTableSection &stringTableSection)
+    : SyntheticSection(segment_names::linkEdit, section_names::symbolTable),
+      stringTableSection(stringTableSection) {
+  // TODO: When we introduce the SyntheticSections superclass, we should make
+  // all synthetic sections aligned to WordSize by default.
+  align = WordSize;
+}
+
+// One nlist_64 record per collected symbol.
+uint64_t SymtabSection::getSize() const {
+  const auto numSymbols = symbols.size();
+  return numSymbols * sizeof(structs::nlist_64);
+}
+
+// Collects every Defined symbol from the symbol table, adding its name to the
+// string table and remembering the resulting string index.
+void SymtabSection::finalizeContents() {
+  // TODO support other symbol types
+  for (Symbol *sym : symtab->getSymbols()) {
+    if (!isa<Defined>(sym))
+      continue;
+    symbols.push_back({sym, stringTableSection.addString(sym->getName())});
+  }
+}
+
+// Writes one nlist_64 record per collected symbol into `buf`. Every record
+// gets its string index; type, section and value are only filled in for
+// Defined symbols.
+void SymtabSection::writeTo(uint8_t *buf) const {
+  auto *cursor = reinterpret_cast<structs::nlist_64 *>(buf);
+  for (const SymtabEntry &entry : symbols) {
+    structs::nlist_64 &out = *cursor++;
+    out.n_strx = entry.strx;
+    // TODO support other symbol types
+    // TODO populate n_desc
+    auto *defined = dyn_cast<Defined>(entry.sym);
+    if (!defined)
+      continue;
+    out.n_type = MachO::N_EXT | MachO::N_SECT;
+    out.n_sect = defined->isec->parent->index;
+    // For the N_SECT symbol type, n_value is the address of the symbol
+    out.n_value = defined->value + defined->isec->getVA();
+  }
+}
+
+// Registers the string table under the link-edit segment.
+StringTableSection::StringTableSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {
+}
+
+// Queues `str` for emission and returns the offset it will occupy in the
+// table. The table grows by the string's length plus one byte.
+uint32_t StringTableSection::addString(StringRef str) {
+  const uint32_t strx = size;
+  size += str.size() + 1; // account for null terminator
+  strings.push_back(str);
+  return strx;
+}
+
+// Writes every queued string into `buf`, each followed by a NUL terminator,
+// at the offsets handed out by addString().
+//
+// The terminator is written explicitly so that the result does not depend on
+// `buf` having been zero-filled beforehand, and the copy is skipped for empty
+// strings because an empty StringRef may carry a null data pointer, which
+// must not be passed to memcpy even with a length of zero.
+void StringTableSection::writeTo(uint8_t *buf) const {
+  uint32_t off = 0;
+  for (StringRef str : strings) {
+    if (!str.empty())
+      memcpy(buf + off, str.data(), str.size());
+    buf[off + str.size()] = '\0';
+    off += str.size() + 1; // account for null terminator
+  }
+}