diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
commit | fb911942f1434f3d1750f83f25f5e42c80e60638 (patch) | |
tree | 1678c4a4f0182e4029a86d135aa4a1b7d09e3c41 /lib/ReaderWriter/MachO/CompactUnwindPass.cpp |
Notes
Diffstat (limited to 'lib/ReaderWriter/MachO/CompactUnwindPass.cpp')
-rw-r--r-- | lib/ReaderWriter/MachO/CompactUnwindPass.cpp | 530 |
1 files changed, 530 insertions, 0 deletions
diff --git a/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/lib/ReaderWriter/MachO/CompactUnwindPass.cpp new file mode 100644 index 000000000000..fc8608383e5d --- /dev/null +++ b/lib/ReaderWriter/MachO/CompactUnwindPass.cpp @@ -0,0 +1,530 @@ +//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file A pass to convert MachO's __compact_unwind sections into the final +/// __unwind_info format used during runtime. See +/// mach-o/compact_unwind_encoding.h for more details on the formats involved. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +#define DEBUG_TYPE "macho-compact-unwind" + +namespace lld { +namespace mach_o { + +namespace { +struct CompactUnwindEntry { + const Atom *rangeStart; + const Atom *personalityFunction; + const Atom *lsdaLocation; + const Atom *ehFrame; + + uint32_t rangeLength; + + // There are 3 types of compact unwind entry, distinguished by the encoding + // value: 0 indicates a function with no unwind info; + // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to + // __eh_frame, and that the ehFrame entry will be valid; any other value is a + // real compact unwind entry -- personalityFunction will be set and + // lsdaLocation may be. + uint32_t encoding; + + CompactUnwindEntry(const DefinedAtom *function) + : rangeStart(function), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), + encoding(0) {} + + CompactUnwindEntry() + : rangeStart(nullptr), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} +}; + +struct UnwindInfoPage { + std::vector<CompactUnwindEntry> entries; +}; +} + +class UnwindInfoAtom : public SimpleDefinedAtom { +public: + UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, + std::vector<const Atom *> &personalities, + std::vector<uint32_t> &commonEncodings, + std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) + : SimpleDefinedAtom(file), _archHandler(archHandler), + _commonEncodingsOffset(7 * sizeof(uint32_t)), + _personalityArrayOffset(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)), + _topLevelIndexOffset(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)), + _lsdaIndexOffset(_topLevelIndexOffset + + 3 * (pages.size() + 1) * sizeof(uint32_t)), + _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), + _isBig(isBig) { + + addHeader(commonEncodings.size(), personalities.size(), pages.size()); + addCommonEncodings(commonEncodings); + addPersonalityFunctions(personalities); + addTopLevelIndexes(pages); + addLSDAIndexes(pages, numLSDAs); + addSecondLevelPages(pages); + } + + ContentType contentType() const override { + return DefinedAtom::typeProcessedUnwindInfo; + } + + Alignment alignment() const override { return Alignment(2); } + + uint64_t size() const override { return _contents.size(); } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef<uint8_t> rawContent() const override { return _contents; } + + void addHeader(uint32_t numCommon, uint32_t numPersonalities, + uint32_t numPages) { + using normalized::write32; + + uint32_t headerSize = 7 * sizeof(uint32_t); + _contents.resize(headerSize); + + uint8_t *headerEntries = _contents.data(); + // version + write32(headerEntries, 1, _isBig); + // commonEncodingsArraySectionOffset + write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); + // commonEncodingsArrayCount + write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); + // personalityArraySectionOffset + write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, + _isBig); + // personalityArrayCount + write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); + // indexSectionOffset + write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); + // indexCount + write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); + } + + /// Add the list of common encodings to the section; this is simply an array + /// of uint32_t compact values. Size has already been specified in the header. + void addCommonEncodings(std::vector<uint32_t> &commonEncodings) { + using normalized::write32; + + _contents.resize(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)); + uint8_t *commonEncodingsArea = + reinterpret_cast<uint8_t *>(_contents.data() + _commonEncodingsOffset); + + for (uint32_t encoding : commonEncodings) { + write32(commonEncodingsArea, encoding, _isBig); + commonEncodingsArea += sizeof(uint32_t); + } + } + + void addPersonalityFunctions(std::vector<const Atom *> personalities) { + _contents.resize(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)); + + for (unsigned i = 0; i < personalities.size(); ++i) + addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), + personalities[i]); + } + + void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) { + using normalized::write32; + + uint32_t numIndexes = pages.size() + 1; + _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); + + uint32_t pageLoc = _firstPageOffset; + + // The most difficult job here is calculating the LSDAs; everything else + // follows fairly naturally, but we can't state where the first + uint8_t *indexData = &_contents[_topLevelIndexOffset]; + uint32_t numLSDAs = 0; + for (unsigned i = 0; i < pages.size(); ++i) { + // functionOffset + addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), + pages[i].entries[0].rangeStart); + // secondLevelPagesSectionOffset + write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); + write32(indexData + (3 * i + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + + for (auto &entry : pages[i].entries) + if (entry.lsdaLocation) + ++numLSDAs; + } + + // Finally, write out the final sentinel index + CompactUnwindEntry &finalEntry = pages[pages.size() - 1].entries.back(); + addImageReference(_topLevelIndexOffset + + 3 * pages.size() * sizeof(uint32_t), + finalEntry.rangeStart, finalEntry.rangeLength); + // secondLevelPagesSectionOffset => 0 + write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + } + + void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) { + _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); + + uint32_t curOffset = _lsdaIndexOffset; + for (auto &page : pages) { + for (auto &entry : page.entries) { + if (!entry.lsdaLocation) + continue; + + addImageReference(curOffset, entry.rangeStart); + addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); + curOffset += 2 * sizeof(uint32_t); + } + } + } + + void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) { + for (auto &page : pages) { + addRegularSecondLevelPage(page); + } + } + + void addRegularSecondLevelPage(const UnwindInfoPage &page) { + uint32_t curPageOffset = _contents.size(); + const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); + uint32_t curPageSize = + headerSize + 2 * page.entries.size() * sizeof(uint32_t); + _contents.resize(curPageOffset + curPageSize); + + using normalized::write32; + using normalized::write16; + // 2 => regular page + write32(&_contents[curPageOffset], 2, _isBig); + // offset of 1st entry + write16(&_contents[curPageOffset + 4], headerSize, _isBig); + write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); + + uint32_t pagePos = curPageOffset + headerSize; + for (auto &entry : page.entries) { + addImageReference(pagePos, entry.rangeStart); + + write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, + _isBig); + if ((entry.encoding & 0x0f000000U) == + _archHandler.dwarfCompactUnwindType()) + addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); + + pagePos += 2 * sizeof(uint32_t); + } + } + + void addEhFrameReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); + } + + void addImageReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKind(), offset, dest, addend); + } + + void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKindIndirect(), offset, dest, 0); + } + +private: + mach_o::ArchHandler &_archHandler; + std::vector<uint8_t> _contents; + uint32_t _commonEncodingsOffset; + uint32_t _personalityArrayOffset; + uint32_t _topLevelIndexOffset; + uint32_t _lsdaIndexOffset; + uint32_t _firstPageOffset; + bool _isBig; +}; + +/// Pass for instantiating and optimizing GOT slots. +/// +class CompactUnwindPass : public Pass { +public: + CompactUnwindPass(const MachOLinkingContext &context) + : _context(context), _archHandler(_context.archHandler()), + _file("<mach-o Compact Unwind Pass>"), + _isBig(MachOLinkingContext::isBigEndian(_context.arch())) {} + +private: + void perform(std::unique_ptr<MutableFile> &mergedFile) override { + DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); + + std::map<const Atom *, CompactUnwindEntry> unwindLocs; + std::map<const Atom *, const Atom *> dwarfFrames; + std::vector<const Atom *> personalities; + uint32_t numLSDAs = 0; + + // First collect all __compact_unwind and __eh_frame entries, addressable by + // the function referred to. + collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, + numLSDAs); + + collectDwarfFrameEntries(mergedFile, dwarfFrames); + + // Skip rest of pass if no unwind info. + if (unwindLocs.empty() && dwarfFrames.empty()) + return; + + // FIXME: if there are more than 4 personality functions then we need to + // defer to DWARF info for the ones we don't put in the list. They should + // also probably be sorted by frequency. + assert(personalities.size() <= 4); + + // TODO: Find commmon encodings for use by compressed pages. + std::vector<uint32_t> commonEncodings; + + // Now sort the entries by final address and fixup the compact encoding to + // its final form (i.e. set personality function bits & create DWARF + // references where needed). + std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries( + mergedFile, unwindLocs, personalities, dwarfFrames); + + // Finally, we can start creating pages based on these entries. + + DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); + // FIXME: we split the entries into pages naively: lots of 4k pages followed + // by a small one. ld64 tried to minimize space and align them to real 4k + // boundaries. That might be worth doing, or perhaps we could perform some + // minor balancing for expected number of lookups. + std::vector<UnwindInfoPage> pages; + unsigned pageStart = 0; + do { + pages.push_back(UnwindInfoPage()); + + // FIXME: we only create regular pages at the moment. These can hold up to + // 1021 entries according to the documentation. + unsigned entriesInPage = + std::min(1021U, (unsigned)unwindInfos.size() - pageStart); + + std::copy(unwindInfos.begin() + pageStart, + unwindInfos.begin() + pageStart + entriesInPage, + std::back_inserter(pages.back().entries)); + pageStart += entriesInPage; + + DEBUG(llvm::dbgs() + << " Page from " << pages.back().entries[0].rangeStart->name() + << " to " << pages.back().entries.back().rangeStart->name() << " + " + << llvm::format("0x%x", pages.back().entries.back().rangeLength) + << " has " << entriesInPage << " entries\n"); + } while (pageStart < unwindInfos.size()); + + UnwindInfoAtom *unwind = new (_file.allocator()) + UnwindInfoAtom(_archHandler, _file, _isBig, personalities, + commonEncodings, pages, numLSDAs); + mergedFile->addAtom(*unwind); + + // Finally, remove all __compact_unwind atoms now that we've processed them. + mergedFile->removeDefinedAtomsIf([](const DefinedAtom *atom) { + return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; + }); + } + + void collectCompactUnwindEntries( + std::unique_ptr<MutableFile> &mergedFile, + std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + std::vector<const Atom *> &personalities, uint32_t &numLSDAs) { + DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); + + for (const DefinedAtom *atom : mergedFile->defined()) { + if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) + continue; + + auto unwindEntry = extractCompactUnwindEntry(atom); + unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); + + DEBUG(llvm::dbgs() << " Entry for " << unwindEntry.rangeStart->name() + << ", encoding=" + << llvm::format("0x%08x", unwindEntry.encoding)); + if (unwindEntry.personalityFunction) + DEBUG(llvm::dbgs() << ", personality=" + << unwindEntry.personalityFunction->name() + << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); + DEBUG(llvm::dbgs() << '\n'); + + // Count number of LSDAs we see, since we need to know how big the index + // will be while laying out the section. + if (unwindEntry.lsdaLocation) + ++numLSDAs; + + // Gather the personality functions now, so that they're in deterministic + // order (derived from the DefinedAtom order). + if (unwindEntry.personalityFunction) { + auto pFunc = std::find(personalities.begin(), personalities.end(), + unwindEntry.personalityFunction); + if (pFunc == personalities.end()) + personalities.push_back(unwindEntry.personalityFunction); + } + } + } + + CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { + CompactUnwindEntry entry; + + for (const Reference *ref : *atom) { + switch (ref->offsetInAtom()) { + case 0: + // FIXME: there could legitimately be functions with multiple encoding + // entries. However, nothing produces them at the moment. + assert(ref->addend() == 0 && "unexpected offset into function"); + entry.rangeStart = ref->target(); + break; + case 0x10: + assert(ref->addend() == 0 && "unexpected offset into personality fn"); + entry.personalityFunction = ref->target(); + break; + case 0x18: + assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); + entry.lsdaLocation = ref->target(); + break; + } + } + + if (atom->rawContent().size() < 4 * sizeof(uint32_t)) + return entry; + + using normalized::read32; + entry.rangeLength = + read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); + entry.encoding = + read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); + return entry; + } + + void + collectDwarfFrameEntries(std::unique_ptr<MutableFile> &mergedFile, + std::map<const Atom *, const Atom *> &dwarfFrames) { + for (const DefinedAtom *ehFrameAtom : mergedFile->defined()) { + if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) + continue; + if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) + continue; + + if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) + dwarfFrames[function] = ehFrameAtom; + } + } + + /// Every atom defined in __TEXT,__text needs an entry in the final + /// __unwind_info section (in order). These comes from two sources: + /// + Input __compact_unwind sections where possible (after adding the + /// personality function offset which is only known now). + /// + A synthesised reference to __eh_frame if there's no __compact_unwind + /// or too many personality functions to be accommodated. + std::vector<CompactUnwindEntry> createUnwindInfoEntries( + const std::unique_ptr<MutableFile> &mergedFile, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + std::vector<CompactUnwindEntry> unwindInfos; + + DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); + // The final order in the __unwind_info section must be derived from the + // order of typeCode atoms, since that's how they'll be put into the object + // file eventually (yuck!). + for (const DefinedAtom *atom : mergedFile->defined()) { + if (atom->contentType() != DefinedAtom::typeCode) + continue; + + unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( + atom, unwindLocs, personalities, dwarfFrames)); + + DEBUG(llvm::dbgs() << " Entry for " << atom->name() + << ", final encoding=" + << llvm::format("0x%08x", unwindInfos.back().encoding) + << '\n'); + } + + return unwindInfos; + } + + CompactUnwindEntry finalizeUnwindInfoEntryForAtom( + const DefinedAtom *function, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + auto unwindLoc = unwindLocs.find(function); + + CompactUnwindEntry entry; + if (unwindLoc == unwindLocs.end()) { + // Default entry has correct encoding (0 => no unwind), but we need to + // synthesise the function. + entry.rangeStart = function; + entry.rangeLength = function->size(); + } else + entry = unwindLoc->second; + + + // If there's no __compact_unwind entry, or it explicitly says to use + // __eh_frame, we need to try and fill in the correct DWARF atom. + if (entry.encoding == _archHandler.dwarfCompactUnwindType() || + entry.encoding == 0) { + auto dwarfFrame = dwarfFrames.find(function); + if (dwarfFrame != dwarfFrames.end()) { + entry.encoding = _archHandler.dwarfCompactUnwindType(); + entry.ehFrame = dwarfFrame->second; + } + } + + + auto personality = std::find(personalities.begin(), personalities.end(), + entry.personalityFunction); + uint32_t personalityIdx = personality == personalities.end() + ? 0 + : personality - personalities.begin() + 1; + + // FIXME: We should also use DWARF when there isn't enough room for the + // personality function in the compact encoding. + assert(personalityIdx < 4 && "too many personality functions"); + + entry.encoding |= personalityIdx << 28; + + if (entry.lsdaLocation) + entry.encoding |= 1U << 30; + + return entry; + } + + const MachOLinkingContext &_context; + mach_o::ArchHandler &_archHandler; + MachOFile _file; + bool _isBig; +}; + +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsCompactUnwindPass()); + pm.add(llvm::make_unique<CompactUnwindPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld |