diff options
Diffstat (limited to 'contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp')
| -rw-r--r-- | contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp | 1634 |
1 files changed, 1634 insertions, 0 deletions
diff --git a/contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp new file mode 100644 index 000000000000..879f07fb4760 --- /dev/null +++ b/contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -0,0 +1,1634 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory normalized mach-o to in-memory Atoms. +/// +/// +------------+ +/// | normalized | +/// +------------+ +/// | +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "ArchHandler.h" +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Common/LLVM.h" +#include "lld/Core/Error.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +#define DEBUG_TYPE "normalized-file-to-atoms" + +namespace lld { +namespace mach_o { + + +namespace { // anonymous + + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachORelocatableSectionToAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachORelocatableSectionToAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("", "", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), + ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), + ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), + ENTRY("", "", S_INTERPOSING, typeInterposingTuples), + ENTRY("__LD", "__compact_unwind", S_REGULAR, + typeCompactUnwindInfo), + ENTRY("", "", S_REGULAR, typeUnknown) +}; +#undef ENTRY + + +/// Figures out ContentType of a mach-o section. +DefinedAtom::ContentType atomTypeFromSection(const Section §ion, + bool &customSectionName) { + // First look for match of name and type. Empty names in table are wildcards. + customSectionName = false; + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->sectionType != section.type) + continue; + if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) + continue; + if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) + continue; + customSectionName = p->segmentName.empty() && p->sectionName.empty(); + return p->atomType; + } + // Look for code denoted by section attributes + if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) + return DefinedAtom::typeCode; + + return DefinedAtom::typeUnknown; +} + +enum AtomizeModel { + atomizeAtSymbols, + atomizeFixedSize, + atomizePointerSize, + atomizeUTF8, + atomizeUTF16, + atomizeCFI, + atomizeCU, + atomizeCFString +}; + +/// Returns info on how to atomize a section of the specified ContentType. +void sectionParseInfo(DefinedAtom::ContentType atomType, + unsigned int &sizeMultiple, + DefinedAtom::Scope &scope, + DefinedAtom::Merge &merge, + AtomizeModel &atomizeModel) { + struct ParseInfo { + DefinedAtom::ContentType atomType; + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + }; + + #define ENTRY(type, size, scope, merge, model) \ + {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } + + static const ParseInfo parseInfo[] = { + ENTRY(typeCode, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstant, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF8), + ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF16), + ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, + atomizeCFI), + ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, + atomizeCFString), + ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, + atomizeCU), + ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, + atomizeAtSymbols) + }; + #undef ENTRY + const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); + for (int i=0; i < tableLen; ++i) { + if (parseInfo[i].atomType == atomType) { + sizeMultiple = parseInfo[i].sizeMultiple; + scope = parseInfo[i].scope; + merge = parseInfo[i].merge; + atomizeModel = parseInfo[i].atomizeModel; + return; + } + } + + // Unknown type is atomized by symbols. + sizeMultiple = 1; + scope = DefinedAtom::scopeGlobal; + merge = DefinedAtom::mergeNo; + atomizeModel = atomizeAtSymbols; +} + + +Atom::Scope atomScope(uint8_t scope) { + switch (scope) { + case N_EXT: + return Atom::scopeGlobal; + case N_PEXT: + case N_PEXT | N_EXT: + return Atom::scopeLinkageUnit; + case 0: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("unknown scope value!"); +} + +void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, + uint32_t sectionIndex, + SmallVector<const Symbol *, 64> &outSyms) { + for (const Symbol &sym : inSymbols) { + // Only look at definition symbols. + if ((sym.type & N_TYPE) != N_SECT) + continue; + if (sym.sect != sectionIndex) + continue; + outSyms.push_back(&sym); + } +} + +void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, + MachOFile &file, uint64_t symbolAddr, StringRef symbolName, + uint16_t symbolDescFlags, Atom::Scope symbolScope, + uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { + // Mach-O symbol table does have size in it. Instead the size is the + // difference between this and the next symbol. + uint64_t size = nextSymbolAddr - symbolAddr; + uint64_t offset = symbolAddr - section.address; + bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; + if (isZeroFillSection(section.type)) { + file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, + noDeadStrip, copyRefs, §ion); + } else { + DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) + ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; + bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); + if (atomType == DefinedAtom::typeUnknown) { + // Mach-O needs a segment and section name. Concatentate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, + merge, thumb, noDeadStrip, offset, + size, segSectName, true, §ion); + } else { + if ((atomType == lld::DefinedAtom::typeCode) && + (symbolDescFlags & N_SYMBOL_RESOLVER)) { + atomType = lld::DefinedAtom::typeResolver; + } + file.addDefinedAtom(symbolName, symbolScope, atomType, merge, + offset, size, thumb, noDeadStrip, copyRefs, §ion); + } + } +} + +llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, + const Section §ion, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + // Find section's index. + uint32_t sectIndex = 1; + for (auto § : normalizedFile.sections) { + if (§ == §ion) + break; + ++sectIndex; + } + + // Find all symbols in this section. + SmallVector<const Symbol *, 64> symbols; + appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); + appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); + + // Sort symbols. + std::sort(symbols.begin(), symbols.end(), + [](const Symbol *lhs, const Symbol *rhs) -> bool { + if (lhs == rhs) + return false; + // First by address. + uint64_t lhsAddr = lhs->value; + uint64_t rhsAddr = rhs->value; + if (lhsAddr != rhsAddr) + return lhsAddr < rhsAddr; + // If same address, one is an alias so sort by scope. + Atom::Scope lScope = atomScope(lhs->scope); + Atom::Scope rScope = atomScope(rhs->scope); + if (lScope != rScope) + return lScope < rScope; + // If same address and scope, see if one might be better as + // the alias. + bool lPrivate = (lhs->name.front() == 'l'); + bool rPrivate = (rhs->name.front() == 'l'); + if (lPrivate != rPrivate) + return lPrivate; + // If same address and scope, sort by name. + return lhs->name < rhs->name; + }); + + // Debug logging of symbols. + //for (const Symbol *sym : symbols) + // llvm::errs() << " sym: " + // << llvm::format("0x%08llx ", (uint64_t)sym->value) + // << ", " << sym->name << "\n"; + + // If section has no symbols and no content, there are no atoms. + if (symbols.empty() && section.content.empty()) + return llvm::Error::success(); + + if (symbols.empty()) { + // Section has no symbols, put all content in one anoymous atom. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, + section.address + section.content.size(), + scatterable, copyRefs); + } + else if (symbols.front()->value != section.address) { + // Section has anonymous content before first symbol. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, symbols.front()->value, + scatterable, copyRefs); + } + + const Symbol *lastSym = nullptr; + for (const Symbol *sym : symbols) { + if (lastSym != nullptr) { + // Ignore any assembler added "ltmpNNN" symbol at start of section + // if there is another symbol at the start. + if ((lastSym->value != sym->value) + || lastSym->value != section.address + || !lastSym->name.startswith("ltmp")) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), sym->value, + scatterable, copyRefs); + } + } + lastSym = sym; + } + if (lastSym != nullptr) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), + section.address + section.content.size(), + scatterable, copyRefs); + } + + // If object built without .subsections_via_symbols, add reference chain. + if (!scatterable) { + MachODefinedAtom *prevAtom = nullptr; + file.eachAtomInSection(section, + [&](MachODefinedAtom *atom, uint64_t offset)->void { + if (prevAtom) + prevAtom->addReference(Reference::KindNamespace::all, + Reference::KindArch::all, + Reference::kindLayoutAfter, 0, atom, 0); + prevAtom = atom; + }); + } + + return llvm::Error::success(); +} + +llvm::Error processSection(DefinedAtom::ContentType atomType, + const Section §ion, + bool customSectionName, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + + // Get info on how to atomize section. + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); + + // Validate section size. + if ((section.content.size() % sizeMultiple) != 0) + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " has size (" + + Twine(section.content.size()) + + ") which is not a multiple of " + + Twine(sizeMultiple)); + + if (atomizeModel == atomizeAtSymbols) { + // Break section up into atoms each with a fixed size. + return processSymboledSection(atomType, section, normalizedFile, file, + scatterable, copyRefs); + } else { + unsigned int size; + for (unsigned int offset = 0, e = section.content.size(); offset != e;) { + switch (atomizeModel) { + case atomizeFixedSize: + // Break section up into atoms each with a fixed size. + size = sizeMultiple; + break; + case atomizePointerSize: + // Break section up into atoms each the size of a pointer. + size = is64 ? 8 : 4; + break; + case atomizeUTF8: + // Break section up into zero terminated c-strings. + size = 0; + for (unsigned int i = offset; i < e; ++i) { + if (section.content[i] == 0) { + size = i + 1 - offset; + break; + } + } + break; + case atomizeUTF16: + // Break section up into zero terminated UTF16 strings. + size = 0; + for (unsigned int i = offset; i < e; i += 2) { + if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { + size = i + 2 - offset; + break; + } + } + break; + case atomizeCFI: + // Break section up into dwarf unwind CFIs (FDE or CIE). + size = read32(§ion.content[offset], isBig) + 4; + if (offset+size > section.content.size()) { + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. Size of CFI " + "starting at offset (" + + Twine(offset) + + ") is past end of section."); + } + break; + case atomizeCU: + // Break section up into compact unwind entries. + size = is64 ? 32 : 20; + break; + case atomizeCFString: + // Break section up into NS/CFString objects. + size = is64 ? 32 : 16; + break; + case atomizeAtSymbols: + break; + } + if (size == 0) { + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. The last atom " + "is not zero terminated."); + } + if (customSectionName) { + // Mach-O needs a segment and section name. Concatentate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, + merge, false, false, offset, + size, segSectName, true, §ion); + } else { + file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, + false, false, copyRefs, §ion); + } + offset += size; + } + } + return llvm::Error::success(); +} + +const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, + uint64_t address) { + for (const Section &s : normalizedFile.sections) { + uint64_t sAddr = s.address; + if ((sAddr <= address) && (address < sAddr+s.content.size())) { + return &s; + } + } + return nullptr; +} + +const MachODefinedAtom * +findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, + uint64_t addr, Reference::Addend &addend) { + const Section *sect = nullptr; + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return nullptr; + + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + auto atom = + file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + addend = offsetInTarget; + return atom; +} + +// Walks all relocations for a section in a normalized .o file and +// creates corresponding lld::Reference objects. +llvm::Error convertRelocs(const Section §ion, + const NormalizedFile &normalizedFile, + bool scatterable, + MachOFile &file, + ArchHandler &handler) { + // Utility function for ArchHandler to find atom by its address. + auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, + const lld::Atom **atom, Reference::Addend *addend) + -> llvm::Error { + if (sectIndex > normalizedFile.sections.size()) + return llvm::make_error<GenericError>(Twine("out of range section " + "index (") + Twine(sectIndex) + ")"); + const Section *sect = nullptr; + if (sectIndex == 0) { + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) + + ") is not in any section")); + } else { + sect = &normalizedFile.sections[sectIndex-1]; + } + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return llvm::Error::success(); + }; + + // Utility function for ArchHandler to find atom by its symbol index. + auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) + -> llvm::Error { + // Find symbol from index. + const Symbol *sym = nullptr; + uint32_t numStabs = normalizedFile.stabsSymbols.size(); + uint32_t numLocal = normalizedFile.localSymbols.size(); + uint32_t numGlobal = normalizedFile.globalSymbols.size(); + uint32_t numUndef = normalizedFile.undefinedSymbols.size(); + assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); + if (symbolIndex < numStabs+numLocal) { + sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; + } else if (symbolIndex < numStabs+numLocal+numGlobal) { + sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; + } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { + sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- + numGlobal]; + } else { + return llvm::make_error<GenericError>(Twine("symbol index (") + + Twine(symbolIndex) + ") out of range"); + } + + // Find atom from symbol. + if ((sym->type & N_TYPE) == N_SECT) { + if (sym->sect > normalizedFile.sections.size()) + return llvm::make_error<GenericError>(Twine("symbol section index (") + + Twine(sym->sect) + ") out of range "); + const Section &symSection = normalizedFile.sections[sym->sect-1]; + uint64_t targetOffsetInSect = sym->value - symSection.address; + MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, + targetOffsetInSect); + if (target) { + *result = target; + return llvm::Error::success(); + } + return llvm::make_error<GenericError>("no atom found for defined symbol"); + } else if ((sym->type & N_TYPE) == N_UNDF) { + const lld::Atom *target = file.findUndefAtom(sym->name); + if (target) { + *result = target; + return llvm::Error::success(); + } + return llvm::make_error<GenericError>("no undefined atom found for sym"); + } else { + // Search undefs + return llvm::make_error<GenericError>("no atom found for symbol"); + } + }; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + // Use old-school iterator so that paired relocations can be grouped. + for (auto it=section.relocations.begin(), e=section.relocations.end(); + it != e; ++it) { + const Relocation &reloc = *it; + // Find atom this relocation is in. + if (reloc.offset > section.content.size()) + return llvm::make_error<GenericError>( + Twine("r_address (") + Twine(reloc.offset) + + ") is larger than section size (" + + Twine(section.content.size()) + ")"); + uint32_t offsetInAtom; + MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, + reloc.offset, + &offsetInAtom); + assert(inAtom && "r_address in range, should have found atom"); + uint64_t fixupAddress = section.address + reloc.offset; + + const lld::Atom *target = nullptr; + Reference::Addend addend = 0; + Reference::KindValue kind; + if (handler.isPairedReloc(reloc)) { + // Handle paired relocations together. + const Relocation &reloc2 = *++it; + auto relocErr = handler.getPairReferenceInfo( + reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, + atomByAddr, atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr<GenericError> GE) { + return llvm::make_error<GenericError>( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r1_address=" + Twine::utohexstr(reloc.offset) + + ", r1_type=" + Twine(reloc.type) + + ", r1_extern=" + Twine(reloc.isExtern) + + ", r1_length=" + Twine((int)reloc.length) + + ", r1_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r1_symbolnum=") + + Twine(reloc.symbol)) + : (Twine(", r1_scattered=1, r1_value=") + + Twine(reloc.value))) + + ")" + + ", (r2_address=" + Twine::utohexstr(reloc2.offset) + + ", r2_type=" + Twine(reloc2.type) + + ", r2_extern=" + Twine(reloc2.isExtern) + + ", r2_length=" + Twine((int)reloc2.length) + + ", r2_pcrel=" + Twine(reloc2.pcRel) + + (!reloc2.scattered ? (Twine(", r2_symbolnum=") + + Twine(reloc2.symbol)) + : (Twine(", r2_scattered=1, r2_value=") + + Twine(reloc2.value))) + + ")" ); + }); + } + } + else { + // Use ArchHandler to convert relocation record into information + // needed to instantiate an lld::Reference object. + auto relocErr = handler.getReferenceInfo( + reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, + atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr<GenericError> GE) { + return llvm::make_error<GenericError>( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r_address=" + Twine::utohexstr(reloc.offset) + + ", r_type=" + Twine(reloc.type) + + ", r_extern=" + Twine(reloc.isExtern) + + ", r_length=" + Twine((int)reloc.length) + + ", r_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) + : (Twine(", r_scattered=1, r_value=") + + Twine(reloc.value))) + + ")" ); + }); + } + } + // Instantiate an lld::Reference object and add to its atom. + inAtom->addReference(Reference::KindNamespace::mach_o, + handler.kindArch(), + kind, offsetInAtom, target, addend); + } + + return llvm::Error::success(); +} + +bool isDebugInfoSection(const Section §ion) { + if ((section.attributes & S_ATTR_DEBUG) == 0) + return false; + return section.segmentName.equals("__DWARF"); +} + +static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { + std::string strName = name.str(); + for (auto *atom : file.defined()) + if (atom->name() == strName) + return atom; + return nullptr; +} + +static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { + char *strCopy = alloc.Allocate<char>(str.size() + 1); + memcpy(strCopy, str.data(), str.size()); + strCopy[str.size()] = '\0'; + return strCopy; +} + +llvm::Error parseStabs(MachOFile &file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + + if (normalizedFile.stabsSymbols.empty()) + return llvm::Error::success(); + + // FIXME: Kill this off when we can move to sane yaml parsing. + std::unique_ptr<BumpPtrAllocator> allocator; + if (copyRefs) + allocator = llvm::make_unique<BumpPtrAllocator>(); + + enum { start, inBeginEnd } state = start; + + const Atom *currentAtom = nullptr; + uint64_t currentAtomAddress = 0; + StabsDebugInfo::StabsList stabsList; + for (const auto &stabSym : normalizedFile.stabsSymbols) { + Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, + stabSym.value, stabSym.name); + switch (state) { + case start: + switch (static_cast<StabType>(stabSym.type)) { + case N_BNSYM: + state = inBeginEnd; + currentAtomAddress = stabSym.value; + Reference::Addend addend; + currentAtom = findAtomCoveringAddress(normalizedFile, file, + currentAtomAddress, addend); + if (addend != 0) + return llvm::make_error<GenericError>( + "Non-zero addend for BNSYM '" + stabSym.name + "' in " + + file.path()); + if (currentAtom) + stab.atom = currentAtom; + else { + // FIXME: ld64 just issues a warning here - should we match that? + return llvm::make_error<GenericError>( + "can't find atom for stabs BNSYM at " + + Twine::utohexstr(stabSym.value) + " in " + file.path()); + } + break; + case N_SO: + case N_OSO: + // Not associated with an atom, just copy. + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + case N_GSYM: { + auto colonIdx = stabSym.name.find(':'); + if (colonIdx != StringRef::npos) { + StringRef name = stabSym.name.substr(0, colonIdx); + currentAtom = findDefinedAtomByName(file, "_" + name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } else { + currentAtom = findDefinedAtomByName(file, stabSym.name); + stab.atom = currentAtom; + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + } + if (stab.atom == nullptr) + return llvm::make_error<GenericError>( + "can't find atom for N_GSYM stabs" + stabSym.name + + " in " + file.path()); + break; + } + case N_FUN: + return llvm::make_error<GenericError>( + "old-style N_FUN stab '" + stabSym.name + "' unsupported"); + default: + return llvm::make_error<GenericError>( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + break; + case inBeginEnd: + stab.atom = currentAtom; + switch (static_cast<StabType>(stabSym.type)) { + case N_ENSYM: + state = start; + currentAtom = nullptr; + break; + case N_FUN: + // Just copy the string. + if (copyRefs) + stab.str = copyDebugString(stabSym.name, *allocator); + else + stab.str = stabSym.name; + break; + default: + return llvm::make_error<GenericError>( + "unrecognized stab symbol '" + stabSym.name + "'"); + } + } + llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; + stabsList.push_back(stab); + } + + file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList))); + + // FIXME: Kill this off when we fix YAML memory ownership. + file.debugInfo()->setAllocator(std::move(allocator)); + + return llvm::Error::success(); +} + +static llvm::DataExtractor +dataExtractorFromSection(const NormalizedFile &normalizedFile, + const Section &S) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + StringRef SecData(reinterpret_cast<const char*>(S.content.data()), + S.content.size()); + return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, + uint64_t abbrCode) { + uint64_t curCode; + uint32_t offset = 0; + while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) { + // Tag + abbrevData.getULEB128(&offset); + // DW_CHILDREN + abbrevData.getU8(&offset); + // Attributes + while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) + ; + } + return offset; +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static Expected<const char *> +getIndexedString(const NormalizedFile &normalizedFile, + llvm::dwarf::Form form, llvm::DataExtractor infoData, + uint32_t &infoOffset, const Section &stringsSection) { + if (form == llvm::dwarf::DW_FORM_string) + return infoData.getCStr(&infoOffset); + if (form != llvm::dwarf::DW_FORM_strp) + return llvm::make_error<GenericError>( + "string field encoded without DW_FORM_strp"); + uint32_t stringOffset = infoData.getU32(&infoOffset); + llvm::DataExtractor stringsData = + dataExtractorFromSection(normalizedFile, stringsSection); + return stringsData.getCStr(&stringOffset); +} + +// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE +// inspection" code if possible. +static llvm::Expected<TranslationUnitSource> +readCompUnit(const NormalizedFile &normalizedFile, + const Section &info, + const Section &abbrev, + const Section &strings, + StringRef path) { + // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE + // inspection" code if possible. + uint32_t offset = 0; + llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; + auto infoData = dataExtractorFromSection(normalizedFile, info); + uint32_t length = infoData.getU32(&offset); + if (length == 0xffffffff) { + Format = llvm::dwarf::DwarfFormat::DWARF64; + infoData.getU64(&offset); + } + else if (length > 0xffffff00) + return llvm::make_error<GenericError>("Malformed DWARF in " + path); + + uint16_t version = infoData.getU16(&offset); + + if (version < 2 || version > 4) + return llvm::make_error<GenericError>("Unsupported DWARF version in " + + path); + + infoData.getU32(&offset); // Abbrev offset (should be zero) + uint8_t addrSize = infoData.getU8(&offset); + + uint32_t abbrCode = infoData.getULEB128(&offset); + auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); + uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); + uint64_t tag = abbrevData.getULEB128(&abbrevOffset); + if (tag != llvm::dwarf::DW_TAG_compile_unit) + return llvm::make_error<GenericError>("top level DIE is not a compile unit"); + // DW_CHILDREN + abbrevData.getU8(&abbrevOffset); + uint32_t name; + llvm::dwarf::Form form; + llvm::dwarf::FormParams formParams = {version, addrSize, Format}; + TranslationUnitSource tu; + while ((name = abbrevData.getULEB128(&abbrevOffset)) | + (form = static_cast<llvm::dwarf::Form>( + abbrevData.getULEB128(&abbrevOffset))) && + (name != 0 || form != 0)) { + switch (name) { + case llvm::dwarf::DW_AT_name: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + tu.name = *eName; + else + return eName.takeError(); + break; + } + case llvm::dwarf::DW_AT_comp_dir: { + if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, + strings)) + tu.path = *eName; + else + return eName.takeError(); + break; + } + default: + llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); + } + } + return tu; +} + +llvm::Error parseDebugInfo(MachOFile &file, + const NormalizedFile &normalizedFile, bool copyRefs) { + + // Find the interesting debug info sections. + const Section *debugInfo = nullptr; + const Section *debugAbbrev = nullptr; + const Section *debugStrings = nullptr; + + for (auto &s : normalizedFile.sections) { + if (s.segmentName == "__DWARF") { + if (s.sectionName == "__debug_info") + debugInfo = &s; + else if (s.sectionName == "__debug_abbrev") + debugAbbrev = &s; + else if (s.sectionName == "__debug_str") + debugStrings = &s; + } + } + + if (!debugInfo) + return parseStabs(file, normalizedFile, copyRefs); + + if (debugInfo->content.size() == 0) + return llvm::Error::success(); + + if (debugInfo->content.size() < 12) + return llvm::make_error<GenericError>("Malformed __debug_info section in " + + file.path() + ": too small"); + + if (!debugAbbrev) + return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + + file.path()); + + if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, + *debugStrings, file.path())) { + // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML + // memory ownership. + std::unique_ptr<BumpPtrAllocator> allocator; + if (copyRefs) { + allocator = llvm::make_unique<BumpPtrAllocator>(); + tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); + tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); + } + file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); + if (copyRefs) + file.debugInfo()->setAllocator(std::move(allocator)); + } else + return tuOrErr.takeError(); + + return llvm::Error::success(); +} + +static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { + if (is64) + return read64(addr, isBig); + + int32_t res = read32(addr, isBig); + return res; +} + +/// --- Augmentation String Processing --- + +struct CIEInfo { + bool _augmentationDataPresent = false; + bool _mayHaveEH = false; + uint32_t _offsetOfLSDA = ~0U; + uint32_t _offsetOfPersonality = ~0U; + uint32_t _offsetOfFDEPointerEncoding = ~0U; + uint32_t _augmentationDataLength = ~0U; +}; + +typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; + +static llvm::Error processAugmentationString(const uint8_t *augStr, + CIEInfo &cieInfo, + unsigned &len) { + + if (augStr[0] == '\0') { + len = 1; + return llvm::Error::success(); + } + + if (augStr[0] != 'z') + return llvm::make_error<GenericError>("expected 'z' at start of " + "augmentation string"); + + cieInfo._augmentationDataPresent = true; + uint64_t idx = 1; + + uint32_t offsetInAugmentationData = 0; + while (augStr[idx] != '\0') { + if (augStr[idx] == 'L') { + cieInfo._offsetOfLSDA = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'P') { + cieInfo._offsetOfPersonality = offsetInAugmentationData; + // This adds a single byte to the augmentation data for the encoding, + // then a number of bytes for the pointer data. + // FIXME: We are assuming 4 is correct here for the pointer size as we + // always currently use delta32ToGOT. + offsetInAugmentationData += 5; + ++idx; + continue; + } + if (augStr[idx] == 'R') { + cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'e') { + if (augStr[idx + 1] != 'h') + return llvm::make_error<GenericError>("expected 'eh' in " + "augmentation string"); + cieInfo._mayHaveEH = true; + idx += 2; + continue; + } + ++idx; + } + + cieInfo._augmentationDataLength = offsetInAugmentationData; + + len = idx + 1; + return llvm::Error::success(); +} + +static llvm::Error processCIE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + CIEInfoMap &cieInfos) { + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const uint8_t *frameData = atom->rawContent().data(); + + CIEInfo cieInfo; + + uint32_t size = read32(frameData, isBig); + uint64_t cieIDField = size == 0xffffffffU + ? sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + uint64_t versionField = cieIDField + sizeof(uint32_t); + uint64_t augmentationStringField = versionField + sizeof(uint8_t); + + unsigned augmentationStringLength = 0; + if (auto err = processAugmentationString(frameData + augmentationStringField, + cieInfo, augmentationStringLength)) + return err; + + if (cieInfo._offsetOfPersonality != ~0U) { + // If we have augmentation data for the personality function, then we may + // need to implicitly generate its relocation. + + // Parse the EH Data field which is pointer sized. + uint64_t EHDataField = augmentationStringField + augmentationStringLength; + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); + + // Parse Code Align Factor which is a ULEB128. + uint64_t CodeAlignField = EHDataField + EHDataFieldSize; + unsigned lengthFieldSize = 0; + llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); + + // Parse Data Align Factor which is a SLEB128. + uint64_t DataAlignField = CodeAlignField + lengthFieldSize; + llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); + + // Parse Return Address Register which is a byte. + uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; + + // Parse the augmentation length which is a ULEB128. + uint64_t AugmentationLengthField = ReturnAddressField + 1; + uint64_t AugmentationLength = + llvm::decodeULEB128(frameData + AugmentationLengthField, + &lengthFieldSize); + + if (AugmentationLength != cieInfo._augmentationDataLength) + return llvm::make_error<GenericError>("CIE augmentation data length " + "mismatch"); + + // Get the start address of the augmentation data. + uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; + + // Parse the personality function from the augmentation data. + uint64_t PersonalityField = + AugmentationDataField + cieInfo._offsetOfPersonality; + + // Parse the personality encoding. + // FIXME: Verify that this is a 32-bit pcrel offset. + uint64_t PersonalityFunctionField = PersonalityField + 1; + + if (atom->begin() != atom->end()) { + // If we have an explicit relocation, then make sure it matches this + // offset as this is where we'd expect it to be applied to. + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + if (CurrentRef->offsetInAtom() != PersonalityFunctionField) + return llvm::make_error<GenericError>("CIE personality reloc at " + "wrong offset"); + + if (++CurrentRef != atom->end()) + return llvm::make_error<GenericError>("CIE contains too many relocs"); + } else { + // Implicitly generate the personality function reloc. It's assumed to + // be a delta32 offset to a GOT entry. + // FIXME: Parse the encoding and check this. + int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); + uint64_t funcAddress = ehFrameSection->address + offset + + PersonalityFunctionField; + funcAddress += funcDelta; + + const MachODefinedAtom *func = nullptr; + Reference::Addend addend; + func = findAtomCoveringAddress(normalizedFile, file, funcAddress, + addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + handler.unwindRefToPersonalityFunctionKind(), + PersonalityFunctionField, func, addend); + } + } else if (atom->begin() != atom->end()) { + // Otherwise, we expect there to be no relocations in this atom as the only + // relocation would have been to the personality function. + return llvm::make_error<GenericError>("unexpected relocation in CIE"); + } + + + cieInfos[atom] = std::move(cieInfo); + + return llvm::Error::success(); +} + +static llvm::Error processFDE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + const CIEInfoMap &cieInfos) { + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + + // Compiler wasn't lazy and actually told us what it meant. + // Unfortunately, the compiler may not have generated references for all of + // [cie, func, lsda] and so we still need to parse the FDE and add references + // for any the compiler didn't generate. + if (atom->begin() != atom->end()) + atom->sortReferences(); + + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + + // This helper returns the reference (if one exists) at the offset we are + // currently processing. It automatically increments the ref iterator if we + // do return a ref, and throws an error if we pass over a ref without + // comsuming it. + auto currentRefGetter = [&CurrentRef, + &atom](uint64_t Offset)->const Reference* { + // If there are no more refs found, then we are done. + if (CurrentRef == atom->end()) + return nullptr; + + const Reference *Ref = *CurrentRef; + + // If we haven't reached the offset for this reference, then return that + // we don't yet have a reference to process. + if (Offset < Ref->offsetInAtom()) + return nullptr; + + // If the offset is equal, then we want to process this ref. + if (Offset == Ref->offsetInAtom()) { + ++CurrentRef; + return Ref; + } + + // The current ref is at an offset which is earlier than the current + // offset, then we failed to consume it when we should have. In this case + // throw an error. + llvm::report_fatal_error("Skipped reference when processing FDE"); + }; + + // Helper to either get the reference at this current location, and verify + // that it is of the expected type, or add a reference of that type. + // Returns the reference target. + auto verifyOrAddReference = [&](uint64_t targetAddress, + Reference::KindValue refKind, + uint64_t refAddress, + bool allowsAddend)->const Atom* { + if (auto *ref = currentRefGetter(refAddress)) { + // The compiler already emitted a relocation for the CIE ref. This should + // have been converted to the correct type of reference in + // get[Pair]ReferenceInfo(). + assert(ref->kindValue() == refKind && + "Incorrect EHFrame reference kind"); + return ref->target(); + } + Reference::Addend addend; + auto *target = findAtomCoveringAddress(normalizedFile, file, + targetAddress, addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + refKind, refAddress, target, addend); + + if (!allowsAddend) + assert(!addend && "EHFrame reference cannot have addend"); + return target; + }; + + const uint8_t *startFrameData = atom->rawContent().data(); + const uint8_t *frameData = startFrameData; + + uint32_t size = read32(frameData, isBig); + uint64_t cieFieldInFDE = size == 0xffffffffU + ? sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + + // Linker needs to fixup a reference from the FDE to its parent CIE (a + // 32-bit byte offset backwards in the __eh_frame section). + uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); + uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; + cieAddress -= cieDelta; + + auto *cieRefTarget = verifyOrAddReference(cieAddress, + handler.unwindRefToCIEKind(), + cieFieldInFDE, false); + const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); + assert(cie && cie->contentType() == DefinedAtom::typeCFI && + "FDE's CIE field does not point at the start of a CIE."); + + const CIEInfo &cieInfo = cieInfos.find(cie)->second; + + // Linker needs to fixup reference from the FDE to the function it's + // describing. FIXME: there are actually different ways to do this, and the + // particular method used is specified in the CIE's augmentation fields + // (hopefully) + uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); + + int64_t functionFromFDE = readSPtr(is64, isBig, + frameData + rangeFieldInFDE); + uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; + rangeStart += functionFromFDE; + + verifyOrAddReference(rangeStart, + handler.unwindRefToFunctionKind(), + rangeFieldInFDE, true); + + // Handle the augmentation data if there is any. + if (cieInfo._augmentationDataPresent) { + // First process the augmentation data length field. + uint64_t augmentationDataLengthFieldInFDE = + rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); + unsigned lengthFieldSize = 0; + uint64_t augmentationDataLength = + llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, + &lengthFieldSize); + + if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { + + // Look at the augmentation data field. + uint64_t augmentationDataFieldInFDE = + augmentationDataLengthFieldInFDE + lengthFieldSize; + + int64_t lsdaFromFDE = readSPtr(is64, isBig, + frameData + augmentationDataFieldInFDE); + uint64_t lsdaStart = + ehFrameSection->address + offset + augmentationDataFieldInFDE + + lsdaFromFDE; + + verifyOrAddReference(lsdaStart, + handler.unwindRefToFunctionKind(), + augmentationDataFieldInFDE, true); + } + } + + return llvm::Error::success(); +} + +llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler) { + + const Section *ehFrameSection = nullptr; + for (auto §ion : normalizedFile.sections) + if (section.segmentName == "__TEXT" && + section.sectionName == "__eh_frame") { + ehFrameSection = §ion; + break; + } + + // No __eh_frame so nothing to do. + if (!ehFrameSection) + return llvm::Error::success(); + + llvm::Error ehFrameErr = llvm::Error::success(); + CIEInfoMap cieInfos; + + file.eachAtomInSection(*ehFrameSection, + [&](MachODefinedAtom *atom, uint64_t offset) -> void { + assert(atom->contentType() == DefinedAtom::typeCFI); + + // Bail out if we've encountered an error. + if (ehFrameErr) + return; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + if (ArchHandler::isDwarfCIE(isBig, atom)) + ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + else + ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + }); + + return ehFrameErr; +} + +llvm::Error parseObjCImageInfo(const Section §, + const NormalizedFile &normalizedFile, + MachOFile &file) { + + // struct objc_image_info { + // uint32_t version; // initially 0 + // uint32_t flags; + // }; + + ArrayRef<uint8_t> content = sect.content; + if (content.size() != 8) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should be 8 bytes in size"); + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + uint32_t version = read32(content.data(), isBig); + if (version) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should have version=0"); + + uint32_t flags = read32(content.data() + 4, isBig); + if (flags & (MachOLinkingContext::objc_supports_gc | + MachOLinkingContext::objc_gc_only)) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " uses GC. This is not supported"); + + if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) + file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); + else + file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); + + file.setSwiftVersion((flags >> 8) & 0xFF); + + return llvm::Error::success(); +} + +/// Converts normalized mach-o file into an lld::File and lld::Atoms. +llvm::Expected<std::unique_ptr<lld::File>> +objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + std::unique_ptr<MachOFile> file(new MachOFile(path)); + if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr<File>(std::move(file)); +} + +llvm::Expected<std::unique_ptr<lld::File>> +dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + // Instantiate SharedLibraryFile object. + std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); + if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr<File>(std::move(file)); +} + +} // anonymous namespace + +namespace normalized { + +static bool isObjCImageInfo(const Section §) { + return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || + (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); +} + +llvm::Error +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " + << file->path() << "\n"); + bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); + + // Create atoms from each section. + for (auto § : normalizedFile.sections) { + + // If this is a debug-info section parse it specially. + if (isDebugInfoSection(sect)) + continue; + + // If the file contains an objc_image_info struct, then we should parse the + // ObjC flags and Swift version. + if (isObjCImageInfo(sect)) { + if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) + return ec; + // We then skip adding atoms for this section as we use the ObjCPass to + // re-emit this data after it has been aggregated for all files. + continue; + } + + bool customSectionName; + DefinedAtom::ContentType atomType = atomTypeFromSection(sect, + customSectionName); + if (auto ec = processSection(atomType, sect, customSectionName, + normalizedFile, *file, scatterable, copyRefs)) + return ec; + } + // Create atoms from undefined symbols. + for (auto &sym : normalizedFile.undefinedSymbols) { + // Undefinded symbols with n_value != 0 are actually tentative definitions. + if (sym.value == Hex64(0)) { + file->addUndefinedAtom(sym.name, copyRefs); + } else { + file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, + DefinedAtom::Alignment(1 << (sym.desc >> 8)), + copyRefs); + } + } + + // Convert mach-o relocations to References + std::unique_ptr<mach_o::ArchHandler> handler + = ArchHandler::create(normalizedFile.arch); + for (auto § : normalizedFile.sections) { + if (isDebugInfoSection(sect)) + continue; + if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, + *file, *handler)) + return ec; + } + + // Add additional arch-specific References + file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { + handler->addAdditionalReferences(*atom); + }); + + // Each __eh_frame section needs references to both __text (the function we're + // providing unwind info for) and itself (FDE -> CIE). These aren't + // represented in the relocations on some architectures, so we have to add + // them back in manually there. + if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) + return ec; + + // Process mach-o data-in-code regions array. That information is encoded in + // atoms as References at each transition point. + unsigned nextIndex = 0; + for (const DataInCode &entry : normalizedFile.dataInCode) { + ++nextIndex; + const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); + if (!s) { + return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any section")); + } + uint64_t offsetInSect = entry.offset - s->address; + uint32_t offsetInAtom; + MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, + &offsetInAtom); + if (offsetInAtom + entry.length > atom->size()) { + return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " + "(offset=" + + Twine(entry.offset) + + ", length=" + + Twine(entry.length) + + ") crosses atom boundary.")); + } + // Add reference that marks start of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionStart(*atom), + offsetInAtom, atom, entry.kind); + + // Peek at next entry, if it starts where this one ends, skip ending ref. + if (nextIndex < normalizedFile.dataInCode.size()) { + const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; + if (nextEntry.offset == (entry.offset + entry.length)) + continue; + } + + // If data goes to end of function, skip ending ref. + if ((offsetInAtom + entry.length) == atom->size()) + continue; + + // Add reference that marks end of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionEnd(*atom), + offsetInAtom+entry.length, atom, 0); + } + + // Cache some attributes on the file for use later. + file->setFlags(normalizedFile.flags); + file->setArch(normalizedFile.arch); + file->setOS(normalizedFile.os); + file->setMinVersion(normalizedFile.minOSverson); + file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); + + // Sort references in each atom to their canonical order. + for (const DefinedAtom* defAtom : file->defined()) { + reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); + } + + if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) + return err; + + return llvm::Error::success(); +} + +llvm::Error +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + file->setInstallName(normalizedFile.installName); + file->setCompatVersion(normalizedFile.compatVersion); + file->setCurrentVersion(normalizedFile.currentVersion); + + // Tell MachODylibFile object about all symbols it exports. + if (!normalizedFile.exportInfo.empty()) { + // If exports trie exists, use it instead of traditional symbol table. + for (const Export &exp : normalizedFile.exportInfo) { + bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + // StringRefs from export iterator are ephemeral, so force copy. + file->addExportedSymbol(exp.name, weakDef, true); + } + } else { + for (auto &sym : normalizedFile.globalSymbols) { + assert((sym.scope & N_EXT) && "only expect external symbols here"); + bool weakDef = (sym.desc & N_WEAK_DEF); + file->addExportedSymbol(sym.name, weakDef, copyRefs); + } + } + // Tell MachODylibFile object about all dylibs it re-exports. + for (const DependentDylib &dep : normalizedFile.dependentDylibs) { + if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) + file->addReExportedDylib(dep.path); + } + return llvm::Error::success(); +} + +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit) { + + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->atomType != atomType) + continue; + // Wild carded entries are ignored for reverse lookups. + if (p->segmentName.empty() || p->sectionName.empty()) + continue; + segmentName = p->segmentName; + sectionName = p->sectionName; + sectionType = p->sectionType; + sectionAttrs = 0; + relocsToDefinedCanBeImplicit = false; + if (atomType == DefinedAtom::typeCode) + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; + if (atomType == DefinedAtom::typeCFI) + relocsToDefinedCanBeImplicit = true; + return; + } + llvm_unreachable("content type not yet supported"); +} + +llvm::Expected<std::unique_ptr<lld::File>> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + switch (normalizedFile.fileType) { + case MH_DYLIB: + case MH_DYLIB_STUB: + return dylibToAtoms(normalizedFile, path, copyRefs); + case MH_OBJECT: + return objectToAtoms(normalizedFile, path, copyRefs); + default: + llvm_unreachable("unhandled MachO file type!"); + } +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld |
