diff options
Diffstat (limited to 'llvm/tools/llvm-objcopy/MachO')
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 350 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h | 50 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 74 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h | 31 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 282 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOReader.h | 51 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 491 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOWriter.h | 65 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/Object.cpp | 15 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/Object.h | 259 |
10 files changed, 1668 insertions, 0 deletions
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp new file mode 100644 index 000000000000..f621f3aa09cf --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -0,0 +1,350 @@ +//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOLayoutBuilder.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { + uint32_t Size = 0; + for (const auto &LC : O.LoadCommands) { + const MachO::macho_load_command &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SEGMENT: + Size += sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + continue; + case MachO::LC_SEGMENT_64: + Size += sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + continue; + } + + switch (cmd) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ + break; +#include "llvm/BinaryFormat/MachO.def" +#undef HANDLE_LOAD_COMMAND + } + } + + return Size; +} + +void MachOLayoutBuilder::constructStringTable() { + for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) + StrTableBuilder.add(Sym->Name); + StrTableBuilder.finalize(); +} + +void MachOLayoutBuilder::updateSymbolIndexes() { + uint32_t Index = 0; + for (auto &Symbol : O.SymTable.Symbols) + Symbol->Index = Index++; +} + +// Updates the index and the number of 
local/external/undefined symbols. +void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { + assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); + // Make sure that nlist entries in the symbol table are sorted by the those + // types. The order is: local < defined external < undefined external. + assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(), + [](const std::unique_ptr<SymbolEntry> &A, + const std::unique_ptr<SymbolEntry> &B) { + return (A->isLocalSymbol() && !B->isLocalSymbol()) || + (!A->isUndefinedSymbol() && + B->isUndefinedSymbol()); + }) && + "Symbols are not sorted by their types."); + + uint32_t NumLocalSymbols = 0; + auto Iter = O.SymTable.Symbols.begin(); + auto End = O.SymTable.Symbols.end(); + for (; Iter != End; ++Iter) { + if ((*Iter)->isExternalSymbol()) + break; + + ++NumLocalSymbols; + } + + uint32_t NumExtDefSymbols = 0; + for (; Iter != End; ++Iter) { + if ((*Iter)->isUndefinedSymbol()) + break; + + ++NumExtDefSymbols; + } + + MLC.dysymtab_command_data.ilocalsym = 0; + MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; + MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; + MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; + MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; + MLC.dysymtab_command_data.nundefsym = + O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); +} + +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. +uint64_t MachOLayoutBuilder::layoutSegments() { + auto HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + const bool IsObjectFile = + O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; + uint64_t Offset = IsObjectFile ? 
(HeaderSize + O.Header.SizeOfCmds) : 0; + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + StringRef Segname; + uint64_t SegmentVmAddr; + uint64_t SegmentVmSize; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + SegmentVmAddr = MLC.segment_command_data.vmaddr; + SegmentVmSize = MLC.segment_command_data.vmsize; + Segname = StringRef(MLC.segment_command_data.segname, + strnlen(MLC.segment_command_data.segname, + sizeof(MLC.segment_command_data.segname))); + break; + case MachO::LC_SEGMENT_64: + SegmentVmAddr = MLC.segment_command_64_data.vmaddr; + SegmentVmSize = MLC.segment_command_64_data.vmsize; + Segname = StringRef(MLC.segment_command_64_data.segname, + strnlen(MLC.segment_command_64_data.segname, + sizeof(MLC.segment_command_64_data.segname))); + break; + default: + continue; + } + + if (Segname == "__LINKEDIT") { + // We update the __LINKEDIT segment later (in layoutTail). + assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); + LinkEditLoadCommand = &MLC; + continue; + } + + // Update file offsets and sizes of sections. 
+ uint64_t SegOffset = Offset; + uint64_t SegFileSize = 0; + uint64_t VMSize = 0; + for (auto &Sec : LC.Sections) { + if (IsObjectFile) { + if (Sec.isVirtualSection()) { + Sec.Offset = 0; + } else { + uint64_t PaddingSize = + offsetToAlignment(SegFileSize, Align(1ull << Sec.Align)); + Sec.Offset = SegOffset + SegFileSize + PaddingSize; + Sec.Size = Sec.Content.size(); + SegFileSize += PaddingSize + Sec.Size; + } + VMSize = std::max(VMSize, Sec.Addr + Sec.Size); + } else { + if (Sec.isVirtualSection()) { + Sec.Offset = 0; + VMSize += Sec.Size; + } else { + uint32_t SectOffset = Sec.Addr - SegmentVmAddr; + Sec.Offset = SegOffset + SectOffset; + Sec.Size = Sec.Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size); + VMSize = std::max(VMSize, SegFileSize); + } + } + } + + if (IsObjectFile) { + Offset += SegFileSize; + } else { + Offset = alignTo(Offset + SegFileSize, PageSize); + SegFileSize = alignTo(SegFileSize, PageSize); + // Use the original vmsize if the segment is __PAGEZERO. + VMSize = + Segname == "__PAGEZERO" ? 
SegmentVmSize : alignTo(VMSize, PageSize); + } + + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC.segment_command_data.cmdsize = + sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + MLC.segment_command_data.nsects = LC.Sections.size(); + MLC.segment_command_data.fileoff = SegOffset; + MLC.segment_command_data.vmsize = VMSize; + MLC.segment_command_data.filesize = SegFileSize; + break; + case MachO::LC_SEGMENT_64: + MLC.segment_command_64_data.cmdsize = + sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + MLC.segment_command_64_data.nsects = LC.Sections.size(); + MLC.segment_command_64_data.fileoff = SegOffset; + MLC.segment_command_64_data.vmsize = VMSize; + MLC.segment_command_64_data.filesize = SegFileSize; + break; + } + } + + return Offset; +} + +uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { + for (auto &LC : O.LoadCommands) + for (auto &Sec : LC.Sections) { + Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; + Sec.NReloc = Sec.Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; + } + + return Offset; +} + +Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // The order of LINKEDIT elements is as follows: + // rebase info, binding info, weak binding info, lazy binding info, export + // trie, data-in-code, symbol table, indirect symbol table, symbol table + // strings. + uint64_t NListSize = Is64Bit ? 
sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + uint64_t StartOfLinkEdit = Offset; + uint64_t StartOfRebaseInfo = StartOfLinkEdit; + uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); + uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); + uint64_t StartOfLazyBindingInfo = + StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); + uint64_t StartOfExportTrie = + StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); + uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); + uint64_t StartOfDataInCode = + StartOfFunctionStarts + O.FunctionStarts.Data.size(); + uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfIndirectSymbols = + StartOfSymbols + NListSize * O.SymTable.Symbols.size(); + uint64_t StartOfSymbolStrings = + StartOfIndirectSymbols + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t LinkEditSize = + (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit; + + // Now we have determined the layout of the contents of the __LINKEDIT + // segment. Update its load command. 
+ if (LinkEditLoadCommand) { + MachO::macho_load_command *MLC = LinkEditLoadCommand; + switch (LinkEditLoadCommand->load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); + MLC->segment_command_data.fileoff = StartOfLinkEdit; + MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_data.filesize = LinkEditSize; + break; + case MachO::LC_SEGMENT_64: + MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); + MLC->segment_command_64_data.fileoff = StartOfLinkEdit; + MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_64_data.filesize = LinkEditSize; + break; + } + } + + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SYMTAB: + MLC.symtab_command_data.symoff = StartOfSymbols; + MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); + MLC.symtab_command_data.stroff = StartOfSymbolStrings; + MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); + break; + case MachO::LC_DYSYMTAB: { + if (MLC.dysymtab_command_data.ntoc != 0 || + MLC.dysymtab_command_data.nmodtab != 0 || + MLC.dysymtab_command_data.nextrefsyms != 0 || + MLC.dysymtab_command_data.nlocrel != 0 || + MLC.dysymtab_command_data.nextrel != 0) + return createStringError(llvm::errc::not_supported, + "shared library is not yet supported"); + + if (!O.IndirectSymTable.Symbols.empty()) { + MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; + MLC.dysymtab_command_data.nindirectsyms = + O.IndirectSymTable.Symbols.size(); + } + + updateDySymTab(MLC); + break; + } + case MachO::LC_DATA_IN_CODE: + MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; + MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); + break; + case MachO::LC_FUNCTION_STARTS: + MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; + 
MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + MLC.dyld_info_command_data.rebase_off = + O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; + MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); + MLC.dyld_info_command_data.bind_off = + O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; + MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); + MLC.dyld_info_command_data.weak_bind_off = + O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; + MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); + MLC.dyld_info_command_data.lazy_bind_off = + O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; + MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); + MLC.dyld_info_command_data.export_off = + O.Exports.Trie.empty() ? 0 : StartOfExportTrie; + MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); + break; + case MachO::LC_LOAD_DYLINKER: + case MachO::LC_MAIN: + case MachO::LC_RPATH: + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + case MachO::LC_VERSION_MIN_MACOSX: + case MachO::LC_BUILD_VERSION: + case MachO::LC_ID_DYLIB: + case MachO::LC_LOAD_DYLIB: + case MachO::LC_UUID: + case MachO::LC_SOURCE_VERSION: + // Nothing to update. + break; + default: + // Abort if it's unsupported in order to prevent corrupting the object. 
+ return createStringError(llvm::errc::not_supported, + "unsupported load command (cmd=0x%x)", cmd); + } + } + + return Error::success(); +} + +Error MachOLayoutBuilder::layout() { + O.Header.NCmds = O.LoadCommands.size(); + O.Header.SizeOfCmds = computeSizeOfCmds(); + constructStringTable(); + updateSymbolIndexes(); + uint64_t Offset = layoutSegments(); + Offset = layoutRelocations(Offset); + return layoutTail(Offset); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h new file mode 100644 index 000000000000..21cbe56605de --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h @@ -0,0 +1,50 @@ +//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H +#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H + +#include "MachOObjcopy.h" +#include "Object.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +class MachOLayoutBuilder { + Object &O; + bool Is64Bit; + uint64_t PageSize; + + // Points to the __LINKEDIT segment if it exists. 
+ MachO::macho_load_command *LinkEditLoadCommand = nullptr; + StringTableBuilder StrTableBuilder{StringTableBuilder::MachO}; + + uint32_t computeSizeOfCmds() const; + void constructStringTable(); + void updateSymbolIndexes(); + void updateDySymTab(MachO::macho_load_command &MLC); + uint64_t layoutSegments(); + uint64_t layoutRelocations(uint64_t Offset); + Error layoutTail(uint64_t Offset); + +public: + MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize) + : O(O), Is64Bit(Is64Bit), PageSize(PageSize) {} + + // Recomputes and updates fields in the given object such as file offsets. + Error layout(); + + StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; } +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp new file mode 100644 index 000000000000..6d586e7d73f1 --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -0,0 +1,74 @@ +//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOObjcopy.h" +#include "../CopyConfig.h" +#include "MachOReader.h" +#include "MachOWriter.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +using namespace object; + +static Error handleArgs(const CopyConfig &Config, Object &Obj) { + if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() || + Config.BuildIdLinkInput || Config.BuildIdLinkOutput || + !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() || + !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() || + !Config.DumpSection.empty() || !Config.KeepSection.empty() || + Config.NewSymbolVisibility || !Config.OnlySection.empty() || + !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() || + !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() || + !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || + !Config.SymbolsToRename.empty() || + !Config.UnneededSymbolsToRemove.empty() || + !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || + !Config.ToRemove.empty() || Config.ExtractDWO || Config.KeepFileSymbols || + Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO || + Config.StripNonAlloc || Config.StripSections || Config.Weaken || + Config.DecompressDebugSections || Config.StripDebug || + Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded || + Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() || + Config.EntryExpr) { + return createStringError(llvm::errc::invalid_argument, + "option not supported by llvm-objcopy for MachO"); + } + + return Error::success(); +} + +Error executeObjcopyOnBinary(const CopyConfig &Config, + object::MachOObjectFile &In, Buffer &Out) { + MachOReader Reader(In); + std::unique_ptr<Object> O = 
Reader.create(); + if (!O) + return createFileError( + Config.InputFilename, + createStringError(object_error::parse_failed, + "unable to deserialize MachO object")); + + if (Error E = handleArgs(Config, *O)) + return createFileError(Config.InputFilename, std::move(E)); + + // TODO: Support 16KB pages which are employed in iOS arm64 binaries: + // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb + const uint64_t PageSize = 4096; + + MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); + if (auto E = Writer.finalize()) + return E; + return Writer.write(); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h new file mode 100644 index 000000000000..f34e361db7ea --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h @@ -0,0 +1,31 @@ +//===- MachOObjcopy.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+
+// Only forward declarations are used here so that including this header does
+// not pull in the definitions of these types.
+namespace llvm {
+class Error;
+
+namespace object {
+class MachOObjectFile;
+class MachOUniversalBinary;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace macho {
+// Runs objcopy on the Mach-O object In according to Config and emits the
+// result into Out. Returns an Error on failure.
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::MachOObjectFile &In, Buffer &Out);
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
new file mode 100644
index 000000000000..b48a0d8952d0
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -0,0 +1,282 @@
+//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOReader.h" +#include "../llvm-objcopy.h" +#include "Object.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include <memory> + +namespace llvm { +namespace objcopy { +namespace macho { + +void MachOReader::readHeader(Object &O) const { + O.Header.Magic = MachOObj.getHeader().magic; + O.Header.CPUType = MachOObj.getHeader().cputype; + O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; + O.Header.FileType = MachOObj.getHeader().filetype; + O.Header.NCmds = MachOObj.getHeader().ncmds; + O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; + O.Header.Flags = MachOObj.getHeader().flags; +} + +template <typename SectionType> +Section constructSectionCommon(SectionType Sec) { + Section S; + S.Sectname = + StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))) + .str(); + S.Segname = + StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.sectname))).str(); + S.Addr = Sec.addr; + S.Size = Sec.size; + S.Offset = Sec.offset; + S.Align = Sec.align; + S.RelOff = Sec.reloff; + S.NReloc = Sec.nreloc; + S.Flags = Sec.flags; + S.Reserved1 = Sec.reserved1; + S.Reserved2 = Sec.reserved2; + S.Reserved3 = 0; + return S; +} + +template <typename SectionType> Section constructSection(SectionType Sec); + +template <> Section constructSection(MachO::section Sec) { + return constructSectionCommon(Sec); +} + +template <> Section constructSection(MachO::section_64 Sec) { + Section S = constructSectionCommon(Sec); + S.Reserved3 = Sec.reserved3; + return S; +} + +// TODO: get rid of reportError and make MachOReader return Expected<> instead. 
+template <typename SectionType, typename SegmentType> +std::vector<Section> +extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, + const object::MachOObjectFile &MachOObj, + size_t &NextSectionIndex) { + auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; + const SectionType *Curr = + reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); + std::vector<Section> Sections; + for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { + SectionType Sec; + memcpy((void *)&Sec, Curr, sizeof(SectionType)); + MachO::swapStruct(Sec); + Sections.push_back(constructSection(Sec)); + } else { + Sections.push_back(constructSection(*Curr)); + } + + Section &S = Sections.back(); + + Expected<object::SectionRef> SecRef = + MachOObj.getSection(NextSectionIndex++); + if (!SecRef) + reportError(MachOObj.getFileName(), SecRef.takeError()); + + if (Expected<ArrayRef<uint8_t>> E = + MachOObj.getSectionContents(SecRef->getRawDataRefImpl())) + S.Content = + StringRef(reinterpret_cast<const char *>(E->data()), E->size()); + else + reportError(MachOObj.getFileName(), E.takeError()); + + S.Relocations.reserve(S.NReloc); + for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), + RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); + RI != RE; ++RI) { + RelocationInfo R; + R.Symbol = nullptr; // We'll fill this field later. + R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); + R.Scattered = MachOObj.isRelocationScattered(R.Info); + S.Relocations.push_back(R); + } + + assert(S.NReloc == S.Relocations.size() && + "Incorrect number of relocations"); + } + return Sections; +} + +void MachOReader::readLoadCommands(Object &O) const { + // For MachO sections indices start from 1. 
+ size_t NextSectionIndex = 1; + for (auto LoadCmd : MachOObj.load_commands()) { + LoadCommand LC; + switch (LoadCmd.C.cmd) { + case MachO::LC_SEGMENT: + LC.Sections = extractSections<MachO::section, MachO::segment_command>( + LoadCmd, MachOObj, NextSectionIndex); + break; + case MachO::LC_SEGMENT_64: + LC.Sections = + extractSections<MachO::section_64, MachO::segment_command_64>( + LoadCmd, MachOObj, NextSectionIndex); + break; + case MachO::LC_SYMTAB: + O.SymTabCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYSYMTAB: + O.DySymTabCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + O.DyLdInfoCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_DATA_IN_CODE: + O.DataInCodeCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_FUNCTION_STARTS: + O.FunctionStartsCommandIndex = O.LoadCommands.size(); + break; + } +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ + sizeof(MachO::LCStruct)); \ + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ + MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ + LC.Payload = ArrayRef<uint8_t>( \ + reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ + sizeof(MachO::LCStruct), \ + LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ + break; + + switch (LoadCmd.C.cmd) { + default: + memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, + sizeof(MachO::load_command)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(LC.MachOLoadCommand.load_command_data); + LC.Payload = ArrayRef<uint8_t>( + reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + + sizeof(MachO::load_command), + LoadCmd.C.cmdsize - sizeof(MachO::load_command)); + break; +#include "llvm/BinaryFormat/MachO.def" + } + O.LoadCommands.push_back(std::move(LC)); + } +} + +template <typename nlist_t> 
+SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { + assert(nlist.n_strx < StrTable.size() && + "n_strx exceeds the size of the string table"); + SymbolEntry SE; + SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); + SE.n_type = nlist.n_type; + SE.n_sect = nlist.n_sect; + SE.n_desc = nlist.n_desc; + SE.n_value = nlist.n_value; + return SE; +} + +void MachOReader::readSymbolTable(Object &O) const { + StringRef StrTable = MachOObj.getStringTableData(); + for (auto Symbol : MachOObj.symbols()) { + SymbolEntry SE = + (MachOObj.is64Bit() + ? constructSymbolEntry( + StrTable, + MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) + : constructSymbolEntry( + StrTable, + MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); + + O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); + } +} + +void MachOReader::setSymbolInRelocationInfo(Object &O) const { + for (auto &LC : O.LoadCommands) + for (auto &Sec : LC.Sections) + for (auto &Reloc : Sec.Relocations) + if (!Reloc.Scattered) { + auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info); + Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum); + } +} + +void MachOReader::readRebaseInfo(Object &O) const { + O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); +} + +void MachOReader::readBindInfo(Object &O) const { + O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); +} + +void MachOReader::readWeakBindInfo(Object &O) const { + O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); +} + +void MachOReader::readLazyBindInfo(Object &O) const { + O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); +} + +void MachOReader::readExportInfo(Object &O) const { + O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); +} + +void MachOReader::readDataInCodeData(Object &O) const { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.DataInCodeCommandIndex] + 
.MachOLoadCommand.linkedit_data_command_data; + + O.DataInCode.Data = arrayRefFromStringRef( + MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +} + +void MachOReader::readFunctionStartsData(Object &O) const { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.FunctionStarts.Data = arrayRefFromStringRef( + MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +} + +void MachOReader::readIndirectSymbolTable(Object &O) const { + MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); + for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) + O.IndirectSymTable.Symbols.push_back( + MachOObj.getIndirectSymbolTableEntry(DySymTab, i)); +} + +std::unique_ptr<Object> MachOReader::create() const { + auto Obj = std::make_unique<Object>(); + readHeader(*Obj); + readLoadCommands(*Obj); + readSymbolTable(*Obj); + setSymbolInRelocationInfo(*Obj); + readRebaseInfo(*Obj); + readBindInfo(*Obj); + readWeakBindInfo(*Obj); + readLazyBindInfo(*Obj); + readExportInfo(*Obj); + readDataInCodeData(*Obj); + readFunctionStartsData(*Obj); + readIndirectSymbolTable(*Obj); + return Obj; +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h new file mode 100644 index 000000000000..00c8f0d55f61 --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -0,0 +1,51 @@ +//===- MachOReader.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOObjcopy.h" +#include "Object.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" +#include <memory> + +namespace llvm { +namespace objcopy { +namespace macho { + +// The hierarchy of readers is responsible for parsing different inputs: +// raw binaries and regular MachO object files. +class Reader { +public: + virtual ~Reader(){}; + virtual std::unique_ptr<Object> create() const = 0; +}; + +class MachOReader : public Reader { + const object::MachOObjectFile &MachOObj; + + void readHeader(Object &O) const; + void readLoadCommands(Object &O) const; + void readSymbolTable(Object &O) const; + void setSymbolInRelocationInfo(Object &O) const; + void readRebaseInfo(Object &O) const; + void readBindInfo(Object &O) const; + void readWeakBindInfo(Object &O) const; + void readLazyBindInfo(Object &O) const; + void readExportInfo(Object &O) const; + void readDataInCodeData(Object &O) const; + void readFunctionStartsData(Object &O) const; + void readIndirectSymbolTable(Object &O) const; + +public: + explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} + + std::unique_ptr<Object> create() const override; +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp new file mode 100644 index 000000000000..4ec91cc9eb7a --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -0,0 +1,491 @@ +//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "MachOLayoutBuilder.h"
+#include "Object.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+// Size in bytes of the Mach header for the target bitness.
+size_t MachOWriter::headerSize() const {
+  return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+}
+
+// Size in bytes of all load commands, as recorded in the object header.
+size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
+
+// Size in bytes of the symbol table (one nlist entry per symbol).
+size_t MachOWriter::symTableSize() const {
+  return O.SymTable.Symbols.size() *
+         (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
+}
+
+// Returns the size of the output file: the largest end offset among all parts
+// referenced by the load commands, or header + load commands if there are
+// none.
+size_t MachOWriter::totalSize() const {
+  // Going from tail to head and looking for an appropriate "anchor" to
+  // calculate the total size assuming that all the offsets are either valid
+  // ("true") or 0 (0 indicates that the corresponding part is missing).
+
+  SmallVector<size_t, 7> Ends;
+  if (O.SymTabCommandIndex) {
+    const MachO::symtab_command &SymTabCommand =
+        O.LoadCommands[*O.SymTabCommandIndex]
+            .MachOLoadCommand.symtab_command_data;
+    if (SymTabCommand.symoff)
+      Ends.push_back(SymTabCommand.symoff + symTableSize());
+    if (SymTabCommand.stroff)
+      Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
+  }
+  if (O.DyLdInfoCommandIndex) {
+    const MachO::dyld_info_command &DyLdInfoCommand =
+        O.LoadCommands[*O.DyLdInfoCommandIndex]
+            .MachOLoadCommand.dyld_info_command_data;
+    if (DyLdInfoCommand.rebase_off) {
+      assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+             "Incorrect rebase opcodes size");
+      Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
+    }
+    if (DyLdInfoCommand.bind_off) {
+      assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+             "Incorrect bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
+    }
+    if (DyLdInfoCommand.weak_bind_off) {
+      assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+             "Incorrect weak bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.weak_bind_off +
+                     DyLdInfoCommand.weak_bind_size);
+    }
+    if (DyLdInfoCommand.lazy_bind_off) {
+      assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+             "Incorrect lazy bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.lazy_bind_off +
+                     DyLdInfoCommand.lazy_bind_size);
+    }
+    if (DyLdInfoCommand.export_off) {
+      assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+             "Incorrect trie size");
+      Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
+    }
+  }
+
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Ends.push_back(DySymTabCommand.indirectsymoff +
+                     sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+  }
+
+  if (O.DataInCodeCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.DataInCodeCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
+  if (O.FunctionStartsCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.FunctionStartsCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
+  // Otherwise, use the last section / relocation.
+  // NOTE(review): every section contributes Offset + Size here, including
+  // sections whose Offset is 0 - confirm this is intended for sections that
+  // occupy no space in the file.
+  for (const auto &LC : O.LoadCommands)
+    for (const auto &S : LC.Sections) {
+      Ends.push_back(S.Offset + S.Size);
+      if (S.RelOff)
+        Ends.push_back(S.RelOff +
+                       S.NReloc * sizeof(MachO::any_relocation_info));
+    }
+
+  if (!Ends.empty())
+    return *std::max_element(Ends.begin(), Ends.end());
+
+  // Otherwise, we have only Mach header and load commands.
+  return headerSize() + loadCommandsSize();
+}
+
+// Writes the Mach header at the start of the output buffer, byte-swapped if
+// the target endianness differs from the host.
+void MachOWriter::writeHeader() {
+  // A 64-bit header is always filled in; only the first HeaderSize bytes are
+  // copied out below (presumably relying on mach_header being a layout prefix
+  // of mach_header_64 - TODO confirm).
+  MachO::mach_header_64 Header;
+
+  Header.magic = O.Header.Magic;
+  Header.cputype = O.Header.CPUType;
+  Header.cpusubtype = O.Header.CPUSubType;
+  Header.filetype = O.Header.FileType;
+  Header.ncmds = O.Header.NCmds;
+  Header.sizeofcmds = O.Header.SizeOfCmds;
+  Header.flags = O.Header.Flags;
+  Header.reserved = O.Header.Reserved;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Header);
+
+  auto HeaderSize =
+      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+  memcpy(B.getBufferStart(), &Header, HeaderSize);
+}
+
+void MachOWriter::writeLoadCommands() {
+  uint8_t *Begin = B.getBufferStart() + headerSize();
+  for (const auto &LC : O.LoadCommands) {
+    // Construct a load command.
+ MachO::macho_load_command MLC = LC.MachOLoadCommand; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.segment_command_data); + memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); + Begin += sizeof(MachO::segment_command); + + for (const auto &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section>(Sec, Begin); + continue; + case MachO::LC_SEGMENT_64: + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.segment_command_64_data); + memcpy(Begin, &MLC.segment_command_64_data, + sizeof(MachO::segment_command_64)); + Begin += sizeof(MachO::segment_command_64); + + for (const auto &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section_64>(Sec, Begin); + continue; + } + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \ + MLC.load_command_data.cmdsize); \ + if (IsLittleEndian != sys::IsLittleEndianHost) \ + MachO::swapStruct(MLC.LCStruct##_data); \ + memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \ + Begin += sizeof(MachO::LCStruct); \ + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \ + Begin += LC.Payload.size(); \ + break; + + // Copy the load command as it is. 
+ switch (MLC.load_command_data.cmd) { + default: + assert(sizeof(MachO::load_command) + LC.Payload.size() == + MLC.load_command_data.cmdsize); + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(MLC.load_command_data); + memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command)); + Begin += sizeof(MachO::load_command); + memcpy(Begin, LC.Payload.data(), LC.Payload.size()); + Begin += LC.Payload.size(); + break; +#include "llvm/BinaryFormat/MachO.def" + } + } +} + +template <typename StructType> +void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { + StructType Temp; + assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name"); + assert(Sec.Sectname.size() <= sizeof(Temp.sectname) && + "too long section name"); + memset(&Temp, 0, sizeof(StructType)); + memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size()); + memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size()); + Temp.addr = Sec.Addr; + Temp.size = Sec.Size; + Temp.offset = Sec.Offset; + Temp.align = Sec.Align; + Temp.reloff = Sec.RelOff; + Temp.nreloc = Sec.NReloc; + Temp.flags = Sec.Flags; + Temp.reserved1 = Sec.Reserved1; + Temp.reserved2 = Sec.Reserved2; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Temp); + memcpy(Out, &Temp, sizeof(StructType)); + Out += sizeof(StructType); +} + +void MachOWriter::writeSections() { + for (const auto &LC : O.LoadCommands) + for (const auto &Sec : LC.Sections) { + if (Sec.isVirtualSection()) + continue; + + assert(Sec.Offset && "Section offset can not be zero"); + assert((Sec.Size == Sec.Content.size()) && "Incorrect section size"); + memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), + Sec.Content.size()); + for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) { + auto RelocInfo = Sec.Relocations[Index]; + if (!RelocInfo.Scattered) { + auto *Info = + reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info); + Info->r_symbolnum = 
RelocInfo.Symbol->Index; + } + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct( + reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); + memcpy(B.getBufferStart() + Sec.RelOff + + Index * sizeof(MachO::any_relocation_info), + &RelocInfo.Info, sizeof(RelocInfo.Info)); + } + } +} + +template <typename NListType> +void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, + uint32_t Nstrx) { + NListType ListEntry; + ListEntry.n_strx = Nstrx; + ListEntry.n_type = SE.n_type; + ListEntry.n_sect = SE.n_sect; + ListEntry.n_desc = SE.n_desc; + ListEntry.n_value = SE.n_value; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(ListEntry); + memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType)); + Out += sizeof(NListType); +} + +void MachOWriter::writeStringTable() { + if (!O.SymTabCommandIndex) + return; + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + + uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff; + LayoutBuilder.getStringTableBuilder().write(StrTable); +} + +void MachOWriter::writeSymbolTable() { + if (!O.SymTabCommandIndex) + return; + const MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + + char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff; + for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end(); + Iter != End; Iter++) { + SymbolEntry *Sym = Iter->get(); + uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); + + if (Is64Bit) + writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx); + else + writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx); + } +} + +void MachOWriter::writeRebaseInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + 
O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off; + assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && + "Incorrect rebase opcodes size"); + memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size()); +} + +void MachOWriter::writeBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off; + assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && + "Incorrect bind opcodes size"); + memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size()); +} + +void MachOWriter::writeWeakBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off; + assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && + "Incorrect weak bind opcodes size"); + memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size()); +} + +void MachOWriter::writeLazyBindInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off; + assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && + "Incorrect lazy bind opcodes size"); + memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size()); +} + +void MachOWriter::writeExportInfo() { + if (!O.DyLdInfoCommandIndex) + return; + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + char *Out = (char 
*)B.getBufferStart() + DyLdInfoCommand.export_off; + assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && + "Incorrect export trie size"); + memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); +} + +void MachOWriter::writeIndirectSymbolTable() { + if (!O.DySymTabCommandIndex) + return; + + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff; + assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) && + "Incorrect indirect symbol table size"); + memcpy(Out, O.IndirectSymTable.Symbols.data(), + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); +} + +void MachOWriter::writeDataInCodeData() { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && + "Incorrect data in code data size"); + memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); +} + +void MachOWriter::writeFunctionStartsData() { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && + "Incorrect function starts data size"); + memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); +} + +void MachOWriter::writeTail() { + typedef void (MachOWriter::*WriteHandlerType)(void); + typedef std::pair<uint64_t, WriteHandlerType> WriteOperation; + SmallVector<WriteOperation, 7> Queue; + + if (O.SymTabCommandIndex) { + const 
MachO::symtab_command &SymTabCommand = + O.LoadCommands[*O.SymTabCommandIndex] + .MachOLoadCommand.symtab_command_data; + if (SymTabCommand.symoff) + Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable}); + if (SymTabCommand.stroff) + Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable}); + } + + if (O.DyLdInfoCommandIndex) { + const MachO::dyld_info_command &DyLdInfoCommand = + O.LoadCommands[*O.DyLdInfoCommandIndex] + .MachOLoadCommand.dyld_info_command_data; + if (DyLdInfoCommand.rebase_off) + Queue.push_back( + {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo}); + if (DyLdInfoCommand.bind_off) + Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo}); + if (DyLdInfoCommand.weak_bind_off) + Queue.push_back( + {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo}); + if (DyLdInfoCommand.lazy_bind_off) + Queue.push_back( + {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo}); + if (DyLdInfoCommand.export_off) + Queue.push_back( + {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); + } + + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Queue.emplace_back(DySymTabCommand.indirectsymoff, + &MachOWriter::writeIndirectSymbolTable); + } + + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeDataInCodeData); + } + + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + 
Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeFunctionStartsData); + } + + llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { + return LHS.first < RHS.first; + }); + + for (auto WriteOp : Queue) + (this->*WriteOp.second)(); +} + +Error MachOWriter::finalize() { return LayoutBuilder.layout(); } + +Error MachOWriter::write() { + if (Error E = B.allocate(totalSize())) + return E; + memset(B.getBufferStart(), 0, totalSize()); + writeHeader(); + writeLoadCommands(); + writeSections(); + writeTail(); + return B.commit(); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h new file mode 100644 index 000000000000..22abbad56f41 --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -0,0 +1,65 @@ +//===- MachOWriter.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../Buffer.h" +#include "MachOLayoutBuilder.h" +#include "MachOObjcopy.h" +#include "Object.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/MachO.h" + +namespace llvm { +class Error; + +namespace objcopy { +namespace macho { + +class MachOWriter { + Object &O; + bool Is64Bit; + bool IsLittleEndian; + uint64_t PageSize; + Buffer &B; + MachOLayoutBuilder LayoutBuilder; + + size_t headerSize() const; + size_t loadCommandsSize() const; + size_t symTableSize() const; + size_t strTableSize() const; + + void writeHeader(); + void writeLoadCommands(); + template <typename StructType> + void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out); + void writeSections(); + void writeSymbolTable(); + void writeStringTable(); + void writeRebaseInfo(); + void writeBindInfo(); + void writeWeakBindInfo(); + void writeLazyBindInfo(); + void writeExportInfo(); + void writeIndirectSymbolTable(); + void writeDataInCodeData(); + void writeFunctionStartsData(); + void writeTail(); + +public: + MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize, + Buffer &B) + : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), + PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {} + + size_t totalSize() const; + Error finalize(); + Error write(); +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp new file mode 100644 index 000000000000..264f39c28ed2 --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -0,0 +1,15 @@ +#include "Object.h" +#include "../llvm-objcopy.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { + assert(Index < Symbols.size() && "invalid 
symbol index"); + return Symbols[Index].get(); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h new file mode 100644 index 000000000000..1cebf8253d19 --- /dev/null +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -0,0 +1,259 @@ +//===- Object.h - Mach-O object file model ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJCOPY_MACHO_OBJECT_H +#define LLVM_OBJCOPY_MACHO_OBJECT_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/ObjectYAML/DWARFYAML.h" +#include "llvm/Support/YAMLTraits.h" +#include <cstdint> +#include <string> +#include <vector> + +namespace llvm { +namespace objcopy { +namespace macho { + +struct MachHeader { + uint32_t Magic; + uint32_t CPUType; + uint32_t CPUSubType; + uint32_t FileType; + uint32_t NCmds; + uint32_t SizeOfCmds; + uint32_t Flags; + uint32_t Reserved = 0; +}; + +struct RelocationInfo; +struct Section { + std::string Sectname; + std::string Segname; + uint64_t Addr; + uint64_t Size; + uint32_t Offset; + uint32_t Align; + uint32_t RelOff; + uint32_t NReloc; + uint32_t Flags; + uint32_t Reserved1; + uint32_t Reserved2; + uint32_t Reserved3; + + StringRef Content; + std::vector<RelocationInfo> Relocations; + + MachO::SectionType getType() const { + return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE); + } + + bool isVirtualSection() const { + return (getType() == MachO::S_ZEROFILL || + getType() == MachO::S_GB_ZEROFILL || + getType() == MachO::S_THREAD_LOCAL_ZEROFILL); + } +}; + +struct 
LoadCommand { + // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h + // and it is a union of all the structs corresponding to various load + // commands. + MachO::macho_load_command MachOLoadCommand; + + // The raw content of the payload of the load command (located right after the + // corresponding struct). In some cases it is either empty or can be + // copied-over without digging into its structure. + ArrayRef<uint8_t> Payload; + + // Some load commands can contain (inside the payload) an array of sections, + // though the contents of the sections are stored separately. The struct + // Section describes only sections' metadata and where to find the + // corresponding content inside the binary. + std::vector<Section> Sections; +}; + +// A symbol information. Fields which starts with "n_" are same as them in the +// nlist. +struct SymbolEntry { + std::string Name; + uint32_t Index; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; + + bool isExternalSymbol() const { + return n_type & ((MachO::N_EXT | MachO::N_PEXT)); + } + + bool isLocalSymbol() const { return !isExternalSymbol(); } + + bool isUndefinedSymbol() const { + return (n_type & MachO::N_TYPE) == MachO::N_UNDF; + } +}; + +/// The location of the symbol table inside the binary is described by LC_SYMTAB +/// load command. +struct SymbolTable { + std::vector<std::unique_ptr<SymbolEntry>> Symbols; + + const SymbolEntry *getSymbolByIndex(uint32_t Index) const; +}; + +struct IndirectSymbolTable { + std::vector<uint32_t> Symbols; +}; + +/// The location of the string table inside the binary is described by LC_SYMTAB +/// load command. +struct StringTable { + std::vector<std::string> Strings; +}; + +struct RelocationInfo { + const SymbolEntry *Symbol; + // True if Info is a scattered_relocation_info. + bool Scattered; + MachO::any_relocation_info Info; +}; + +/// The location of the rebase info inside the binary is described by +/// LC_DYLD_INFO load command. 
Dyld rebases an image whenever dyld loads it at +/// an address different from its preferred address. The rebase information is +/// a stream of byte sized opcodes whose symbolic names start with +/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples: +/// <seg-index, seg-offset, type> +/// The opcodes are a compressed way to encode the table by only +/// encoding when a column changes. In addition simple patterns +/// like "every n'th offset for m times" can be encoded in a few +/// bytes. +struct RebaseInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the bind info inside the binary is described by +/// LC_DYLD_INFO load command. Dyld binds an image during the loading process, +/// if the image requires any pointers to be initialized to symbols in other +/// images. The bind information is a stream of byte sized opcodes whose +/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is +/// a table of tuples: <seg-index, seg-offset, type, symbol-library-ordinal, +/// symbol-name, addend> The opcodes are a compressed way to encode the table by +/// only encoding when a column changes. In addition simple patterns like for +/// runs of pointers initialized to the same value can be encoded in a few +/// bytes. +struct BindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the weak bind info inside the binary is described by +/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols +/// so that all images in the process use the same copy of some code/data. This +/// step is done after binding. The content of the weak_bind info is an opcode +/// stream like the bind_info. But it is sorted alphabetically by symbol name. 
+/// This enable dyld to walk all images with weak binding information in order +/// and look for collisions. If there are no collisions, dyld does no updating. +/// That means that some fixups are also encoded in the bind_info. For +/// instance, all calls to "operator new" are first bound to libstdc++.dylib +/// using the information in bind_info. Then if some image overrides operator +/// new that is detected when the weak_bind information is processed and the +/// call to operator new is then rebound. +struct WeakBindInfo { + // At the moment we do not parse this info (and it is simply copied over), + // but the proper support will be added later. + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the lazy bind info inside the binary is described by +/// LC_DYLD_INFO load command. Some uses of external symbols do not need to be +/// bound immediately. Instead they can be lazily bound on first use. The +/// lazy_bind contains a stream of BIND opcodes to bind all lazy symbols. Normal +/// use is that dyld ignores the lazy_bind section when loading an image. +/// Instead the static linker arranged for the lazy pointer to initially point +/// to a helper function which pushes the offset into the lazy_bind area for the +/// symbol needing to be bound, then jumps to dyld which simply adds the offset +/// to lazy_bind_off to get the information on what to bind. +struct LazyBindInfo { + ArrayRef<uint8_t> Opcodes; +}; + +/// The location of the export info inside the binary is described by +/// LC_DYLD_INFO load command. The symbols exported by a dylib are encoded in a +/// trie. This is a compact representation that factors out common prefixes. It +/// also reduces LINKEDIT pages in RAM because it encodes all information (name, +/// address, flags) in one small, contiguous range. The export area is a stream +/// of nodes. The first node sequentially is the start node for the trie. 
Nodes +/// for a symbol start with a uleb128 that is the length of the exported symbol +/// information for the string so far. If there is no exported symbol, the node +/// starts with a zero byte. If there is exported info, it follows the length. +/// First is a uleb128 containing flags. Normally, it is followed by +/// a uleb128 encoded offset which is location of the content named +/// by the symbol from the mach_header for the image. If the flags +/// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is +/// a uleb128 encoded library ordinal, then a zero terminated +/// UTF8 string. If the string is zero length, then the symbol +/// is re-export from the specified dylib with the same name. +/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following +/// the flags is two uleb128s: the stub offset and the resolver offset. +/// The stub is used by non-lazy pointers. The resolver is used +/// by lazy pointers and must be called to get the actual address to use. +/// After the optional exported symbol information is a byte of +/// how many edges (0-255) that this node has leaving it, +/// followed by each edge. +/// Each edge is a zero terminated UTF8 of the addition chars +/// in the symbol, followed by a uleb128 offset for the node that +/// edge points to. +struct ExportInfo { + ArrayRef<uint8_t> Trie; +}; + +struct LinkData { + ArrayRef<uint8_t> Data; +}; + +struct Object { + MachHeader Header; + std::vector<LoadCommand> LoadCommands; + + SymbolTable SymTable; + StringTable StrTable; + + RebaseInfo Rebases; + BindInfo Binds; + WeakBindInfo WeakBinds; + LazyBindInfo LazyBinds; + ExportInfo Exports; + IndirectSymbolTable IndirectSymTable; + LinkData DataInCode; + LinkData FunctionStarts; + + /// The index of LC_SYMTAB load command if present. + Optional<size_t> SymTabCommandIndex; + /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. 
+ Optional<size_t> DyLdInfoCommandIndex; + /// The index LC_DYSYMTAB load comamnd if present. + Optional<size_t> DySymTabCommandIndex; + /// The index LC_DATA_IN_CODE load comamnd if present. + Optional<size_t> DataInCodeCommandIndex; + /// The index LC_FUNCTION_STARTS load comamnd if present. + Optional<size_t> FunctionStartsCommandIndex; +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_OBJCOPY_MACHO_OBJECT_H |