diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/tools/llvm-objcopy/MachO | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'llvm/tools/llvm-objcopy/MachO')
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 103 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 248 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 120 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOReader.h | 3 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 100 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/MachOWriter.h | 2 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/Object.cpp | 94 | ||||
-rw-r--r-- | llvm/tools/llvm-objcopy/MachO/Object.h | 56 |
8 files changed, 544 insertions, 182 deletions
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp index 380f2e989fe4..256c830a44a4 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -17,7 +17,7 @@ namespace macho { uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { uint32_t Size = 0; - for (const auto &LC : O.LoadCommands) { + for (const LoadCommand &LC : O.LoadCommands) { const MachO::macho_load_command &MLC = LC.MachOLoadCommand; auto cmd = MLC.load_command_data.cmd; switch (cmd) { @@ -61,15 +61,16 @@ void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); // Make sure that nlist entries in the symbol table are sorted by the those // types. The order is: local < defined external < undefined external. - assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(), - [](const std::unique_ptr<SymbolEntry> &A, - const std::unique_ptr<SymbolEntry> &B) { - bool AL = A->isLocalSymbol(), BL = B->isLocalSymbol(); - if (AL != BL) - return AL; - return !AL && !A->isUndefinedSymbol() && - B->isUndefinedSymbol(); - }) && + assert(llvm::is_sorted(O.SymTable.Symbols, + [](const std::unique_ptr<SymbolEntry> &A, + const std::unique_ptr<SymbolEntry> &B) { + bool AL = A->isLocalSymbol(), + BL = B->isLocalSymbol(); + if (AL != BL) + return AL; + return !AL && !A->isUndefinedSymbol() && + B->isUndefinedSymbol(); + }) && "Symbols are not sorted by their types."); uint32_t NumLocalSymbols = 0; @@ -107,7 +108,7 @@ uint64_t MachOLayoutBuilder::layoutSegments() { const bool IsObjectFile = O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; - for (auto &LC : O.LoadCommands) { + for (LoadCommand &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; StringRef Segname; uint64_t SegmentVmAddr; @@ -142,30 +143,30 @@ uint64_t MachOLayoutBuilder::layoutSegments() { uint64_t SegOffset = Offset; uint64_t SegFileSize = 0; uint64_t VMSize = 0; - for (auto &Sec : LC.Sections) { + for (std::unique_ptr<Section> &Sec : LC.Sections) { + assert(SegmentVmAddr <= Sec->Addr && + "Section's address cannot be smaller than Segment's one"); + uint32_t SectOffset = Sec->Addr - SegmentVmAddr; if (IsObjectFile) { - if (Sec.isVirtualSection()) { - Sec.Offset = 0; + if (Sec->isVirtualSection()) { + Sec->Offset = 0; } else { uint64_t PaddingSize = - offsetToAlignment(SegFileSize, Align(1ull << Sec.Align)); - Sec.Offset = SegOffset + SegFileSize + PaddingSize; - Sec.Size = Sec.Content.size(); - SegFileSize += PaddingSize + Sec.Size; + offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); + Sec->Offset = SegOffset + SegFileSize + PaddingSize; + Sec->Size = Sec->Content.size(); + SegFileSize += PaddingSize + Sec->Size; } - VMSize = std::max(VMSize, Sec.Addr + Sec.Size); } else { - if (Sec.isVirtualSection()) { - Sec.Offset = 0; - VMSize += Sec.Size; + if (Sec->isVirtualSection()) { + Sec->Offset = 0; } else { - uint32_t SectOffset = Sec.Addr - SegmentVmAddr; - Sec.Offset = SegOffset + SectOffset; - Sec.Size = Sec.Content.size(); - SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size); - VMSize = std::max(VMSize, SegFileSize); + Sec->Offset = SegOffset + SectOffset; + Sec->Size = Sec->Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); } } + VMSize = std::max(VMSize, SectOffset + Sec->Size); } if (IsObjectFile) { @@ -204,21 +205,33 @@ uint64_t MachOLayoutBuilder::layoutSegments() { } uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { - for (auto &LC : O.LoadCommands) - for (auto &Sec : LC.Sections) { - Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; - Sec.NReloc = Sec.Relocations.size(); - Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) { + Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; + Sec->NReloc = Sec->Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; } return Offset; } Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // If we are building the layout of an executable or dynamic library + // which does not have any segments other than __LINKEDIT, + // the Offset can be equal to zero by this time. It happens because of the + // convention that in such cases the file offsets specified by LC_SEGMENT + // start with zero (unlike the case of a relocatable object file). + const uint64_t HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || + Offset >= HeaderSize + O.Header.SizeOfCmds) && + "Incorrect tail offset"); + Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); + // The order of LINKEDIT elements is as follows: // rebase info, binding info, weak binding info, lazy binding info, export // trie, data-in-code, symbol table, indirect symbol table, symbol table - // strings. + // strings, code signature. uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); uint64_t StartOfLinkEdit = Offset; uint64_t StartOfRebaseInfo = StartOfLinkEdit; @@ -237,8 +250,10 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { uint64_t StartOfSymbolStrings = StartOfIndirectSymbols + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t StartOfCodeSignature = + StartOfSymbolStrings + StrTableBuilder.getSize(); uint64_t LinkEditSize = - (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit; + (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit; // Now we have determined the layout of the contents of the __LINKEDIT // segment. Update its load command. @@ -260,10 +275,14 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { } } - for (auto &LC : O.LoadCommands) { + for (LoadCommand &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; auto cmd = MLC.load_command_data.cmd; switch (cmd) { + case MachO::LC_CODE_SIGNATURE: + MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; + MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size(); + break; case MachO::LC_SYMTAB: MLC.symtab_command_data.symoff = StartOfSymbols; MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); @@ -314,6 +333,19 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { O.Exports.Trie.empty() ? 0 : StartOfExportTrie; MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); break; + // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in + // <mach-o/loader.h> is not an offset in the binary file, instead, it is a + // relative virtual address. At the moment modification of the __TEXT + // segment of executables isn't supported anyway (e.g. data in code entries + // are not recalculated). Moreover, in general + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because + // without making additional assumptions (e.g. that the entire __TEXT + // segment should be encrypted) we do not know how to recalculate the + // boundaries of the encrypted part. For now just copy over these load + // commands until we encounter a real world usecase where + // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. + case MachO::LC_ENCRYPTION_INFO: + case MachO::LC_ENCRYPTION_INFO_64: case MachO::LC_LOAD_DYLINKER: case MachO::LC_MAIN: case MachO::LC_RPATH: @@ -326,6 +358,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { case MachO::LC_BUILD_VERSION: case MachO::LC_ID_DYLIB: case MachO::LC_LOAD_DYLIB: + case MachO::LC_LOAD_WEAK_DYLIB: case MachO::LC_UUID: case MachO::LC_SOURCE_VERSION: // Nothing to update. diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp index 4578d0bb75d4..5ca5b133572b 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -10,6 +10,7 @@ #include "../CopyConfig.h" #include "MachOReader.h" #include "MachOWriter.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" @@ -18,21 +19,44 @@ namespace objcopy { namespace macho { using namespace object; -using SectionPred = std::function<bool(const Section &Sec)>; +using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>; +using LoadCommandPred = std::function<bool(const LoadCommand &LC)>; + +#ifndef NDEBUG +static bool isLoadCommandWithPayloadString(const LoadCommand &LC) { + // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and + // LC_LAZY_LOAD_DYLIB + return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB; +} +#endif + +static StringRef getPayloadString(const LoadCommand &LC) { + assert(isLoadCommandWithPayloadString(LC) && + "unsupported load command encountered"); + + return StringRef(reinterpret_cast<const char *>(LC.Payload.data()), + LC.Payload.size()) + .rtrim('\0'); +} -static void removeSections(const CopyConfig &Config, Object &Obj) { - SectionPred RemovePred = [](const Section &) { return false; }; +static Error removeSections(const CopyConfig &Config, Object &Obj) { + SectionPred RemovePred = [](const std::unique_ptr<Section> &) { + return false; + }; if (!Config.ToRemove.empty()) { - RemovePred = [&Config, RemovePred](const Section &Sec) { - return Config.ToRemove.matches(Sec.CanonicalName); + RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) { + return Config.ToRemove.matches(Sec->CanonicalName); }; } if (Config.StripAll || Config.StripDebug) { // Remove all debug sections. - RemovePred = [RemovePred](const Section &Sec) { - if (Sec.Segname == "__DWARF") + RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) { + if (Sec->Segname == "__DWARF") return true; return RemovePred(Sec); @@ -41,8 +65,8 @@ static void removeSections(const CopyConfig &Config, Object &Obj) { if (!Config.OnlySection.empty()) { // Overwrite RemovePred because --only-section takes priority. - RemovePred = [&Config](const Section &Sec) { - return !Config.OnlySection.matches(Sec.CanonicalName); + RemovePred = [&Config](const std::unique_ptr<Section> &Sec) { + return !Config.OnlySection.matches(Sec->CanonicalName); }; } @@ -60,41 +84,158 @@ static void updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { for (SymbolEntry &Sym : Obj.SymTable) { auto I = Config.SymbolsToRename.find(Sym.Name); if (I != Config.SymbolsToRename.end()) - Sym.Name = I->getValue(); + Sym.Name = std::string(I->getValue()); } - auto RemovePred = [Config](const std::unique_ptr<SymbolEntry> &N) { + auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) { if (N->Referenced) return false; - return Config.StripAll; + if (Config.StripAll) + return true; + if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) + return true; + // This behavior is consistent with cctools' strip. + if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) && + Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol()) + return true; + return false; }; Obj.SymTable.removeSymbols(RemovePred); } +template <typename LCType> +static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) { + assert(isLoadCommandWithPayloadString(LC) && + "unsupported load command encountered"); + + uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8); + + LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize; + LC.Payload.assign(NewCmdsize - sizeof(LCType), 0); + std::copy(S.begin(), S.end(), LC.Payload.begin()); +} + static LoadCommand buildRPathLoadCommand(StringRef Path) { LoadCommand LC; MachO::rpath_command RPathLC; RPathLC.cmd = MachO::LC_RPATH; RPathLC.path = sizeof(MachO::rpath_command); - RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size(), 8); + RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8); LC.MachOLoadCommand.rpath_command_data = RPathLC; LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0); std::copy(Path.begin(), Path.end(), LC.Payload.begin()); return LC; } +static Error processLoadCommands(const CopyConfig &Config, Object &Obj) { + // Remove RPaths. + DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(), + Config.RPathsToRemove.end()); + + LoadCommandPred RemovePred = [&RPathsToRemove](const LoadCommand &LC) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) { + StringRef RPath = getPayloadString(LC); + if (RPathsToRemove.count(RPath)) { + RPathsToRemove.erase(RPath); + return true; + } + } + return false; + }; + + if (Error E = Obj.removeLoadCommands(RemovePred)) + return E; + + // Emit an error if the Mach-O binary does not contain an rpath path name + // specified in -delete_rpath. + for (StringRef RPath : Config.RPathsToRemove) { + if (RPathsToRemove.count(RPath)) + return createStringError(errc::invalid_argument, + "no LC_RPATH load command with path: %s", + RPath.str().c_str()); + } + + DenseSet<StringRef> RPaths; + + // Get all existing RPaths. + for (LoadCommand &LC : Obj.LoadCommands) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) + RPaths.insert(getPayloadString(LC)); + } + + // Throw errors for invalid RPaths. + for (const auto &OldNew : Config.RPathsToUpdate) { + StringRef Old = OldNew.getFirst(); + StringRef New = OldNew.getSecond(); + if (RPaths.count(Old) == 0) + return createStringError(errc::invalid_argument, + "no LC_RPATH load command with path: " + Old); + if (RPaths.count(New) != 0) + return createStringError(errc::invalid_argument, + "rpath " + New + + " would create a duplicate load command"); + } + + // Update load commands. + for (LoadCommand &LC : Obj.LoadCommands) { + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_ID_DYLIB: + if (Config.SharedLibId) { + StringRef Id = Config.SharedLibId.getValue(); + if (Id.empty()) + return createStringError(errc::invalid_argument, + "cannot specify an empty id"); + updateLoadCommandPayloadString<MachO::dylib_command>(LC, Id); + } + break; + + case MachO::LC_RPATH: { + StringRef RPath = getPayloadString(LC); + StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath); + if (!NewRPath.empty()) + updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath); + break; + } + + // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB + // here once llvm-objcopy supports them. + case MachO::LC_LOAD_DYLIB: + case MachO::LC_LOAD_WEAK_DYLIB: + StringRef InstallName = getPayloadString(LC); + StringRef NewInstallName = + Config.InstallNamesToUpdate.lookup(InstallName); + if (!NewInstallName.empty()) + updateLoadCommandPayloadString<MachO::dylib_command>(LC, + NewInstallName); + break; + } + } + + // Add new RPaths. + for (StringRef RPath : Config.RPathToAdd) { + if (RPaths.count(RPath) != 0) + return createStringError(errc::invalid_argument, + "rpath " + RPath + + " would create a duplicate load command"); + RPaths.insert(RPath); + Obj.addLoadCommand(buildRPathLoadCommand(RPath)); + } + + return Error::success(); +} + static Error dumpSectionToFile(StringRef SecName, StringRef Filename, Object &Obj) { for (LoadCommand &LC : Obj.LoadCommands) - for (Section &Sec : LC.Sections) { - if (Sec.CanonicalName == SecName) { + for (const std::unique_ptr<Section> &Sec : LC.Sections) { + if (Sec->CanonicalName == SecName) { Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Filename, Sec.Content.size()); + FileOutputBuffer::create(Filename, Sec->Content.size()); if (!BufferOrErr) return BufferOrErr.takeError(); std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr); - llvm::copy(Sec.Content, Buf->getBufferStart()); + llvm::copy(Sec->Content, Buf->getBufferStart()); if (Error E = Buf->commit()) return E; @@ -122,7 +263,7 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) { for (LoadCommand &LC : Obj.LoadCommands) { Optional<StringRef> SegName = LC.getSegmentName(); if (SegName && SegName == TargetSegName) { - LC.Sections.push_back(Sec); + LC.Sections.push_back(std::make_unique<Section>(Sec)); return Error::success(); } } @@ -130,7 +271,7 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) { // There's no segment named TargetSegName. Create a new load command and // Insert a new section into it. LoadCommand &NewSegment = Obj.addSegment(TargetSegName); - NewSegment.Sections.push_back(Sec); + NewSegment.Sections.push_back(std::make_unique<Section>(Sec)); return Error::success(); } @@ -167,17 +308,27 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { !Config.SectionsToRename.empty() || !Config.UnneededSymbolsToRemove.empty() || !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripAllGNU || Config.StripDWO || - Config.StripNonAlloc || Config.StripSections || Config.Weaken || - Config.DecompressDebugSections || Config.StripNonAlloc || - Config.StripSections || Config.StripUnneeded || - Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() || - Config.EntryExpr) { + Config.ExtractDWO || Config.LocalizeHidden || Config.PreserveDates || + Config.StripAllGNU || Config.StripDWO || Config.StripNonAlloc || + Config.StripSections || Config.Weaken || Config.DecompressDebugSections || + Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded || + Config.DiscardMode == DiscardType::Locals || + !Config.SymbolsToAdd.empty() || Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, "option not supported by llvm-objcopy for MachO"); } - removeSections(Config, Obj); + + // Dump sections before add/remove for compatibility with GNU objcopy. + for (StringRef Flag : Config.DumpSection) { + StringRef SectionName; + StringRef FileName; + std::tie(SectionName, FileName) = Flag.split('='); + if (Error E = dumpSectionToFile(SectionName, FileName, Obj)) + return E; + } + + if (Error E = removeSections(Config, Obj)) + return E; // Mark symbols to determine which symbols are still needed. if (Config.StripAll) @@ -187,16 +338,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { if (Config.StripAll) for (LoadCommand &LC : Obj.LoadCommands) - for (Section &Sec : LC.Sections) - Sec.Relocations.clear(); - - for (const StringRef &Flag : Config.DumpSection) { - std::pair<StringRef, StringRef> SecPair = Flag.split("="); - StringRef SecName = SecPair.first; - StringRef File = SecPair.second; - if (Error E = dumpSectionToFile(SecName, File, Obj)) - return E; - } + for (std::unique_ptr<Section> &Sec : LC.Sections) + Sec->Relocations.clear(); for (const auto &Flag : Config.AddSection) { std::pair<StringRef, StringRef> SecPair = Flag.split("="); @@ -208,19 +351,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return E; } - for (StringRef RPath : Config.RPathToAdd) { - for (LoadCommand &LC : Obj.LoadCommands) { - if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH && - RPath == StringRef(reinterpret_cast<char *>(LC.Payload.data()), - LC.Payload.size()) - .trim(0)) { - return createStringError(errc::invalid_argument, - "rpath " + RPath + - " would create a duplicate load command"); - } - } - Obj.addLoadCommand(buildRPathLoadCommand(RPath)); - } + if (Error E = processLoadCommands(Config, Obj)) + return E; + return Error::success(); } @@ -237,9 +370,18 @@ Error executeObjcopyOnBinary(const CopyConfig &Config, if (Error E = handleArgs(Config, *O)) return createFileError(Config.InputFilename, std::move(E)); - // TODO: Support 16KB pages which are employed in iOS arm64 binaries: - // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb - const uint64_t PageSize = 4096; + // Page size used for alignment of segment sizes in Mach-O executables and + // dynamic libraries. + uint64_t PageSize; + switch (In.getArch()) { + case Triple::ArchType::arm: + case Triple::ArchType::aarch64: + case Triple::ArchType::aarch64_32: + PageSize = 16384; + break; + default: + PageSize = 4096; + } MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); if (auto E = Writer.finalize()) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index 46bb11727322..99bcec7f6b51 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -28,10 +28,11 @@ void MachOReader::readHeader(Object &O) const { } template <typename SectionType> -Section constructSectionCommon(SectionType Sec) { +Section constructSectionCommon(SectionType Sec, uint32_t Index) { StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); Section S(SegName, SectName); + S.Index = Index; S.Addr = Sec.addr; S.Size = Sec.size; S.Offset = Sec.offset; @@ -45,39 +46,42 @@ Section constructSectionCommon(SectionType Sec) { return S; } -template <typename SectionType> Section constructSection(SectionType Sec); +template <typename SectionType> +Section constructSection(SectionType Sec, uint32_t Index); -template <> Section constructSection(MachO::section Sec) { - return constructSectionCommon(Sec); +template <> Section constructSection(MachO::section Sec, uint32_t Index) { + return constructSectionCommon(Sec, Index); } -template <> Section constructSection(MachO::section_64 Sec) { - Section S = constructSectionCommon(Sec); +template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) { + Section S = constructSectionCommon(Sec, Index); S.Reserved3 = Sec.reserved3; return S; } // TODO: get rid of reportError and make MachOReader return Expected<> instead. template <typename SectionType, typename SegmentType> -std::vector<Section> +std::vector<std::unique_ptr<Section>> extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, const object::MachOObjectFile &MachOObj, - size_t &NextSectionIndex) { + uint32_t &NextSectionIndex) { auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; const SectionType *Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); - std::vector<Section> Sections; + std::vector<std::unique_ptr<Section>> Sections; for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { SectionType Sec; memcpy((void *)&Sec, Curr, sizeof(SectionType)); MachO::swapStruct(Sec); - Sections.push_back(constructSection(Sec)); + Sections.push_back( + std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); } else { - Sections.push_back(constructSection(*Curr)); + Sections.push_back( + std::make_unique<Section>(constructSection(*Curr, NextSectionIndex))); } - Section &S = Sections.back(); + Section &S = *Sections.back(); Expected<object::SectionRef> SecRef = MachOObj.getSection(NextSectionIndex++); @@ -99,6 +103,7 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, R.Symbol = nullptr; // We'll fill this field later. R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); R.Scattered = MachOObj.isRelocationScattered(R.Info); + R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); S.Relocations.push_back(R); } @@ -110,10 +115,13 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, void MachOReader::readLoadCommands(Object &O) const { // For MachO sections indices start from 1. - size_t NextSectionIndex = 1; + uint32_t NextSectionIndex = 1; for (auto LoadCmd : MachOObj.load_commands()) { LoadCommand LC; switch (LoadCmd.C.cmd) { + case MachO::LC_CODE_SIGNATURE: + O.CodeSignatureCommandIndex = O.LoadCommands.size(); + break; case MachO::LC_SEGMENT: LC.Sections = extractSections<MachO::section, MachO::segment_command>( LoadCmd, MachOObj, NextSectionIndex); @@ -189,24 +197,36 @@ void MachOReader::readSymbolTable(Object &O) const { for (auto Symbol : MachOObj.symbols()) { SymbolEntry SE = (MachOObj.is64Bit() - ? constructSymbolEntry( - StrTable, - MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) - : constructSymbolEntry( - StrTable, - MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); + ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( + Symbol.getRawDataRefImpl())) + : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( + Symbol.getRawDataRefImpl()))); O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); } } void MachOReader::setSymbolInRelocationInfo(Object &O) const { + std::vector<const Section *> Sections; for (auto &LC : O.LoadCommands) - for (auto &Sec : LC.Sections) - for (auto &Reloc : Sec.Relocations) + for (std::unique_ptr<Section> &Sec : LC.Sections) + Sections.push_back(Sec.get()); + + for (LoadCommand &LC : O.LoadCommands) + for (std::unique_ptr<Section> &Sec : LC.Sections) + for (auto &Reloc : Sec->Relocations) if (!Reloc.Scattered) { - auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info); - Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum); + const uint32_t SymbolNum = + Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); + if (Reloc.Extern) { + Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); + } else { + // FIXME: Refactor error handling in MachOReader and report an error + // if we encounter an invalid relocation. + assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && + "Invalid section index."); + Reloc.Sec = Sections[SymbolNum - 1]; + } } } @@ -230,26 +250,26 @@ void MachOReader::readExportInfo(Object &O) const { O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); } -void MachOReader::readDataInCodeData(Object &O) const { - if (!O.DataInCodeCommandIndex) +void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex, + LinkData &LD) const { + if (!LCIndex) return; - const MachO::linkedit_data_command &LDC = - O.LoadCommands[*O.DataInCodeCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; + const MachO::linkedit_data_command &LC = + O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; + LD.Data = + arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); +} - O.DataInCode.Data = arrayRefFromStringRef( - MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +void MachOReader::readCodeSignature(Object &O) const { + return readLinkData(O, O.CodeSignatureCommandIndex, O.CodeSignature); } -void MachOReader::readFunctionStartsData(Object &O) const { - if (!O.FunctionStartsCommandIndex) - return; - const MachO::linkedit_data_command &LDC = - O.LoadCommands[*O.FunctionStartsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; +void MachOReader::readDataInCodeData(Object &O) const { + return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); +} - O.FunctionStarts.Data = arrayRefFromStringRef( - MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +void MachOReader::readFunctionStartsData(Object &O) const { + return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); } void MachOReader::readIndirectSymbolTable(Object &O) const { @@ -266,6 +286,28 @@ void MachOReader::readIndirectSymbolTable(Object &O) const { } } +void MachOReader::readSwiftVersion(Object &O) const { + struct ObjCImageInfo { + uint32_t Version; + uint32_t Flags; + } ImageInfo; + + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) + if (Sec->Sectname == "__objc_imageinfo" && + (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || + Sec->Segname == "__DATA_DIRTY") && + Sec->Content.size() >= sizeof(ObjCImageInfo)) { + memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(ImageInfo.Version); + sys::swapByteOrder(ImageInfo.Flags); + } + O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; + return; + } +} + std::unique_ptr<Object> MachOReader::create() const { auto Obj = std::make_unique<Object>(); readHeader(*Obj); @@ -277,9 +319,11 @@ std::unique_ptr<Object> MachOReader::create() const { readWeakBindInfo(*Obj); readLazyBindInfo(*Obj); readExportInfo(*Obj); + readCodeSignature(*Obj); readDataInCodeData(*Obj); readFunctionStartsData(*Obj); readIndirectSymbolTable(*Obj); + readSwiftVersion(*Obj); return Obj; } diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h index 00c8f0d55f61..65824b6eb389 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -36,9 +36,12 @@ class MachOReader : public Reader { void readWeakBindInfo(Object &O) const; void readLazyBindInfo(Object &O) const; void readExportInfo(Object &O) const; + void readLinkData(Object &O, Optional<size_t> LCIndex, LinkData &LD) const; + void readCodeSignature(Object &O) const; void readDataInCodeData(Object &O) const; void readFunctionStartsData(Object &O) const; void readIndirectSymbolTable(Object &O) const; + void readSwiftVersion(Object &O) const; public: explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index 0d9590612eca..3c41e73b2b01 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -89,6 +89,15 @@ size_t MachOWriter::totalSize() const { sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); } + if (O.CodeSignatureCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.CodeSignatureCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + if (O.DataInCodeCommandIndex) { const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*O.DataInCodeCommandIndex] @@ -110,12 +119,12 @@ size_t MachOWriter::totalSize() const { } // Otherwise, use the last section / reloction. - for (const auto &LC : O.LoadCommands) - for (const auto &S : LC.Sections) { - Ends.push_back(S.Offset + S.Size); - if (S.RelOff) - Ends.push_back(S.RelOff + - S.NReloc * sizeof(MachO::any_relocation_info)); + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &S : LC.Sections) { + Ends.push_back(S->Offset + S->Size); + if (S->RelOff) + Ends.push_back(S->RelOff + + S->NReloc * sizeof(MachO::any_relocation_info)); } if (!Ends.empty()) @@ -147,7 +156,7 @@ void MachOWriter::writeHeader() { void MachOWriter::writeLoadCommands() { uint8_t *Begin = B.getBufferStart() + headerSize(); - for (const auto &LC : O.LoadCommands) { + for (const LoadCommand &LC : O.LoadCommands) { // Construct a load command. MachO::macho_load_command MLC = LC.MachOLoadCommand; switch (MLC.load_command_data.cmd) { @@ -157,8 +166,8 @@ void MachOWriter::writeLoadCommands() { memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command)); Begin += sizeof(MachO::segment_command); - for (const auto &Sec : LC.Sections) - writeSectionInLoadCommand<MachO::section>(Sec, Begin); + for (const std::unique_ptr<Section> &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section>(*Sec, Begin); continue; case MachO::LC_SEGMENT_64: if (IsLittleEndian != sys::IsLittleEndianHost) @@ -167,8 +176,8 @@ void MachOWriter::writeLoadCommands() { sizeof(MachO::segment_command_64)); Begin += sizeof(MachO::segment_command_64); - for (const auto &Sec : LC.Sections) - writeSectionInLoadCommand<MachO::section_64>(Sec, Begin); + for (const std::unique_ptr<Section> &Sec : LC.Sections) + writeSectionInLoadCommand<MachO::section_64>(*Sec, Begin); continue; } @@ -229,27 +238,27 @@ void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) { } void MachOWriter::writeSections() { - for (const auto &LC : O.LoadCommands) - for (const auto &Sec : LC.Sections) { - if (Sec.isVirtualSection()) + for (const LoadCommand &LC : O.LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) { + if (Sec->isVirtualSection()) continue; - assert(Sec.Offset && "Section offset can not be zero"); - assert((Sec.Size == Sec.Content.size()) && "Incorrect section size"); - memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(), - Sec.Content.size()); - for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) { - auto RelocInfo = Sec.Relocations[Index]; + assert(Sec->Offset && "Section offset can not be zero"); + assert((Sec->Size == Sec->Content.size()) && "Incorrect section size"); + memcpy(B.getBufferStart() + Sec->Offset, Sec->Content.data(), + Sec->Content.size()); + for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { + RelocationInfo RelocInfo = Sec->Relocations[Index]; if (!RelocInfo.Scattered) { - auto *Info = - reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info); - Info->r_symbolnum = RelocInfo.Symbol->Index; + const uint32_t SymbolNum = RelocInfo.Extern + ? (*RelocInfo.Symbol)->Index + : (*RelocInfo.Sec)->Index; + RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); } - if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct( reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info)); - memcpy(B.getBufferStart() + Sec.RelOff + + memcpy(B.getBufferStart() + Sec->RelOff + Index * sizeof(MachO::any_relocation_info), &RelocInfo.Info, sizeof(RelocInfo.Info)); } @@ -381,28 +390,27 @@ void MachOWriter::writeIndirectSymbolTable() { } } -void MachOWriter::writeDataInCodeData() { - if (!O.DataInCodeCommandIndex) +void MachOWriter::writeLinkData(Optional<size_t> LCIndex, const LinkData &LD) { + if (!LCIndex) return; const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.DataInCodeCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; + O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; - assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && - "Incorrect data in code data size"); - memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); + assert((LinkEditDataCommand.datasize == LD.Data.size()) && + "Incorrect data size"); + memcpy(Out, LD.Data.data(), LD.Data.size()); +} + +void MachOWriter::writeCodeSignatureData() { + return writeLinkData(O.CodeSignatureCommandIndex, O.CodeSignature); +} + +void MachOWriter::writeDataInCodeData() { + return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode); } void MachOWriter::writeFunctionStartsData() { - if (!O.FunctionStartsCommandIndex) - return; - const MachO::linkedit_data_command &LinkEditDataCommand = - O.LoadCommands[*O.FunctionStartsCommandIndex] - .MachOLoadCommand.linkedit_data_command_data; - char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; - assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && - "Incorrect function starts data size"); - memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); + return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts); } void MachOWriter::writeTail() { @@ -450,6 +458,16 @@ void MachOWriter::writeTail() { &MachOWriter::writeIndirectSymbolTable); } + if (O.CodeSignatureCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.CodeSignatureCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeCodeSignatureData); + } + if (O.DataInCodeCommandIndex) { const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*O.DataInCodeCommandIndex] diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h index 22abbad56f41..c2c6f5a55e9a 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -45,6 +45,8 @@ class MachOWriter { void writeLazyBindInfo(); void writeExportInfo(); void writeIndirectSymbolTable(); + void writeLinkData(Optional<size_t> LCIndex, const LinkData &LD); + void writeCodeSignatureData(); void writeDataInCodeData(); void writeFunctionStartsData(); void writeTail(); diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp index d3b4fdc2f633..de8cb0af108d 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -1,5 +1,15 @@ +//===- Object.cpp - Mach-O object file model --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "Object.h" #include "../llvm-objcopy.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <unordered_set> namespace llvm { namespace objcopy { @@ -22,11 +32,83 @@ void SymbolTable::removeSymbols( std::end(Symbols)); } -void Object::removeSections(function_ref<bool(const Section &)> ToRemove) { - for (LoadCommand &LC : LoadCommands) - LC.Sections.erase(std::remove_if(std::begin(LC.Sections), - std::end(LC.Sections), ToRemove), - std::end(LC.Sections)); +void Object::updateLoadCommandIndexes() { + // Update indices of special load commands + for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { + LoadCommand &LC = LoadCommands[Index]; + switch (LC.MachOLoadCommand.load_command_data.cmd) { + case MachO::LC_SYMTAB: + SymTabCommandIndex = Index; + break; + case MachO::LC_DYSYMTAB: + DySymTabCommandIndex = Index; + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + DyLdInfoCommandIndex = Index; + break; + case MachO::LC_DATA_IN_CODE: + DataInCodeCommandIndex = Index; + break; + case MachO::LC_FUNCTION_STARTS: + FunctionStartsCommandIndex = Index; + break; + } + } +} + +Error Object::removeLoadCommands( + function_ref<bool(const LoadCommand &)> ToRemove) { + auto It = std::stable_partition( + LoadCommands.begin(), LoadCommands.end(), + [&](const LoadCommand &LC) { return !ToRemove(LC); }); + LoadCommands.erase(It, LoadCommands.end()); + + updateLoadCommandIndexes(); + return Error::success(); +} + +Error Object::removeSections( + function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { + DenseMap<uint32_t, const Section *> OldIndexToSection; + uint32_t NextSectionIndex = 1; + for (LoadCommand &LC : LoadCommands) { + auto It = std::stable_partition( + std::begin(LC.Sections), std::end(LC.Sections), + [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); + for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { + OldIndexToSection[(*I)->Index] = I->get(); + (*I)->Index = NextSectionIndex++; + } + LC.Sections.erase(It, LC.Sections.end()); + } + + auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { + Optional<uint32_t> Section = S->section(); + return (Section && !OldIndexToSection.count(*Section)); + }; + + SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; + for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) + if (IsDead(Sym)) + DeadSymbols.insert(Sym.get()); + + for (const LoadCommand &LC : LoadCommands) + for (const std::unique_ptr<Section> &Sec : LC.Sections) + for (const RelocationInfo &R : Sec->Relocations) + if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) + return createStringError(std::errc::invalid_argument, + "symbol '%s' defined in section with index " + "'%u' cannot be removed because it is " + "referenced by a relocation in section '%s'", + (*R.Symbol)->Name.c_str(), + *((*R.Symbol)->section()), + Sec->CanonicalName.c_str()); + SymTable.removeSymbols(IsDead); + for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) + if (S->section()) + S->n_sect = OldIndexToSection[S->n_sect]->Index; + return Error::success(); } void Object::addLoadCommand(LoadCommand LC) { @@ -52,7 +134,7 @@ LoadCommand &Object::addSegment(StringRef SegName) { constructSegment(LC.MachOLoadCommand.segment_command_data, MachO::LC_SEGMENT, SegName); - LoadCommands.push_back(LC); + LoadCommands.push_back(std::move(LC)); return LoadCommands.back(); } diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index dc2606eefa4a..e825d1867b09 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -37,6 +37,7 @@ struct MachHeader { struct RelocationInfo; struct Section { + uint32_t Index; std::string Segname; std::string Sectname; // CanonicalName is a string formatted as “<Segname>,<Sectname>". @@ -55,11 +56,11 @@ struct Section { std::vector<RelocationInfo> Relocations; Section(StringRef SegName, StringRef SectName) - : Segname(SegName), Sectname(SectName), + : Segname(std::string(SegName)), Sectname(std::string(SectName)), CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} Section(StringRef SegName, StringRef SectName, StringRef Content) - : Segname(SegName), Sectname(SectName), + : Segname(std::string(SegName)), Sectname(std::string(SectName)), CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), Content(Content) {} @@ -83,13 +84,13 @@ struct LoadCommand { // The raw content of the payload of the load command (located right after the // corresponding struct). In some cases it is either empty or can be // copied-over without digging into its structure. - std::vector<uint8_t> Payload; + std::vector<uint8_t> Payload; // Some load commands can contain (inside the payload) an array of sections, // though the contents of the sections are stored separately. The struct // Section describes only sections' metadata and where to find the // corresponding content inside the binary. - std::vector<Section> Sections; + std::vector<std::unique_ptr<Section>> Sections; // Returns the segment name if the load command is a segment command. Optional<StringRef> getSegmentName() const; @@ -106,15 +107,22 @@ struct SymbolEntry { uint16_t n_desc; uint64_t n_value; - bool isExternalSymbol() const { - return n_type & ((MachO::N_EXT | MachO::N_PEXT)); - } + bool isExternalSymbol() const { return n_type & MachO::N_EXT; } bool isLocalSymbol() const { return !isExternalSymbol(); } bool isUndefinedSymbol() const { return (n_type & MachO::N_TYPE) == MachO::N_UNDF; } + + bool isSwiftSymbol() const { + return StringRef(Name).startswith("_$s") || + StringRef(Name).startswith("_$S"); + } + + Optional<uint32_t> section() const { + return n_sect == MachO::NO_SECT ? None : Optional<uint32_t>(n_sect); + } }; /// The location of the symbol table inside the binary is described by LC_SYMTAB @@ -157,10 +165,29 @@ struct StringTable { }; struct RelocationInfo { - const SymbolEntry *Symbol; + // The referenced symbol entry. Set if !Scattered && Extern. + Optional<const SymbolEntry *> Symbol; + // The referenced section. Set if !Scattered && !Extern. + Optional<const Section *> Sec; // True if Info is a scattered_relocation_info. bool Scattered; + // True if the r_symbolnum points to a section number (i.e. r_extern=0). + bool Extern; MachO::any_relocation_info Info; + + unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { + if (IsLittleEndian) + return Info.r_word1 & 0xffffff; + return Info.r_word1 >> 8; + } + + void setPlainRelocationSymbolNum(unsigned SymbolNum, bool IsLittleEndian) { + assert(SymbolNum < (1 << 24) && "SymbolNum out of range"); + if (IsLittleEndian) + Info.r_word1 = (Info.r_word1 & ~0x00ffffff) | SymbolNum; + else + Info.r_word1 = (Info.r_word1 & ~0xffffff00) | (SymbolNum << 8); + } }; /// The location of the rebase info inside the binary is described by @@ -275,7 +302,12 @@ struct Object { IndirectSymbolTable IndirectSymTable; LinkData DataInCode; LinkData FunctionStarts; + LinkData CodeSignature; + + Optional<uint32_t> SwiftVersion; + /// The index of LC_CODE_SIGNATURE load command if present. + Optional<size_t> CodeSignatureCommandIndex; /// The index of LC_SYMTAB load command if present. Optional<size_t> SymTabCommandIndex; /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. @@ -292,7 +324,13 @@ struct Object { Object() : NewSectionsContents(Alloc) {} - void removeSections(function_ref<bool(const Section &)> ToRemove); + Error + removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove); + + Error removeLoadCommands(function_ref<bool(const LoadCommand &)> ToRemove); + + void updateLoadCommandIndexes(); + void addLoadCommand(LoadCommand LC); /// Creates a new segment load command in the object and returns a reference |