diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Object/ArchiveWriter.cpp | |
parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) |
Notes
Diffstat (limited to 'lib/Object/ArchiveWriter.cpp')
-rw-r--r-- | lib/Object/ArchiveWriter.cpp | 188 |
1 files changed, 144 insertions, 44 deletions
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index ea17b2220a0b..da93602cbb28 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -27,6 +27,8 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" +#include <map> + #if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> #else @@ -119,6 +121,11 @@ static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { OS.indent(Size - SizeSoFar); } +static bool isDarwin(object::Archive::Kind Kind) { + return Kind == object::Archive::K_DARWIN || + Kind == object::Archive::K_DARWIN64; +} + static bool isBSDLike(object::Archive::Kind Kind) { switch (Kind) { case object::Archive::K_GNU: @@ -126,8 +133,8 @@ static bool isBSDLike(object::Archive::Kind Kind) { return false; case object::Archive::K_BSD: case object::Archive::K_DARWIN: - return true; case object::Archive::K_DARWIN64: + return true; case object::Archive::K_COFF: break; } @@ -243,20 +250,33 @@ static void addToStringTable(raw_ostream &Out, StringRef ArcName, static void printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, + StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind, bool Thin, StringRef ArcName, const NewArchiveMember &M, + sys::TimePoint<std::chrono::seconds> ModTime, unsigned Size) { + if (isBSDLike(Kind)) - return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID, + return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, M.Perms, Size); if (!useStringTable(Thin, M.MemberName)) - return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID, + return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, M.Perms, Size); Out << '/'; - uint64_t NamePos = StringTable.tell(); - addToStringTable(StringTable, ArcName, M, Thin); + uint64_t NamePos; + if (Thin) { + NamePos = StringTable.tell(); + addToStringTable(StringTable, ArcName, M, Thin); + } else { + auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); + if (Insertion.second) { + Insertion.first->second = StringTable.tell(); + addToStringTable(StringTable, ArcName, M, Thin); + } + NamePos = Insertion.first->second; + } printWithSpacePadding(Out, NamePos, 15); - printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size); + printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size); } namespace { @@ -310,7 +330,9 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, bool Deterministic, ArrayRef<MemberData> Members, StringRef StringTable) { - if (StringTable.empty()) + // We don't write a symbol table on an archive with no members -- except on + // Darwin, where the linker will abort unless the archive has a symbol table. + if (StringTable.empty() && !isDarwin(Kind)) return; unsigned NumSyms = 0; @@ -318,15 +340,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, NumSyms += M.Symbols.size(); unsigned Size = 0; - Size += is64BitKind(Kind) ? 8 : 4; // Number of entries + unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t); + + Size += OffsetSize; // Number of entries if (isBSDLike(Kind)) - Size += NumSyms * 8; // Table - else if (is64BitKind(Kind)) - Size += NumSyms * 8; // Table + Size += NumSyms * OffsetSize * 2; // Table else - Size += NumSyms * 4; // Table + Size += NumSyms * OffsetSize; // Table if (isBSDLike(Kind)) - Size += 4; // byte count + Size += OffsetSize; // byte count Size += StringTable.size(); // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding @@ -336,25 +358,26 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, unsigned Pad = OffsetToAlignment(Size, Alignment); Size += Pad; - if (isBSDLike(Kind)) - printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0, - 0, Size); - else if (is64BitKind(Kind)) - printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size); - else - printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size); + if (isBSDLike(Kind)) { + const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; + printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, + Size); + } else { + const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; + printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); + } uint64_t Pos = Out.tell() + Size; if (isBSDLike(Kind)) - print<uint32_t>(Out, Kind, NumSyms * 8); + printNBits(Out, Kind, NumSyms * 2 * OffsetSize); else printNBits(Out, Kind, NumSyms); for (const MemberData &M : Members) { for (unsigned StringOffset : M.Symbols) { if (isBSDLike(Kind)) - print<uint32_t>(Out, Kind, StringOffset); + printNBits(Out, Kind, StringOffset); printNBits(Out, Kind, Pos); // member offset } Pos += M.Header.size() + M.Data.size() + M.Padding.size(); @@ -362,7 +385,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, if (isBSDLike(Kind)) // byte count of the string table - print<uint32_t>(Out, Kind, StringTable.size()); + printNBits(Out, Kind, StringTable.size()); Out << StringTable; while (Pad--) @@ -372,20 +395,32 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, static Expected<std::vector<unsigned>> getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { std::vector<unsigned> Ret; - LLVMContext Context; - Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = - object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown, - &Context); - if (!ObjOrErr) { - // FIXME: check only for "not an object file" errors. - consumeError(ObjOrErr.takeError()); - return Ret; + // In the scenario when LLVMContext is populated SymbolicFile will contain a + // reference to it, thus SymbolicFile should be destroyed first. + LLVMContext Context; + std::unique_ptr<object::SymbolicFile> Obj; + if (identify_magic(Buf.getBuffer()) == file_magic::bitcode) { + auto ObjOrErr = object::SymbolicFile::createSymbolicFile( + Buf, file_magic::bitcode, &Context); + if (!ObjOrErr) { + // FIXME: check only for "not an object file" errors. + consumeError(ObjOrErr.takeError()); + return Ret; + } + Obj = std::move(*ObjOrErr); + } else { + auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); + if (!ObjOrErr) { + // FIXME: check only for "not an object file" errors. + consumeError(ObjOrErr.takeError()); + return Ret; + } + Obj = std::move(*ObjOrErr); } HasObject = true; - object::SymbolicFile &Obj = *ObjOrErr.get(); - for (const object::BasicSymbolRef &S : Obj.symbols()) { + for (const object::BasicSymbolRef &S : Obj->symbols()) { if (!isArchiveSymbol(S)) continue; Ret.push_back(SymNames.tell()); @@ -399,7 +434,7 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { static Expected<std::vector<MemberData>> computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, StringRef ArcName, - ArrayRef<NewArchiveMember> NewMembers) { + bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; // This ignores the symbol table, but we only need the value mod 8 and the @@ -408,6 +443,62 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, std::vector<MemberData> Ret; bool HasObject = false; + + // Deduplicate long member names in the string table and reuse earlier name + // offsets. This especially saves space for COFF Import libraries where all + // members have the same name. + StringMap<uint64_t> MemberNames; + + // UniqueTimestamps is a special case to improve debugging on Darwin: + // + // The Darwin linker does not link debug info into the final + // binary. Instead, it emits entries of type N_OSO in in the output + // binary's symbol table, containing references to the linked-in + // object files. Using that reference, the debugger can read the + // debug data directly from the object files. Alternatively, an + // invocation of 'dsymutil' will link the debug data from the object + // files into a dSYM bundle, which can be loaded by the debugger, + // instead of the object files. + // + // For an object file, the N_OSO entries contain the absolute path + // path to the file, and the file's timestamp. For an object + // included in an archive, the path is formatted like + // "/absolute/path/to/archive.a(member.o)", and the timestamp is the + // archive member's timestamp, rather than the archive's timestamp. + // + // However, this doesn't always uniquely identify an object within + // an archive -- an archive file can have multiple entries with the + // same filename. (This will happen commonly if the original object + // files started in different directories.) The only way they get + // distinguished, then, is via the timestamp. But this process is + // unable to find the correct object file in the archive when there + // are two files of the same name and timestamp. + // + // Additionally, timestamp==0 is treated specially, and causes the + // timestamp to be ignored as a match criteria. + // + // That will "usually" work out okay when creating an archive not in + // deterministic timestamp mode, because the objects will probably + // have been created at different timestamps. + // + // To ameliorate this problem, in deterministic archive mode (which + // is the default), on Darwin we will emit a unique non-zero + // timestamp for each entry with a duplicated name. This is still + // deterministic: the only thing affecting that timestamp is the + // order of the files in the resultant archive. + // + // See also the functions that handle the lookup: + // in lldb: ObjectContainerBSDArchive::Archive::FindObject() + // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers(). + bool UniqueTimestamps = Deterministic && isDarwin(Kind); + std::map<StringRef, unsigned> FilenameCount; + if (UniqueTimestamps) { + for (const NewArchiveMember &M : NewMembers) + FilenameCount[M.MemberName]++; + for (auto &Entry : FilenameCount) + Entry.second = Entry.second > 1 ? 1 : 0; + } + for (const NewArchiveMember &M : NewMembers) { std::string Header; raw_string_ostream Out(Header); @@ -419,14 +510,19 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. This matches the behaviour with cctools and ensures that ld64 // is happy with archives that we generate. - unsigned MemberPadding = Kind == object::Archive::K_DARWIN - ? OffsetToAlignment(Data.size(), 8) - : 0; + unsigned MemberPadding = + isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0; unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2); StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); - printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M, - Buf.getBufferSize() + MemberPadding); + sys::TimePoint<std::chrono::seconds> ModTime; + if (UniqueTimestamps) + // Increment timestamp for each file of a given name. + ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); + else + ModTime = M.ModTime; + printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName, + M, ModTime, Buf.getBufferSize() + MemberPadding); Out.flush(); Expected<std::vector<unsigned>> Symbols = @@ -457,8 +553,8 @@ Error llvm::writeArchive(StringRef ArcName, SmallString<0> StringTableBuf; raw_svector_ostream StringTable(StringTableBuf); - Expected<std::vector<MemberData>> DataOrErr = - computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers); + Expected<std::vector<MemberData>> DataOrErr = computeMemberData( + StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers); if (Error E = DataOrErr.takeError()) return E; std::vector<MemberData> &Data = *DataOrErr; @@ -470,7 +566,7 @@ Error llvm::writeArchive(StringRef ArcName, if (WriteSymtab) { uint64_t MaxOffset = 0; uint64_t LastOffset = MaxOffset; - for (const auto& M : Data) { + for (const auto &M : Data) { // Record the start of the member's offset LastOffset = MaxOffset; // Account for the size of each part associated with the member. @@ -494,8 +590,12 @@ Error llvm::writeArchive(StringRef ArcName, // If LastOffset isn't going to fit in a 32-bit varible we need to switch // to 64-bit. Note that the file can be larger than 4GB as long as the last // member starts before the 4GB offset. - if (LastOffset >= (1ULL << Sym64Threshold)) - Kind = object::Archive::K_GNU64; + if (LastOffset >= (1ULL << Sym64Threshold)) { + if (Kind == object::Archive::K_DARWIN) + Kind = object::Archive::K_DARWIN64; + else + Kind = object::Archive::K_GNU64; + } } Expected<sys::fs::TempFile> Temp = |