summary | refs | log | tree | commit | diff
path: root/lib/Object/ArchiveWriter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Object/ArchiveWriter.cpp')
-rw-r--r-- lib/Object/ArchiveWriter.cpp 188
1 files changed, 144 insertions, 44 deletions
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index ea17b2220a0b..da93602cbb28 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -27,6 +27,8 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
+#include <map>
+
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
@@ -119,6 +121,11 @@ static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) {
OS.indent(Size - SizeSoFar);
}
+static bool isDarwin(object::Archive::Kind Kind) {
+ return Kind == object::Archive::K_DARWIN ||
+ Kind == object::Archive::K_DARWIN64;
+}
+
static bool isBSDLike(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
@@ -126,8 +133,8 @@ static bool isBSDLike(object::Archive::Kind Kind) {
return false;
case object::Archive::K_BSD:
case object::Archive::K_DARWIN:
- return true;
case object::Archive::K_DARWIN64:
+ return true;
case object::Archive::K_COFF:
break;
}
@@ -243,20 +250,33 @@ static void addToStringTable(raw_ostream &Out, StringRef ArcName,
static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
raw_ostream &StringTable,
+ StringMap<uint64_t> &MemberNames,
object::Archive::Kind Kind, bool Thin,
StringRef ArcName, const NewArchiveMember &M,
+ sys::TimePoint<std::chrono::seconds> ModTime,
unsigned Size) {
+
if (isBSDLike(Kind))
- return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID,
+ return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
if (!useStringTable(Thin, M.MemberName))
- return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID,
+ return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
Out << '/';
- uint64_t NamePos = StringTable.tell();
- addToStringTable(StringTable, ArcName, M, Thin);
+ uint64_t NamePos;
+ if (Thin) {
+ NamePos = StringTable.tell();
+ addToStringTable(StringTable, ArcName, M, Thin);
+ } else {
+ auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
+ if (Insertion.second) {
+ Insertion.first->second = StringTable.tell();
+ addToStringTable(StringTable, ArcName, M, Thin);
+ }
+ NamePos = Insertion.first->second;
+ }
printWithSpacePadding(Out, NamePos, 15);
- printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size);
+ printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size);
}
namespace {
@@ -310,7 +330,9 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
StringRef StringTable) {
- if (StringTable.empty())
+ // We don't write a symbol table on an archive with no members -- except on
+ // Darwin, where the linker will abort unless the archive has a symbol table.
+ if (StringTable.empty() && !isDarwin(Kind))
return;
unsigned NumSyms = 0;
@@ -318,15 +340,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
NumSyms += M.Symbols.size();
unsigned Size = 0;
- Size += is64BitKind(Kind) ? 8 : 4; // Number of entries
+ unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t);
+
+ Size += OffsetSize; // Number of entries
if (isBSDLike(Kind))
- Size += NumSyms * 8; // Table
- else if (is64BitKind(Kind))
- Size += NumSyms * 8; // Table
+ Size += NumSyms * OffsetSize * 2; // Table
else
- Size += NumSyms * 4; // Table
+ Size += NumSyms * OffsetSize; // Table
if (isBSDLike(Kind))
- Size += 4; // byte count
+ Size += OffsetSize; // byte count
Size += StringTable.size();
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
@@ -336,25 +358,26 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
unsigned Pad = OffsetToAlignment(Size, Alignment);
Size += Pad;
- if (isBSDLike(Kind))
- printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0,
- 0, Size);
- else if (is64BitKind(Kind))
- printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size);
- else
- printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size);
+ if (isBSDLike(Kind)) {
+ const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
+ printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
+ Size);
+ } else {
+ const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
+ printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
+ }
uint64_t Pos = Out.tell() + Size;
if (isBSDLike(Kind))
- print<uint32_t>(Out, Kind, NumSyms * 8);
+ printNBits(Out, Kind, NumSyms * 2 * OffsetSize);
else
printNBits(Out, Kind, NumSyms);
for (const MemberData &M : Members) {
for (unsigned StringOffset : M.Symbols) {
if (isBSDLike(Kind))
- print<uint32_t>(Out, Kind, StringOffset);
+ printNBits(Out, Kind, StringOffset);
printNBits(Out, Kind, Pos); // member offset
}
Pos += M.Header.size() + M.Data.size() + M.Padding.size();
@@ -362,7 +385,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
if (isBSDLike(Kind))
// byte count of the string table
- print<uint32_t>(Out, Kind, StringTable.size());
+ printNBits(Out, Kind, StringTable.size());
Out << StringTable;
while (Pad--)
@@ -372,20 +395,32 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
static Expected<std::vector<unsigned>>
getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
std::vector<unsigned> Ret;
- LLVMContext Context;
- Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
- object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown,
- &Context);
- if (!ObjOrErr) {
- // FIXME: check only for "not an object file" errors.
- consumeError(ObjOrErr.takeError());
- return Ret;
+ // In the scenario when LLVMContext is populated SymbolicFile will contain a
+ // reference to it, thus SymbolicFile should be destroyed first.
+ LLVMContext Context;
+ std::unique_ptr<object::SymbolicFile> Obj;
+ if (identify_magic(Buf.getBuffer()) == file_magic::bitcode) {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
+ Buf, file_magic::bitcode, &Context);
+ if (!ObjOrErr) {
+ // FIXME: check only for "not an object file" errors.
+ consumeError(ObjOrErr.takeError());
+ return Ret;
+ }
+ Obj = std::move(*ObjOrErr);
+ } else {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
+ if (!ObjOrErr) {
+ // FIXME: check only for "not an object file" errors.
+ consumeError(ObjOrErr.takeError());
+ return Ret;
+ }
+ Obj = std::move(*ObjOrErr);
}
HasObject = true;
- object::SymbolicFile &Obj = *ObjOrErr.get();
- for (const object::BasicSymbolRef &S : Obj.symbols()) {
+ for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
continue;
Ret.push_back(SymNames.tell());
@@ -399,7 +434,7 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, StringRef ArcName,
- ArrayRef<NewArchiveMember> NewMembers) {
+ bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
// This ignores the symbol table, but we only need the value mod 8 and the
@@ -408,6 +443,62 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
std::vector<MemberData> Ret;
bool HasObject = false;
+
+ // Deduplicate long member names in the string table and reuse earlier name
+ // offsets. This especially saves space for COFF Import libraries where all
+ // members have the same name.
+ StringMap<uint64_t> MemberNames;
+
+ // UniqueTimestamps is a special case to improve debugging on Darwin:
+ //
+ // The Darwin linker does not link debug info into the final
+ // binary. Instead, it emits entries of type N_OSO in the output
+ // binary's symbol table, containing references to the linked-in
+ // object files. Using that reference, the debugger can read the
+ // debug data directly from the object files. Alternatively, an
+ // invocation of 'dsymutil' will link the debug data from the object
+ // files into a dSYM bundle, which can be loaded by the debugger,
+ // instead of the object files.
+ //
+ // For an object file, the N_OSO entries contain the absolute path
+ // to the file, and the file's timestamp. For an object
+ // included in an archive, the path is formatted like
+ // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
+ // archive member's timestamp, rather than the archive's timestamp.
+ //
+ // However, this doesn't always uniquely identify an object within
+ // an archive -- an archive file can have multiple entries with the
+ // same filename. (This will happen commonly if the original object
+ // files started in different directories.) The only way they get
+ // distinguished, then, is via the timestamp. But this process is
+ // unable to find the correct object file in the archive when there
+ // are two files of the same name and timestamp.
+ //
+ // Additionally, timestamp==0 is treated specially, and causes the
+ // timestamp to be ignored as a match criterion.
+ //
+ // That will "usually" work out okay when creating an archive not in
+ // deterministic timestamp mode, because the objects will probably
+ // have been created at different timestamps.
+ //
+ // To ameliorate this problem, in deterministic archive mode (which
+ // is the default), on Darwin we will emit a unique non-zero
+ // timestamp for each entry with a duplicated name. This is still
+ // deterministic: the only thing affecting that timestamp is the
+ // order of the files in the resultant archive.
+ //
+ // See also the functions that handle the lookup:
+ // in lldb: ObjectContainerBSDArchive::Archive::FindObject()
+ // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
+ bool UniqueTimestamps = Deterministic && isDarwin(Kind);
+ std::map<StringRef, unsigned> FilenameCount;
+ if (UniqueTimestamps) {
+ for (const NewArchiveMember &M : NewMembers)
+ FilenameCount[M.MemberName]++;
+ for (auto &Entry : FilenameCount)
+ Entry.second = Entry.second > 1 ? 1 : 0;
+ }
+
for (const NewArchiveMember &M : NewMembers) {
std::string Header;
raw_string_ostream Out(Header);
@@ -419,14 +510,19 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. This matches the behaviour with cctools and ensures that ld64
// is happy with archives that we generate.
- unsigned MemberPadding = Kind == object::Archive::K_DARWIN
- ? OffsetToAlignment(Data.size(), 8)
- : 0;
+ unsigned MemberPadding =
+ isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0;
unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2);
StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);
- printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M,
- Buf.getBufferSize() + MemberPadding);
+ sys::TimePoint<std::chrono::seconds> ModTime;
+ if (UniqueTimestamps)
+ // Increment timestamp for each file of a given name.
+ ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
+ else
+ ModTime = M.ModTime;
+ printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
+ M, ModTime, Buf.getBufferSize() + MemberPadding);
Out.flush();
Expected<std::vector<unsigned>> Symbols =
@@ -457,8 +553,8 @@ Error llvm::writeArchive(StringRef ArcName,
SmallString<0> StringTableBuf;
raw_svector_ostream StringTable(StringTableBuf);
- Expected<std::vector<MemberData>> DataOrErr =
- computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers);
+ Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
+ StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -470,7 +566,7 @@ Error llvm::writeArchive(StringRef ArcName,
if (WriteSymtab) {
uint64_t MaxOffset = 0;
uint64_t LastOffset = MaxOffset;
- for (const auto& M : Data) {
+ for (const auto &M : Data) {
// Record the start of the member's offset
LastOffset = MaxOffset;
// Account for the size of each part associated with the member.
@@ -494,8 +590,12 @@ Error llvm::writeArchive(StringRef ArcName,
// If LastOffset isn't going to fit in a 32-bit variable we need to switch
// to 64-bit. Note that the file can be larger than 4GB as long as the last
// member starts before the 4GB offset.
- if (LastOffset >= (1ULL << Sym64Threshold))
- Kind = object::Archive::K_GNU64;
+ if (LastOffset >= (1ULL << Sym64Threshold)) {
+ if (Kind == object::Archive::K_DARWIN)
+ Kind = object::Archive::K_DARWIN64;
+ else
+ Kind = object::Archive::K_GNU64;
+ }
}
Expected<sys::fs::TempFile> Temp =