diff options
Diffstat (limited to 'contrib/llvm/lib/Object')
19 files changed, 8737 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp new file mode 100644 index 000000000000..f2021f796d12 --- /dev/null +++ b/contrib/llvm/lib/Object/Archive.cpp @@ -0,0 +1,976 @@ +//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ArchiveObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Archive.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace object; +using namespace llvm::support::endian; + +static const char *const Magic = "!<arch>\n"; +static const char *const ThinMagic = "!<thin>\n"; + +void Archive::anchor() { } + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : Parent(Parent), + ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + if (Size < sizeof(ArMemHdrType)) { + if (Err) { + std::string Msg("remaining size of archive too small for next archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } + if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { + if (Err) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator, + sizeof(ArMemHdr->Terminator))); + OS.flush(); + std::string Msg("terminator characters in archive member \"" + Buf + + "\" not the correct \"`\\n\" values for the archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } +} + +// This gets the raw name from the ArMemHdr->Name field and checks that it is +// valid for the kind of archive. If it is not valid it returns an Error. +Expected<StringRef> ArchiveMemberHeader::getRawName() const { + char EndCond; + auto Kind = Parent->kind(); + if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { + if (ArMemHdr->Name[0] == ' ') { + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("name contains a leading space for archive member " + "header at offset " + Twine(Offset)); + } + EndCond = ' '; + } + else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') + EndCond = ' '; + else + EndCond = '/'; + llvm::StringRef::size_type end = + llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); + if (end == llvm::StringRef::npos) + end = sizeof(ArMemHdr->Name); + assert(end <= sizeof(ArMemHdr->Name) && end > 0); + // Don't include the EndCond if there is one. + return llvm::StringRef(ArMemHdr->Name, end); +} + +// This gets the name looking up long names. Size is the size of the archive +// member including the header, so the size of any name following the header +// is checked to make sure it does not overflow. +Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { + + // This can be called from the ArchiveMemberHeader constructor when the + // archive header is truncated to produce an error message with the name. + // Make sure the name field is not truncated. + if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("archive header truncated before the name field " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + + // The raw name itself can be invalid. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + + // Check if it's a special name. + if (Name[0] == '/') { + if (Name.size() == 1) // Linker member. + return Name; + if (Name.size() == 2 && Name[1] == '/') // String table. + return Name; + // It's a long name. + // Get the string table offset. + std::size_t StringOffset; + if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(1).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name offset characters after the '/' are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(ArchiveOffset)); + } + + // Verify it. + if (StringOffset >= Parent->getStringTable().size()) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name offset " + Twine(StringOffset) + " past " + "the end of the string table for archive member " + "header at offset " + Twine(ArchiveOffset)); + } + const char *addr = Parent->getStringTable().begin() + StringOffset; + + // GNU long file names end with a "/\n". + if (Parent->kind() == Archive::K_GNU || + Parent->kind() == Archive::K_MIPS64) { + StringRef::size_type End = StringRef(addr).find('\n'); + return StringRef(addr, End - 1); + } + return addr; + } + + if (Name.startswith("#1/")) { + uint64_t NameLength; + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(ArchiveOffset)); + } + if (getSizeOf() + NameLength > Size) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name length: " + Twine(NameLength) + + " extends past the end of the member or archive " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), + NameLength).rtrim('\0'); + } + + // It is not a long name so trim the blanks at the end of the name. + if (Name[Name.size() - 1] != '/') + return Name.rtrim(' '); + + // It's a simple name. + return Name.drop_back(1); +} + +Expected<uint32_t> ArchiveMemberHeader::getSize() const { + uint32_t Ret; + if (llvm::StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in size field in archive header are not " + "all decimal numbers: '" + Buf + "' for archive " + "member header at offset " + Twine(Offset)); + } + return Ret; +} + +Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const { + unsigned Ret; + if (StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in AccessMode field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + return static_cast<sys::fs::perms>(Ret); +} + +Expected<sys::TimePoint<std::chrono::seconds>> +ArchiveMemberHeader::getLastModified() const { + unsigned Seconds; + if (StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(' ') + .getAsInteger(10, Seconds)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in LastModified field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + + return sys::toTimePoint(Seconds); +} + +Expected<unsigned> ArchiveMemberHeader::getUID() const { + unsigned Ret; + StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); + if (User.empty()) + return 0; + if (User.getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(User); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in UID field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + return Ret; +} + +Expected<unsigned> ArchiveMemberHeader::getGID() const { + unsigned Ret; + StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); + if (Group.empty()) + return 0; + if (Group.getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Group); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in GID field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + return Ret; +} + +Archive::Child::Child(const Archive *Parent, StringRef Data, + uint16_t StartOfFile) + : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr), + Data(Data), StartOfFile(StartOfFile) { +} + +Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) + : Parent(Parent), + Header(Parent, Start, + Parent + ? Parent->getData().size() - (Start - Parent->getData().data()) + : 0, Err) { + if (!Start) + return; + + // If we are pointed to real data, Start is not a nullptr, then there must be + // a non-null Err pointer available to report malformed data on. Only in + // the case sentinel value is being constructed is Err is permitted to be a + // nullptr. + assert(Err && "Err can't be nullptr if Start is not a nullptr"); + + ErrorAsOutParameter ErrAsOutParam(Err); + + // If there was an error in the construction of the Header + // then just return with the error now set. + if (*Err) + return; + + uint64_t Size = Header.getSizeOf(); + Data = StringRef(Start, Size); + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) { + *Err = isThinOrErr.takeError(); + return; + } + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint64_t> MemberSize = getRawSize(); + if (!MemberSize) { + *Err = MemberSize.takeError(); + return; + } + Size += MemberSize.get(); + Data = StringRef(Start, Size); + } + + // Setup StartOfFile and PaddingBytes. + StartOfFile = Header.getSizeOf(); + // Don't include attached name. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr){ + *Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); + if (Name.startswith("#1/")) { + uint64_t NameSize; + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t Offset = Start - Parent->getData().data(); + *Err = malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(Offset)); + return; + } + StartOfFile += NameSize; + } +} + +Expected<uint64_t> Archive::Child::getSize() const { + if (Parent->IsThin) { + Expected<uint32_t> Size = Header.getSize(); + if (!Size) + return Size.takeError(); + return Size.get(); + } + return Data.size() - StartOfFile; +} + +Expected<uint64_t> Archive::Child::getRawSize() const { + return Header.getSize(); +} + +Expected<bool> Archive::Child::isThinMember() const { + Expected<StringRef> NameOrErr = Header.getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Parent->IsThin && Name != "/" && Name != "//"; +} + +Expected<std::string> Archive::Child::getFullName() const { + Expected<bool> isThin = isThinMember(); + if (!isThin) + return isThin.takeError(); + assert(isThin.get()); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; + if (sys::path::is_absolute(Name)) + return Name; + + SmallString<128> FullName = sys::path::parent_path( + Parent->getMemoryBufferRef().getBufferIdentifier()); + sys::path::append(FullName, Name); + return StringRef(FullName); +} + +Expected<StringRef> Archive::Child::getBuffer() const { + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) + return isThinOrErr.takeError(); + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint32_t> Size = getSize(); + if (!Size) + return Size.takeError(); + return StringRef(Data.data() + StartOfFile, Size.get()); + } + Expected<std::string> FullNameOrErr = getFullName(); + if (!FullNameOrErr) + return FullNameOrErr.takeError(); + const std::string &FullName = *FullNameOrErr; + ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); + if (std::error_code EC = Buf.getError()) + return errorCodeToError(EC); + Parent->ThinBuffers.push_back(std::move(*Buf)); + return Parent->ThinBuffers.back()->getBuffer(); +} + +Expected<Archive::Child> Archive::Child::getNext() const { + size_t SpaceToSkip = Data.size(); + // If it's odd, add 1 to make it even. + if (SpaceToSkip & 1) + ++SpaceToSkip; + + const char *NextLoc = Data.data() + SpaceToSkip; + + // Check to see if this is at the end of the archive. + if (NextLoc == Parent->Data.getBufferEnd()) + return Child(nullptr, nullptr, nullptr); + + // Check to see if this is past the end of the archive. + if (NextLoc > Parent->Data.getBufferEnd()) { + std::string Msg("offset to next archive member past the end of the archive " + "after member "); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = Data.data() - Parent->getData().data(); + return malformedError(Msg + "at offset " + Twine(Offset)); + } else + return malformedError(Msg + NameOrErr.get()); + } + + Error Err = Error::success(); + Child Ret(Parent, NextLoc, &Err); + if (Err) + return std::move(Err); + return Ret; +} + +uint64_t Archive::Child::getChildOffset() const { + const char *a = Parent->Data.getBuffer().data(); + const char *c = Data.data(); + uint64_t offset = c - a; + return offset; +} + +Expected<StringRef> Archive::Child::getName() const { + Expected<uint64_t> RawSizeOrErr = getRawSize(); + if (!RawSizeOrErr) + return RawSizeOrErr.takeError(); + uint64_t RawSize = RawSizeOrErr.get(); + Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Name; +} + +Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + Expected<StringRef> Buf = getBuffer(); + if (!Buf) + return Buf.takeError(); + return MemoryBufferRef(*Buf, Name); +} + +Expected<std::unique_ptr<Binary>> +Archive::Child::getAsBinary(LLVMContext *Context) const { + Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); + if (!BuffOrErr) + return BuffOrErr.takeError(); + + auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); + if (BinaryOrErr) + return std::move(*BinaryOrErr); + return BinaryOrErr.takeError(); +} + +Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<Archive> Ret(new Archive(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} + +void Archive::setFirstRegular(const Child &C) { + FirstRegularData = C.Data; + FirstRegularStartOfFile = C.StartOfFile; +} + +Archive::Archive(MemoryBufferRef Source, Error &Err) + : Binary(Binary::ID_Archive, Source) { + ErrorAsOutParameter ErrAsOutParam(&Err); + StringRef Buffer = Data.getBuffer(); + // Check for sufficient magic. + if (Buffer.startswith(ThinMagic)) { + IsThin = true; + } else if (Buffer.startswith(Magic)) { + IsThin = false; + } else { + Err = make_error<GenericBinaryError>("File too small to be an archive", + object_error::invalid_file_type); + return; + } + + // Make sure Format is initialized before any call to + // ArchiveMemberHeader::getName() is made. This could be a valid empty + // archive which is the same in all formats. So claiming it to be gnu to is + // fine if not totally correct before we look for a string table or table of + // contents. + Format = K_GNU; + + // Get the special members. + child_iterator I = child_begin(Err, false); + if (Err) + return; + child_iterator E = child_end(); + + // See if this is a valid empty archive and if so return. + if (I == E) { + Err = Error::success(); + return; + } + const Child *C = &*I; + + auto Increment = [&]() { + ++I; + if (Err) + return true; + C = &*I; + return false; + }; + + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); + + // Below is the pattern that is used to figure out the archive format + // GNU archive format + // First member : / (may exist, if it exists, points to the symbol table ) + // Second member : // (may exist, if it exists, points to the string table) + // Note : The string table is used if the filename exceeds 15 characters + // BSD archive format + // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) + // There is no string table, if the filename exceeds 15 characters or has a + // embedded space, the filename has #1/<size>, The size represents the size + // of the filename that needs to be read after the archive header + // COFF archive format + // First member : / + // Second member : / (provides a directory of symbols) + // Third member : // (may exist, if it exists, contains the string table) + // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present + // even if the string table is empty. However, lib.exe does not in fact + // seem to create the third member if there's no member whose filename + // exceeds 15 characters. So the third member is optional. + + if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") { + if (Name == "__.SYMDEF") + Format = K_BSD; + else // Name == "__.SYMDEF_64" + Format = K_DARWIN64; + // We know that the symbol table is not an external file, but we still must + // check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + setFirstRegular(*C); + + Err = Error::success(); + return; + } + + if (Name.startswith("#1/")) { + Format = K_BSD; + // We know this is BSD, so getName will work since there is no string table. + Expected<StringRef> NameOrErr = C->getName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + } + else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { + Format = K_DARWIN64; + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + } + setFirstRegular(*C); + return; + } + + // MIPS 64-bit ELF archives use a special format of a symbol table. + // This format is marked by `ar_name` field equals to "/SYM64/". + // For detailed description see page 96 in the following document: + // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf + + bool has64SymTable = false; + if (Name == "/" || Name == "/SYM64/") { + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Name == "/SYM64/") + has64SymTable = true; + + if (Increment()) + return; + if (I == E) { + Err = Error::success(); + return; + } + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + } + + if (Name == "//") { + Format = has64SymTable ? K_MIPS64 : K_GNU; + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); + if (Increment()) + return; + setFirstRegular(*C); + Err = Error::success(); + return; + } + + if (Name[0] != '/') { + Format = has64SymTable ? K_MIPS64 : K_GNU; + setFirstRegular(*C); + Err = Error::success(); + return; + } + + if (Name != "/") { + Err = errorCodeToError(object_error::parse_failed); + return; + } + + Format = K_COFF; + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + + if (Increment()) + return; + + if (I == E) { + setFirstRegular(*C); + Err = Error::success(); + return; + } + + NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + + if (Name == "//") { + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); + if (Increment()) + return; + } + + setFirstRegular(*C); + Err = Error::success(); +} + +Archive::child_iterator Archive::child_begin(Error &Err, + bool SkipInternal) const { + if (isEmpty()) + return child_end(); + + if (SkipInternal) + return child_iterator(Child(this, FirstRegularData, + FirstRegularStartOfFile), + &Err); + + const char *Loc = Data.getBufferStart() + strlen(Magic); + Child C(this, Loc, &Err); + if (Err) + return child_end(); + return child_iterator(C, &Err); +} + +Archive::child_iterator Archive::child_end() const { + return child_iterator(Child(nullptr, nullptr, nullptr), nullptr); +} + +StringRef Archive::Symbol::getName() const { + return Parent->getSymbolTable().begin() + StringIndex; +} + +Expected<Archive::Child> Archive::Symbol::getMember() const { + const char *Buf = Parent->getSymbolTable().begin(); + const char *Offsets = Buf; + if (Parent->kind() == K_MIPS64 || Parent->kind() == K_DARWIN64) + Offsets += sizeof(uint64_t); + else + Offsets += sizeof(uint32_t); + uint32_t Offset = 0; + if (Parent->kind() == K_GNU) { + Offset = read32be(Offsets + SymbolIndex * 4); + } else if (Parent->kind() == K_MIPS64) { + Offset = read64be(Offsets + SymbolIndex * 8); + } else if (Parent->kind() == K_BSD) { + // The SymbolIndex is an index into the ranlib structs that start at + // Offsets (the first uint32_t is the number of bytes of the ranlib + // structs). The ranlib structs are a pair of uint32_t's the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol. Which is what + // is needed here. + Offset = read32le(Offsets + SymbolIndex * 8 + 4); + } else if (Parent->kind() == K_DARWIN64) { + // The SymbolIndex is an index into the ranlib_64 structs that start at + // Offsets (the first uint64_t is the number of bytes of the ranlib_64 + // structs). The ranlib_64 structs are a pair of uint64_t's the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol. Which is what + // is needed here. + Offset = read64le(Offsets + SymbolIndex * 16 + 8); + } else { + // Skip offsets. + uint32_t MemberCount = read32le(Buf); + Buf += MemberCount * 4 + 4; + + uint32_t SymbolCount = read32le(Buf); + if (SymbolIndex >= SymbolCount) + return errorCodeToError(object_error::parse_failed); + + // Skip SymbolCount to get to the indices table. + const char *Indices = Buf + 4; + + // Get the index of the offset in the file member offset table for this + // symbol. + uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); + // Subtract 1 since OffsetIndex is 1 based. + --OffsetIndex; + + if (OffsetIndex >= MemberCount) + return errorCodeToError(object_error::parse_failed); + + Offset = read32le(Offsets + OffsetIndex * 4); + } + + const char *Loc = Parent->getData().begin() + Offset; + Error Err = Error::success(); + Child C(Parent, Loc, &Err); + if (Err) + return std::move(Err); + return C; +} + +Archive::Symbol Archive::Symbol::getNext() const { + Symbol t(*this); + if (Parent->kind() == K_BSD) { + // t.StringIndex is an offset from the start of the __.SYMDEF or + // "__.SYMDEF SORTED" member into the string table for the ranlib + // struct indexed by t.SymbolIndex . To change t.StringIndex to the + // offset in the string table for t.SymbolIndex+1 we subtract the + // its offset from the start of the string table for t.SymbolIndex + // and add the offset of the string table for t.SymbolIndex+1. + + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's the first being a string table offset + // and the second being the offset into the archive of the member that + // define the symbol. After that the next uint32_t is the byte count of + // the string table followed by the string table. + const char *Buf = Parent->getSymbolTable().begin(); + uint32_t RanlibCount = 0; + RanlibCount = read32le(Buf) / 8; + // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) + // don't change the t.StringIndex as we don't want to reference a ranlib + // past RanlibCount. + if (t.SymbolIndex + 1 < RanlibCount) { + const char *Ranlibs = Buf + 4; + uint32_t CurRanStrx = 0; + uint32_t NextRanStrx = 0; + CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); + NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); + t.StringIndex -= CurRanStrx; + t.StringIndex += NextRanStrx; + } + } else { + // Go to one past next null. + t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; + } + ++t.SymbolIndex; + return t; +} + +Archive::symbol_iterator Archive::symbol_begin() const { + if (!hasSymbolTable()) + return symbol_iterator(Symbol(this, 0, 0)); + + const char *buf = getSymbolTable().begin(); + if (kind() == K_GNU) { + uint32_t symbol_count = 0; + symbol_count = read32be(buf); + buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); + } else if (kind() == K_MIPS64) { + uint64_t symbol_count = read64be(buf); + buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); + } else if (kind() == K_BSD) { + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's the first being a string table offset + // and the second being the offset into the archive of the member that + // define the symbol. After that the next uint32_t is the byte count of + // the string table followed by the string table. + uint32_t ranlib_count = 0; + ranlib_count = read32le(buf) / 8; + const char *ranlibs = buf + 4; + uint32_t ran_strx = 0; + ran_strx = read32le(ranlibs); + buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint32_t); + buf += ran_strx; + } else if (kind() == K_DARWIN64) { + // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t + // which is the number of bytes of ranlib_64 structs that follow. The + // ranlib_64 structs are a pair of uint64_t's the first being a string + // table offset and the second being the offset into the archive of the + // member that define the symbol. After that the next uint64_t is the byte + // count of the string table followed by the string table. + uint64_t ranlib_count = 0; + ranlib_count = read64le(buf) / 16; + const char *ranlibs = buf + 8; + uint64_t ran_strx = 0; + ran_strx = read64le(ranlibs); + buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint64_t); + buf += ran_strx; + } else { + uint32_t member_count = 0; + uint32_t symbol_count = 0; + member_count = read32le(buf); + buf += 4 + (member_count * 4); // Skip offsets. + symbol_count = read32le(buf); + buf += 4 + (symbol_count * 2); // Skip indices. + } + uint32_t string_start_offset = buf - getSymbolTable().begin(); + return symbol_iterator(Symbol(this, 0, string_start_offset)); +} + +Archive::symbol_iterator Archive::symbol_end() const { + return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); +} + +uint32_t Archive::getNumberOfSymbols() const { + if (!hasSymbolTable()) + return 0; + const char *buf = getSymbolTable().begin(); + if (kind() == K_GNU) + return read32be(buf); + if (kind() == K_MIPS64) + return read64be(buf); + if (kind() == K_BSD) + return read32le(buf) / 8; + if (kind() == K_DARWIN64) + return read64le(buf) / 16; + uint32_t member_count = 0; + member_count = read32le(buf); + buf += 4 + (member_count * 4); // Skip offsets. + return read32le(buf); +} + +Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { + Archive::symbol_iterator bs = symbol_begin(); + Archive::symbol_iterator es = symbol_end(); + + for (; bs != es; ++bs) { + StringRef SymName = bs->getName(); + if (SymName == name) { + if (auto MemberOrErr = bs->getMember()) + return Child(*MemberOrErr); + else + return MemberOrErr.takeError(); + } + } + return Optional<Child>(); +} + +// Returns true if archive file contains no member file. +bool Archive::isEmpty() const { return Data.getBufferSize() == 8; } + +bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp new file mode 100644 index 000000000000..f8e3c5a0a03f --- /dev/null +++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp @@ -0,0 +1,434 @@ +//===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeArchive function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/raw_ostream.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; + +NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)) {} + +Expected<NewArchiveMember> +NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, + bool Deterministic) { + Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); + if (!BufOrErr) + return BufOrErr.takeError(); + + NewArchiveMember M; + assert(M.IsNew == false); + M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + if (!Deterministic) { + auto ModTimeOrErr = OldMember.getLastModified(); + if (!ModTimeOrErr) + return ModTimeOrErr.takeError(); + M.ModTime = ModTimeOrErr.get(); + Expected<unsigned> UIDOrErr = OldMember.getUID(); + if (!UIDOrErr) + return UIDOrErr.takeError(); + M.UID = UIDOrErr.get(); + Expected<unsigned> GIDOrErr = OldMember.getGID(); + if (!GIDOrErr) + return GIDOrErr.takeError(); + M.GID = GIDOrErr.get(); + Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); + if (!AccessModeOrErr) + return AccessModeOrErr.takeError(); + M.Perms = AccessModeOrErr.get(); + } + return std::move(M); +} + +Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, + bool Deterministic) { + sys::fs::file_status Status; + int FD; + if (auto EC = sys::fs::openFileForRead(FileName, FD)) + return errorCodeToError(EC); + assert(FD != -1); + + if (auto EC = sys::fs::status(FD, Status)) + return errorCodeToError(EC); + + // Opening a directory doesn't make sense. Let it fail. + // Linux cannot open directories with open(2), although + // cygwin and *bsd can. + if (Status.type() == sys::fs::file_type::directory_file) + return errorCodeToError(make_error_code(errc::is_a_directory)); + + ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = + MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); + if (!MemberBufferOrErr) + return errorCodeToError(MemberBufferOrErr.getError()); + + if (close(FD) != 0) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + NewArchiveMember M; + M.IsNew = true; + M.Buf = std::move(*MemberBufferOrErr); + if (!Deterministic) { + M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( + Status.getLastModificationTime()); + M.UID = Status.getUser(); + M.GID = Status.getGroup(); + M.Perms = Status.permissions(); + } + return std::move(M); +} + +template <typename T> +static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, + bool MayTruncate = false) { + uint64_t OldPos = OS.tell(); + OS << Data; + unsigned SizeSoFar = OS.tell() - OldPos; + if (Size > SizeSoFar) { + OS.indent(Size - SizeSoFar); + } else if (Size < SizeSoFar) { + assert(MayTruncate && "Data doesn't fit in Size"); + // Some of the data this is used for (like UID) can be larger than the + // space available in the archive format. Truncate in that case. + OS.seek(OldPos + Size); + } +} + +static void print32(raw_ostream &Out, object::Archive::Kind Kind, + uint32_t Val) { + if (Kind == object::Archive::K_GNU) + support::endian::Writer<support::big>(Out).write(Val); + else + support::endian::Writer<support::little>(Out).write(Val); +} + +static void printRestOfMemberHeader( + raw_fd_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); + printWithSpacePadding(Out, UID, 6, true); + printWithSpacePadding(Out, GID, 6, true); + printWithSpacePadding(Out, format("%o", Perms), 8); + printWithSpacePadding(Out, Size, 10); + Out << "`\n"; +} + +static void +printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + unsigned Size) { + printWithSpacePadding(Out, Twine(Name) + "/", 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +static void +printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + unsigned Size) { + uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); + // Pad so that even 64 bit object files are aligned. + unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); + unsigned NameWithPadding = Name.size() + Pad; + printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, + NameWithPadding + Size); + Out << Name; + assert(PosAfterHeader == Out.tell()); + while (Pad--) + Out.write(uint8_t(0)); +} + +static bool useStringTable(bool Thin, StringRef Name) { + return Thin || Name.size() >= 16; +} + +static void +printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, + StringRef Name, + std::vector<unsigned>::iterator &StringMapIndexIter, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + if (Kind == object::Archive::K_BSD) + return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); + if (!useStringTable(Thin, Name)) + return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); + Out << '/'; + printWithSpacePadding(Out, *StringMapIndexIter++, 15); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +// Compute the relative path from From to To. +static std::string computeRelativePath(StringRef From, StringRef To) { + if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) + return To; + + StringRef DirFrom = sys::path::parent_path(From); + auto FromI = sys::path::begin(DirFrom); + auto ToI = sys::path::begin(To); + while (*FromI == *ToI) { + ++FromI; + ++ToI; + } + + SmallString<128> Relative; + for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) + sys::path::append(Relative, ".."); + + for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) + sys::path::append(Relative, *ToI); + +#ifdef LLVM_ON_WIN32 + // Replace backslashes with slashes so that the path is portable between *nix + // and Windows. + std::replace(Relative.begin(), Relative.end(), '\\', '/'); +#endif + + return Relative.str(); +} + +static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, + ArrayRef<NewArchiveMember> Members, + std::vector<unsigned> &StringMapIndexes, + bool Thin) { + unsigned StartOffset = 0; + for (const NewArchiveMember &M : Members) { + StringRef Path = M.Buf->getBufferIdentifier(); + StringRef Name = sys::path::filename(Path); + if (!useStringTable(Thin, Name)) + continue; + if (StartOffset == 0) { + printWithSpacePadding(Out, "//", 58); + Out << "`\n"; + StartOffset = Out.tell(); + } + StringMapIndexes.push_back(Out.tell() - StartOffset); + + if (Thin) { + if (M.IsNew) + Out << computeRelativePath(ArcName, Path); + else + Out << M.Buf->getBufferIdentifier(); + } else + Out << Name; + + Out << "/\n"; + } + if (StartOffset == 0) + return; + if (Out.tell() % 2) + Out << '\n'; + int Pos = Out.tell(); + Out.seek(StartOffset - 12); + printWithSpacePadding(Out, Pos - StartOffset, 10); + Out.seek(Pos); +} + +static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { + using namespace std::chrono; + + if (!Deterministic) + return time_point_cast<seconds>(system_clock::now()); + return sys::TimePoint<seconds>(); +} + +// Returns the offset of the first reference to a member offset. +static ErrorOr<unsigned> +writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, + ArrayRef<NewArchiveMember> Members, + std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) { + unsigned HeaderStartOffset = 0; + unsigned BodyStartOffset = 0; + SmallString<128> NameBuf; + raw_svector_ostream NameOS(NameBuf); + LLVMContext Context; + for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) { + MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); + Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = + object::SymbolicFile::createSymbolicFile( + MemberBuffer, sys::fs::file_magic::unknown, &Context); + if (!ObjOrErr) { + // FIXME: check only for "not an object file" errors. + consumeError(ObjOrErr.takeError()); + continue; + } + object::SymbolicFile &Obj = *ObjOrErr.get(); + + if (!HeaderStartOffset) { + HeaderStartOffset = Out.tell(); + if (Kind == object::Archive::K_GNU) + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); + else + printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); + BodyStartOffset = Out.tell(); + print32(Out, Kind, 0); // number of entries or bytes + } + + for (const object::BasicSymbolRef &S : Obj.symbols()) { + uint32_t Symflags = S.getFlags(); + if (Symflags & object::SymbolRef::SF_FormatSpecific) + continue; + if (!(Symflags & object::SymbolRef::SF_Global)) + continue; + if (Symflags & object::SymbolRef::SF_Undefined) + continue; + + unsigned NameOffset = NameOS.tell(); + if (auto EC = S.printName(NameOS)) + return EC; + NameOS << '\0'; + MemberOffsetRefs.push_back(MemberNum); + if (Kind == object::Archive::K_BSD) + print32(Out, Kind, NameOffset); + print32(Out, Kind, 0); // member offset + } + } + + if (HeaderStartOffset == 0) + return 0; + + StringRef StringTable = NameOS.str(); + if (Kind == object::Archive::K_BSD) + print32(Out, Kind, StringTable.size()); // byte count of the string table + Out << StringTable; + + // ld64 requires the next member header to start at an offset that is + // 4 bytes aligned. + unsigned Pad = OffsetToAlignment(Out.tell(), 4); + while (Pad--) + Out.write(uint8_t(0)); + + // Patch up the size of the symbol table now that we know how big it is. + unsigned Pos = Out.tell(); + const unsigned MemberHeaderSize = 60; + Out.seek(HeaderStartOffset + 48); // offset of the size field. + printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10); + + // Patch up the number of symbols. + Out.seek(BodyStartOffset); + unsigned NumSyms = MemberOffsetRefs.size(); + if (Kind == object::Archive::K_GNU) + print32(Out, Kind, NumSyms); + else + print32(Out, Kind, NumSyms * 8); + + Out.seek(Pos); + return BodyStartOffset + 4; +} + +std::pair<StringRef, std::error_code> +llvm::writeArchive(StringRef ArcName, + std::vector<NewArchiveMember> &NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin, + std::unique_ptr<MemoryBuffer> OldArchiveBuf) { + assert((!Thin || Kind == object::Archive::K_GNU) && + "Only the gnu format has a thin mode"); + SmallString<128> TmpArchive; + int TmpArchiveFD; + if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", + TmpArchiveFD, TmpArchive)) + return std::make_pair(ArcName, EC); + + tool_output_file Output(TmpArchive, TmpArchiveFD); + raw_fd_ostream &Out = Output.os(); + if (Thin) + Out << "!<thin>\n"; + else + Out << "!<arch>\n"; + + std::vector<unsigned> MemberOffsetRefs; + + std::vector<std::unique_ptr<MemoryBuffer>> Buffers; + std::vector<MemoryBufferRef> Members; + std::vector<sys::fs::file_status> NewMemberStatus; + + unsigned MemberReferenceOffset = 0; + if (WriteSymtab) { + ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( + Out, Kind, NewMembers, MemberOffsetRefs, Deterministic); + if (auto EC = MemberReferenceOffsetOrErr.getError()) + return std::make_pair(ArcName, EC); + MemberReferenceOffset = MemberReferenceOffsetOrErr.get(); + } + + std::vector<unsigned> StringMapIndexes; + if (Kind != object::Archive::K_BSD) + writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); + + std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); + std::vector<unsigned> MemberOffset; + for (const NewArchiveMember &M : NewMembers) { + MemoryBufferRef File = M.Buf->getMemBufferRef(); + + unsigned Pos = Out.tell(); + MemberOffset.push_back(Pos); + + printMemberHeader(Out, Kind, Thin, + sys::path::filename(M.Buf->getBufferIdentifier()), + StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms, + M.Buf->getBufferSize()); + + if (!Thin) + Out << File.getBuffer(); + + if (Out.tell() % 2) + Out << '\n'; + } + + if (MemberReferenceOffset) { + Out.seek(MemberReferenceOffset); + for (unsigned MemberNum : MemberOffsetRefs) { + if (Kind == object::Archive::K_BSD) + Out.seek(Out.tell() + 4); // skip over the string offset + print32(Out, Kind, MemberOffset[MemberNum]); + } + } + + Output.keep(); + Out.close(); + + // At this point, we no longer need whatever backing memory + // was used to generate the NewMembers. On Windows, this buffer + // could be a mapped view of the file we want to replace (if + // we're updating an existing archive, say). In that case, the + // rename would still succeed, but it would leave behind a + // temporary file (actually the original file renamed) because + // a file cannot be deleted while there's a handle open on it, + // only renamed. So by freeing this buffer, this ensures that + // the last open handle on the destination file, if any, is + // closed before we attempt to rename. + OldArchiveBuf.reset(); + + sys::fs::rename(TmpArchive, ArcName); + return std::make_pair("", std::error_code()); +} diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp new file mode 100644 index 000000000000..8467d349cd95 --- /dev/null +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -0,0 +1,93 @@ +//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Binary class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Binary.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +// Include headers for createBinary. +#include "llvm/Object/Archive.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" + +using namespace llvm; +using namespace object; + +Binary::~Binary() {} + +Binary::Binary(unsigned int Type, MemoryBufferRef Source) + : TypeID(Type), Data(Source) {} + +StringRef Binary::getData() const { return Data.getBuffer(); } + +StringRef Binary::getFileName() const { return Data.getBufferIdentifier(); } + +MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; } + +Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, + LLVMContext *Context) { + sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer()); + + switch (Type) { + case sys::fs::file_magic::archive: + return Archive::create(Buffer); + case sys::fs::file_magic::elf: + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: + case sys::fs::file_magic::macho_kext_bundle: + case sys::fs::file_magic::coff_object: + case sys::fs::file_magic::coff_import_library: + case sys::fs::file_magic::pecoff_executable: + case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::wasm_object: + return ObjectFile::createSymbolicFile(Buffer, Type, Context); + case sys::fs::file_magic::macho_universal_binary: + return MachOUniversalBinary::create(Buffer); + case sys::fs::file_magic::unknown: + case sys::fs::file_magic::coff_cl_gl_object: + case sys::fs::file_magic::windows_resource: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); + } + llvm_unreachable("Unexpected Binary File Type"); +} + +Expected<OwningBinary<Binary>> object::createBinary(StringRef Path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = FileOrErr.getError()) + return errorCodeToError(EC); + std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get(); + + Expected<std::unique_ptr<Binary>> BinOrErr = + createBinary(Buffer->getMemBufferRef()); + if (!BinOrErr) + return BinOrErr.takeError(); + std::unique_ptr<Binary> &Bin = BinOrErr.get(); + + return OwningBinary<Binary>(std::move(Bin), std::move(Buffer)); +} diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp new file mode 100644 index 000000000000..a2d8f12449e6 --- /dev/null +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -0,0 +1,1581 @@ +//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the COFFObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFF.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> +#include <limits> + +using namespace llvm; +using namespace object; + +using support::ulittle16_t; +using support::ulittle32_t; +using support::ulittle64_t; +using support::little16_t; + +// Returns false if size is greater than the buffer size. And sets ec. +static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) { + if (M.getBufferSize() < Size) { + EC = object_error::unexpected_eof; + return false; + } + return true; +} + +static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr, + const uint64_t Size) { + if (Addr + Size < Addr || Addr + Size < Size || + Addr + Size > uintptr_t(M.getBufferEnd()) || + Addr < uintptr_t(M.getBufferStart())) { + return object_error::unexpected_eof; + } + return std::error_code(); +} + +// Sets Obj unless any bytes in [addr, addr + size) fall outsize of m. +// Returns unexpected_eof if error. +template <typename T> +static std::error_code getObject(const T *&Obj, MemoryBufferRef M, + const void *Ptr, + const uint64_t Size = sizeof(T)) { + uintptr_t Addr = uintptr_t(Ptr); + if (std::error_code EC = checkOffset(M, Addr, Size)) + return EC; + Obj = reinterpret_cast<const T *>(Addr); + return std::error_code(); +} + +// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without +// prefixed slashes. +static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) { + assert(Str.size() <= 6 && "String too long, possible overflow."); + if (Str.size() > 6) + return true; + + uint64_t Value = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= 'A' && Str[0] <= 'Z') // 0..25 + CharVal = Str[0] - 'A'; + else if (Str[0] >= 'a' && Str[0] <= 'z') // 26..51 + CharVal = Str[0] - 'a' + 26; + else if (Str[0] >= '0' && Str[0] <= '9') // 52..61 + CharVal = Str[0] - '0' + 52; + else if (Str[0] == '+') // 62 + CharVal = 62; + else if (Str[0] == '/') // 63 + CharVal = 63; + else + return true; + + Value = (Value * 64) + CharVal; + Str = Str.substr(1); + } + + if (Value > std::numeric_limits<uint32_t>::max()) + return true; + + Result = static_cast<uint32_t>(Value); + return false; +} + +template <typename coff_symbol_type> +const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const { + const coff_symbol_type *Addr = + reinterpret_cast<const coff_symbol_type *>(Ref.p); + + assert(!checkOffset(Data, uintptr_t(Addr), sizeof(*Addr))); +#ifndef NDEBUG + // Verify that the symbol points to a valid entry in the symbol table. + uintptr_t Offset = uintptr_t(Addr) - uintptr_t(base()); + + assert((Offset - getPointerToSymbolTable()) % sizeof(coff_symbol_type) == 0 && + "Symbol did not point to the beginning of a symbol"); +#endif + + return Addr; +} + +const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { + const coff_section *Addr = reinterpret_cast<const coff_section*>(Ref.p); + +# ifndef NDEBUG + // Verify that the section points to a valid entry in the section table. + if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections())) + report_fatal_error("Section was outside of section table."); + + uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable); + assert(Offset % sizeof(coff_section) == 0 && + "Section did not point to the beginning of a section"); +# endif + + return Addr; +} + +void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const { + auto End = reinterpret_cast<uintptr_t>(StringTable); + if (SymbolTable16) { + const coff_symbol16 *Symb = toSymb<coff_symbol16>(Ref); + Symb += 1 + Symb->NumberOfAuxSymbols; + Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End); + } else if (SymbolTable32) { + const coff_symbol32 *Symb = toSymb<coff_symbol32>(Ref); + Symb += 1 + Symb->NumberOfAuxSymbols; + Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End); + } else { + llvm_unreachable("no symbol table pointer!"); + } +} + +Expected<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + StringRef Result; + std::error_code EC = getSymbolName(Symb, Result); + if (EC) + return errorCodeToError(EC); + return Result; +} + +uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const { + return getCOFFSymbol(Ref).getValue(); +} + +uint32_t COFFObjectFile::getSymbolAlignment(DataRefImpl Ref) const { + // MSVC/link.exe seems to align symbols to the next-power-of-2 + // up to 32 bytes. + COFFSymbolRef Symb = getCOFFSymbol(Ref); + return std::min(uint64_t(32), PowerOf2Ceil(Symb.getValue())); +} + +Expected<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { + uint64_t Result = getSymbolValue(Ref); + COFFSymbolRef Symb = getCOFFSymbol(Ref); + int32_t SectionNumber = Symb.getSectionNumber(); + + if (Symb.isAnyUndefined() || Symb.isCommon() || + COFF::isReservedSectionNumber(SectionNumber)) + return Result; + + const coff_section *Section = nullptr; + if (std::error_code EC = getSection(SectionNumber, Section)) + return errorCodeToError(EC); + Result += Section->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. + Result += getImageBase(); + + return Result; +} + +Expected<SymbolRef::Type> COFFObjectFile::getSymbolType(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + int32_t SectionNumber = Symb.getSectionNumber(); + + if (Symb.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) + return SymbolRef::ST_Function; + if (Symb.isAnyUndefined()) + return SymbolRef::ST_Unknown; + if (Symb.isCommon()) + return SymbolRef::ST_Data; + if (Symb.isFileRecord()) + return SymbolRef::ST_File; + + // TODO: perhaps we need a new symbol type ST_Section. + if (SectionNumber == COFF::IMAGE_SYM_DEBUG || Symb.isSectionDefinition()) + return SymbolRef::ST_Debug; + + if (!COFF::isReservedSectionNumber(SectionNumber)) + return SymbolRef::ST_Data; + + return SymbolRef::ST_Other; +} + +uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + uint32_t Result = SymbolRef::SF_None; + + if (Symb.isExternal() || Symb.isWeakExternal()) + Result |= SymbolRef::SF_Global; + + if (Symb.isWeakExternal()) + Result |= SymbolRef::SF_Weak; + + if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE) + Result |= SymbolRef::SF_Absolute; + + if (Symb.isFileRecord()) + Result |= SymbolRef::SF_FormatSpecific; + + if (Symb.isSectionDefinition()) + Result |= SymbolRef::SF_FormatSpecific; + + if (Symb.isCommon()) + Result |= SymbolRef::SF_Common; + + if (Symb.isAnyUndefined()) + Result |= SymbolRef::SF_Undefined; + + return Result; +} + +uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + return Symb.getValue(); +} + +Expected<section_iterator> +COFFObjectFile::getSymbolSection(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) + return section_end(); + const coff_section *Sec = nullptr; + if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec)) + return errorCodeToError(EC); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(Sec); + return section_iterator(SectionRef(Ret, this)); +} + +unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const { + COFFSymbolRef Symb = getCOFFSymbol(Sym.getRawDataRefImpl()); + return Symb.getSectionNumber(); +} + +void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const { + const coff_section *Sec = toSec(Ref); + Sec += 1; + Ref.p = reinterpret_cast<uintptr_t>(Sec); +} + +std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref, + StringRef &Result) const { + const coff_section *Sec = toSec(Ref); + return getSectionName(Sec, Result); +} + +uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + uint64_t Result = Sec->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. + Result += getImageBase(); + return Result; +} + +uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { + return getSectionSize(toSec(Ref)); +} + +std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref, + StringRef &Result) const { + const coff_section *Sec = toSec(Ref); + ArrayRef<uint8_t> Res; + std::error_code EC = getSectionContents(Sec, Res); + Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size()); + return EC; +} + +uint64_t COFFObjectFile::getSectionAlignment(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + return Sec->getAlignment(); +} + +bool COFFObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool COFFObjectFile::isSectionText(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + return Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE; +} + +bool COFFObjectFile::isSectionData(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + return Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; +} + +bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const uint32_t BssFlags = COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + return (Sec->Characteristics & BssFlags) == BssFlags; +} + +unsigned COFFObjectFile::getSectionID(SectionRef Sec) const { + uintptr_t Offset = + uintptr_t(Sec.getRawDataRefImpl().p) - uintptr_t(SectionTable); + assert((Offset % sizeof(coff_section)) == 0); + return (Offset / sizeof(coff_section)) + 1; +} + +bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + return Sec->PointerToRawData == 0; +} + +static uint32_t getNumberOfRelocations(const coff_section *Sec, + MemoryBufferRef M, const uint8_t *base) { + // The field for the number of relocations in COFF section table is only + // 16-bit wide. If a section has more than 65535 relocations, 0xFFFF is set to + // NumberOfRelocations field, and the actual relocation count is stored in the + // VirtualAddress field in the first relocation entry. + if (Sec->hasExtendedRelocations()) { + const coff_relocation *FirstReloc; + if (getObject(FirstReloc, M, reinterpret_cast<const coff_relocation*>( + base + Sec->PointerToRelocations))) + return 0; + // -1 to exclude this first relocation entry. + return FirstReloc->VirtualAddress - 1; + } + return Sec->NumberOfRelocations; +} + +static const coff_relocation * +getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) { + uint64_t NumRelocs = getNumberOfRelocations(Sec, M, Base); + if (!NumRelocs) + return nullptr; + auto begin = reinterpret_cast<const coff_relocation *>( + Base + Sec->PointerToRelocations); + if (Sec->hasExtendedRelocations()) { + // Skip the first relocation entry repurposed to store the number of + // relocations. + begin++; + } + if (checkOffset(M, uintptr_t(begin), sizeof(coff_relocation) * NumRelocs)) + return nullptr; + return begin; +} + +relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const coff_relocation *begin = getFirstReloc(Sec, Data, base()); + if (begin && Sec->VirtualAddress != 0) + report_fatal_error("Sections with relocations should have an address of 0"); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(begin); + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const coff_relocation *I = getFirstReloc(Sec, Data, base()); + if (I) + I += getNumberOfRelocations(Sec, Data, base()); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(I); + return relocation_iterator(RelocationRef(Ret, this)); +} + +// Initialize the pointer to the symbol table. +std::error_code COFFObjectFile::initSymbolTablePtr() { + if (COFFHeader) + if (std::error_code EC = getObject( + SymbolTable16, Data, base() + getPointerToSymbolTable(), + (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize())) + return EC; + + if (COFFBigObjHeader) + if (std::error_code EC = getObject( + SymbolTable32, Data, base() + getPointerToSymbolTable(), + (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize())) + return EC; + + // Find string table. The first four byte of the string table contains the + // total size of the string table, including the size field itself. If the + // string table is empty, the value of the first four byte would be 4. + uint32_t StringTableOffset = getPointerToSymbolTable() + + getNumberOfSymbols() * getSymbolTableEntrySize(); + const uint8_t *StringTableAddr = base() + StringTableOffset; + const ulittle32_t *StringTableSizePtr; + if (std::error_code EC = getObject(StringTableSizePtr, Data, StringTableAddr)) + return EC; + StringTableSize = *StringTableSizePtr; + if (std::error_code EC = + getObject(StringTable, Data, StringTableAddr, StringTableSize)) + return EC; + + // Treat table sizes < 4 as empty because contrary to the PECOFF spec, some + // tools like cvtres write a size of 0 for an empty table instead of 4. + if (StringTableSize < 4) + StringTableSize = 4; + + // Check that the string table is null terminated if has any in it. + if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0) + return object_error::parse_failed; + return std::error_code(); +} + +uint64_t COFFObjectFile::getImageBase() const { + if (PE32Header) + return PE32Header->ImageBase; + else if (PE32PlusHeader) + return PE32PlusHeader->ImageBase; + // This actually comes up in practice. + return 0; +} + +// Returns the file offset for the given VA. +std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { + uint64_t ImageBase = getImageBase(); + uint64_t Rva = Addr - ImageBase; + assert(Rva <= UINT32_MAX); + return getRvaPtr((uint32_t)Rva, Res); +} + +// Returns the file offset for the given RVA. +std::error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { + for (const SectionRef &S : sections()) { + const coff_section *Section = getCOFFSection(S); + uint32_t SectionStart = Section->VirtualAddress; + uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize; + if (SectionStart <= Addr && Addr < SectionEnd) { + uint32_t Offset = Addr - SectionStart; + Res = uintptr_t(base()) + Section->PointerToRawData + Offset; + return std::error_code(); + } + } + return object_error::parse_failed; +} + +std::error_code +COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, + ArrayRef<uint8_t> &Contents) const { + for (const SectionRef &S : sections()) { + const coff_section *Section = getCOFFSection(S); + uint32_t SectionStart = Section->VirtualAddress; + // Check if this RVA is within the section bounds. Be careful about integer + // overflow. + uint32_t OffsetIntoSection = RVA - SectionStart; + if (SectionStart <= RVA && OffsetIntoSection < Section->VirtualSize && + Size <= Section->VirtualSize - OffsetIntoSection) { + uintptr_t Begin = + uintptr_t(base()) + Section->PointerToRawData + OffsetIntoSection; + Contents = + ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Begin), Size); + return std::error_code(); + } + } + return object_error::parse_failed; +} + +// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name +// table entry. +std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint, + StringRef &Name) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(Rva, IntPtr)) + return EC; + const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr); + Hint = *reinterpret_cast<const ulittle16_t *>(Ptr); + Name = StringRef(reinterpret_cast<const char *>(Ptr + 2)); + return std::error_code(); +} + +std::error_code +COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, + const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { + ArrayRef<uint8_t> InfoBytes; + if (std::error_code EC = getRvaAndSizeAsBytes( + DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes)) + return EC; + if (InfoBytes.size() < sizeof(*PDBInfo) + 1) + return object_error::parse_failed; + PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data()); + InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo)); + PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()), + InfoBytes.size()); + // Truncate the name at the first null byte. Ignore any padding. + PDBFileName = PDBFileName.split('\0').first; + return std::error_code(); +} + +std::error_code +COFFObjectFile::getDebugPDBInfo(const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { + for (const debug_directory &D : debug_directories()) + if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) + return getDebugPDBInfo(&D, PDBInfo, PDBFileName); + // If we get here, there is no PDB info to return. + PDBInfo = nullptr; + PDBFileName = StringRef(); + return std::error_code(); +} + +// Find the import table. +std::error_code COFFObjectFile::initImportTablePtr() { + // First, we get the RVA of the import table. If the file lacks a pointer to + // the import table, do nothing. + const data_directory *DataEntry; + if (getDataDirectory(COFF::IMPORT_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the pointer to import table is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress; + + // Find the section that contains the RVA. This is needed because the RVA is + // the import table's memory address which is different from its file offset. + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(ImportTableRva, IntPtr)) + return EC; + if (std::error_code EC = checkOffset(Data, IntPtr, DataEntry->Size)) + return EC; + ImportDirectory = reinterpret_cast< + const coff_import_directory_table_entry *>(IntPtr); + return std::error_code(); +} + +// Initializes DelayImportDirectory and NumberOfDelayImportDirectory. +std::error_code COFFObjectFile::initDelayImportTablePtr() { + const data_directory *DataEntry; + if (getDataDirectory(COFF::DELAY_IMPORT_DESCRIPTOR, DataEntry)) + return std::error_code(); + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uint32_t RVA = DataEntry->RelativeVirtualAddress; + NumberOfDelayImportDirectory = DataEntry->Size / + sizeof(delay_import_directory_table_entry) - 1; + + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(RVA, IntPtr)) + return EC; + DelayImportDirectory = reinterpret_cast< + const delay_import_directory_table_entry *>(IntPtr); + return std::error_code(); +} + +// Find the export table. +std::error_code COFFObjectFile::initExportTablePtr() { + // First, we get the RVA of the export table. If the file lacks a pointer to + // the export table, do nothing. + const data_directory *DataEntry; + if (getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the pointer to export table is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress; + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(ExportTableRva, IntPtr)) + return EC; + ExportDirectory = + reinterpret_cast<const export_directory_table_entry *>(IntPtr); + return std::error_code(); +} + +std::error_code COFFObjectFile::initBaseRelocPtr() { + const data_directory *DataEntry; + if (getDataDirectory(COFF::BASE_RELOCATION_TABLE, DataEntry)) + return std::error_code(); + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + BaseRelocHeader = reinterpret_cast<const coff_base_reloc_block_header *>( + IntPtr); + BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>( + IntPtr + DataEntry->Size); + return std::error_code(); +} + +std::error_code COFFObjectFile::initDebugDirectoryPtr() { + // Get the RVA of the debug directory. Do nothing if it does not exist. + const data_directory *DataEntry; + if (getDataDirectory(COFF::DEBUG_DIRECTORY, DataEntry)) + return std::error_code(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + // Check that the size is a multiple of the entry size. + if (DataEntry->Size % sizeof(debug_directory) != 0) + return object_error::parse_failed; + + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr); + if (std::error_code EC = getRvaPtr( + DataEntry->RelativeVirtualAddress + DataEntry->Size, IntPtr)) + return EC; + DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(IntPtr); + return std::error_code(); +} + +COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) + : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr), + COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), + DataDirectory(nullptr), SectionTable(nullptr), SymbolTable16(nullptr), + SymbolTable32(nullptr), StringTable(nullptr), StringTableSize(0), + ImportDirectory(nullptr), + DelayImportDirectory(nullptr), NumberOfDelayImportDirectory(0), + ExportDirectory(nullptr), BaseRelocHeader(nullptr), BaseRelocEnd(nullptr), + DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr) { + // Check that we at least have enough room for a header. + if (!checkSize(Data, EC, sizeof(coff_file_header))) + return; + + // The current location in the file where we are looking at. + uint64_t CurPtr = 0; + + // PE header is optional and is present only in executables. If it exists, + // it is placed right after COFF header. + bool HasPEHeader = false; + + // Check if this is a PE/COFF file. + if (checkSize(Data, EC, sizeof(dos_header) + sizeof(COFF::PEMagic))) { + // PE/COFF, seek through MS-DOS compatibility stub and 4-byte + // PE signature to find 'normal' COFF header. + const auto *DH = reinterpret_cast<const dos_header *>(base()); + if (DH->Magic[0] == 'M' && DH->Magic[1] == 'Z') { + CurPtr = DH->AddressOfNewExeHeader; + // Check the PE magic bytes. ("PE\0\0") + if (memcmp(base() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != 0) { + EC = object_error::parse_failed; + return; + } + CurPtr += sizeof(COFF::PEMagic); // Skip the PE magic bytes. + HasPEHeader = true; + } + } + + if ((EC = getObject(COFFHeader, Data, base() + CurPtr))) + return; + + // It might be a bigobj file, let's check. Note that COFF bigobj and COFF + // import libraries share a common prefix but bigobj is more restrictive. + if (!HasPEHeader && COFFHeader->Machine == COFF::IMAGE_FILE_MACHINE_UNKNOWN && + COFFHeader->NumberOfSections == uint16_t(0xffff) && + checkSize(Data, EC, sizeof(coff_bigobj_file_header))) { + if ((EC = getObject(COFFBigObjHeader, Data, base() + CurPtr))) + return; + + // Verify that we are dealing with bigobj. + if (COFFBigObjHeader->Version >= COFF::BigObjHeader::MinBigObjectVersion && + std::memcmp(COFFBigObjHeader->UUID, COFF::BigObjMagic, + sizeof(COFF::BigObjMagic)) == 0) { + COFFHeader = nullptr; + CurPtr += sizeof(coff_bigobj_file_header); + } else { + // It's not a bigobj. + COFFBigObjHeader = nullptr; + } + } + if (COFFHeader) { + // The prior checkSize call may have failed. This isn't a hard error + // because we were just trying to sniff out bigobj. + EC = std::error_code(); + CurPtr += sizeof(coff_file_header); + + if (COFFHeader->isImportLibrary()) + return; + } + + if (HasPEHeader) { + const pe32_header *Header; + if ((EC = getObject(Header, Data, base() + CurPtr))) + return; + + const uint8_t *DataDirAddr; + uint64_t DataDirSize; + if (Header->Magic == COFF::PE32Header::PE32) { + PE32Header = Header; + DataDirAddr = base() + CurPtr + sizeof(pe32_header); + DataDirSize = sizeof(data_directory) * PE32Header->NumberOfRvaAndSize; + } else if (Header->Magic == COFF::PE32Header::PE32_PLUS) { + PE32PlusHeader = reinterpret_cast<const pe32plus_header *>(Header); + DataDirAddr = base() + CurPtr + sizeof(pe32plus_header); + DataDirSize = sizeof(data_directory) * PE32PlusHeader->NumberOfRvaAndSize; + } else { + // It's neither PE32 nor PE32+. + EC = object_error::parse_failed; + return; + } + if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize))) + return; + } + + if (COFFHeader) + CurPtr += COFFHeader->SizeOfOptionalHeader; + + if ((EC = getObject(SectionTable, Data, base() + CurPtr, + (uint64_t)getNumberOfSections() * sizeof(coff_section)))) + return; + + // Initialize the pointer to the symbol table. + if (getPointerToSymbolTable() != 0) { + if ((EC = initSymbolTablePtr())) { + SymbolTable16 = nullptr; + SymbolTable32 = nullptr; + StringTable = nullptr; + StringTableSize = 0; + } + } else { + // We had better not have any symbols if we don't have a symbol table. + if (getNumberOfSymbols() != 0) { + EC = object_error::parse_failed; + return; + } + } + + // Initialize the pointer to the beginning of the import table. + if ((EC = initImportTablePtr())) + return; + if ((EC = initDelayImportTablePtr())) + return; + + // Initialize the pointer to the export table. + if ((EC = initExportTablePtr())) + return; + + // Initialize the pointer to the base relocation table. + if ((EC = initBaseRelocPtr())) + return; + + // Initialize the pointer to the export table. + if ((EC = initDebugDirectoryPtr())) + return; + + EC = std::error_code(); +} + +basic_symbol_iterator COFFObjectFile::symbol_begin() const { + DataRefImpl Ret; + Ret.p = getSymbolTable(); + return basic_symbol_iterator(SymbolRef(Ret, this)); +} + +basic_symbol_iterator COFFObjectFile::symbol_end() const { + // The symbol table ends where the string table begins. + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(StringTable); + return basic_symbol_iterator(SymbolRef(Ret, this)); +} + +import_directory_iterator COFFObjectFile::import_directory_begin() const { + if (!ImportDirectory) + return import_directory_end(); + if (ImportDirectory->isNull()) + return import_directory_end(); + return import_directory_iterator( + ImportDirectoryEntryRef(ImportDirectory, 0, this)); +} + +import_directory_iterator COFFObjectFile::import_directory_end() const { + return import_directory_iterator( + ImportDirectoryEntryRef(nullptr, -1, this)); +} + +delay_import_directory_iterator +COFFObjectFile::delay_import_directory_begin() const { + return delay_import_directory_iterator( + DelayImportDirectoryEntryRef(DelayImportDirectory, 0, this)); +} + +delay_import_directory_iterator +COFFObjectFile::delay_import_directory_end() const { + return delay_import_directory_iterator( + DelayImportDirectoryEntryRef( + DelayImportDirectory, NumberOfDelayImportDirectory, this)); +} + +export_directory_iterator COFFObjectFile::export_directory_begin() const { + return export_directory_iterator( + ExportDirectoryEntryRef(ExportDirectory, 0, this)); +} + +export_directory_iterator COFFObjectFile::export_directory_end() const { + if (!ExportDirectory) + return export_directory_iterator(ExportDirectoryEntryRef(nullptr, 0, this)); + ExportDirectoryEntryRef Ref(ExportDirectory, + ExportDirectory->AddressTableEntries, this); + return export_directory_iterator(Ref); +} + +section_iterator COFFObjectFile::section_begin() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SectionTable); + return section_iterator(SectionRef(Ret, this)); +} + +section_iterator COFFObjectFile::section_end() const { + DataRefImpl Ret; + int NumSections = + COFFHeader && COFFHeader->isImportLibrary() ? 0 : getNumberOfSections(); + Ret.p = reinterpret_cast<uintptr_t>(SectionTable + NumSections); + return section_iterator(SectionRef(Ret, this)); +} + +base_reloc_iterator COFFObjectFile::base_reloc_begin() const { + return base_reloc_iterator(BaseRelocRef(BaseRelocHeader, this)); +} + +base_reloc_iterator COFFObjectFile::base_reloc_end() const { + return base_reloc_iterator(BaseRelocRef(BaseRelocEnd, this)); +} + +uint8_t COFFObjectFile::getBytesInAddress() const { + return getArch() == Triple::x86_64 ? 8 : 4; +} + +StringRef COFFObjectFile::getFileFormatName() const { + switch(getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + return "COFF-i386"; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return "COFF-x86-64"; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return "COFF-ARM"; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return "COFF-ARM64"; + default: + return "COFF-<unknown arch>"; + } +} + +unsigned COFFObjectFile::getArch() const { + switch (getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + return Triple::x86; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return Triple::x86_64; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return Triple::thumb; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return Triple::aarch64; + default: + return Triple::UnknownArch; + } +} + +iterator_range<import_directory_iterator> +COFFObjectFile::import_directories() const { + return make_range(import_directory_begin(), import_directory_end()); +} + +iterator_range<delay_import_directory_iterator> +COFFObjectFile::delay_import_directories() const { + return make_range(delay_import_directory_begin(), + delay_import_directory_end()); +} + +iterator_range<export_directory_iterator> +COFFObjectFile::export_directories() const { + return make_range(export_directory_begin(), export_directory_end()); +} + +iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const { + return make_range(base_reloc_begin(), base_reloc_end()); +} + +std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { + Res = PE32Header; + return std::error_code(); +} + +std::error_code +COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { + Res = PE32PlusHeader; + return std::error_code(); +} + +std::error_code +COFFObjectFile::getDataDirectory(uint32_t Index, + const data_directory *&Res) const { + // Error if if there's no data directory or the index is out of range. + if (!DataDirectory) { + Res = nullptr; + return object_error::parse_failed; + } + assert(PE32Header || PE32PlusHeader); + uint32_t NumEnt = PE32Header ? PE32Header->NumberOfRvaAndSize + : PE32PlusHeader->NumberOfRvaAndSize; + if (Index >= NumEnt) { + Res = nullptr; + return object_error::parse_failed; + } + Res = &DataDirectory[Index]; + return std::error_code(); +} + +std::error_code COFFObjectFile::getSection(int32_t Index, + const coff_section *&Result) const { + Result = nullptr; + if (COFF::isReservedSectionNumber(Index)) + return std::error_code(); + if (static_cast<uint32_t>(Index) <= getNumberOfSections()) { + // We already verified the section table data, so no need to check again. + Result = SectionTable + (Index - 1); + return std::error_code(); + } + return object_error::parse_failed; +} + +std::error_code COFFObjectFile::getString(uint32_t Offset, + StringRef &Result) const { + if (StringTableSize <= 4) + // Tried to get a string from an empty string table. + return object_error::parse_failed; + if (Offset >= StringTableSize) + return object_error::unexpected_eof; + Result = StringRef(StringTable + Offset); + return std::error_code(); +} + +std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol, + StringRef &Res) const { + return getSymbolName(Symbol.getGeneric(), Res); +} + +std::error_code COFFObjectFile::getSymbolName(const coff_symbol_generic *Symbol, + StringRef &Res) const { + // Check for string table entry. First 4 bytes are 0. + if (Symbol->Name.Offset.Zeroes == 0) { + if (std::error_code EC = getString(Symbol->Name.Offset.Offset, Res)) + return EC; + return std::error_code(); + } + + if (Symbol->Name.ShortName[COFF::NameSize - 1] == 0) + // Null terminated, let ::strlen figure out the length. + Res = StringRef(Symbol->Name.ShortName); + else + // Not null terminated, use all 8 bytes. + Res = StringRef(Symbol->Name.ShortName, COFF::NameSize); + return std::error_code(); +} + +ArrayRef<uint8_t> +COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { + const uint8_t *Aux = nullptr; + + size_t SymbolSize = getSymbolTableEntrySize(); + if (Symbol.getNumberOfAuxSymbols() > 0) { + // AUX data comes immediately after the symbol in COFF + Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize; +# ifndef NDEBUG + // Verify that the Aux symbol points to a valid entry in the symbol table. + uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base()); + if (Offset < getPointerToSymbolTable() || + Offset >= + getPointerToSymbolTable() + (getNumberOfSymbols() * SymbolSize)) + report_fatal_error("Aux Symbol data was outside of symbol table."); + + assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 && + "Aux Symbol data did not point to the beginning of a symbol"); +# endif + } + return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize); +} + +std::error_code COFFObjectFile::getSectionName(const coff_section *Sec, + StringRef &Res) const { + StringRef Name; + if (Sec->Name[COFF::NameSize - 1] == 0) + // Null terminated, let ::strlen figure out the length. + Name = Sec->Name; + else + // Not null terminated, use all 8 bytes. + Name = StringRef(Sec->Name, COFF::NameSize); + + // Check for string table entry. First byte is '/'. + if (Name.startswith("/")) { + uint32_t Offset; + if (Name.startswith("//")) { + if (decodeBase64StringEntry(Name.substr(2), Offset)) + return object_error::parse_failed; + } else { + if (Name.substr(1).getAsInteger(10, Offset)) + return object_error::parse_failed; + } + if (std::error_code EC = getString(Offset, Name)) + return EC; + } + + Res = Name; + return std::error_code(); +} + +uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const { + // SizeOfRawData and VirtualSize change what they represent depending on + // whether or not we have an executable image. + // + // For object files, SizeOfRawData contains the size of section's data; + // VirtualSize should be zero but isn't due to buggy COFF writers. + // + // For executables, SizeOfRawData *must* be a multiple of FileAlignment; the + // actual section size is in VirtualSize. It is possible for VirtualSize to + // be greater than SizeOfRawData; the contents past that point should be + // considered to be zero. + if (getDOSHeader()) + return std::min(Sec->VirtualSize, Sec->SizeOfRawData); + return Sec->SizeOfRawData; +} + +std::error_code +COFFObjectFile::getSectionContents(const coff_section *Sec, + ArrayRef<uint8_t> &Res) const { + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + if (Sec->PointerToRawData == 0) + return object_error::parse_failed; + // The only thing that we need to verify is that the contents is contained + // within the file bounds. We don't need to make sure it doesn't cover other + // data, as there's nothing that says that is not allowed. + uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData; + uint32_t SectionSize = getSectionSize(Sec); + if (checkOffset(Data, ConStart, SectionSize)) + return object_error::parse_failed; + Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize); + return std::error_code(); +} + +const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const { + return reinterpret_cast<const coff_relocation*>(Rel.p); +} + +void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + Rel.p = reinterpret_cast<uintptr_t>( + reinterpret_cast<const coff_relocation*>(Rel.p) + 1); +} + +uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const { + const coff_relocation *R = toRel(Rel); + return R->VirtualAddress; +} + +symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + const coff_relocation *R = toRel(Rel); + DataRefImpl Ref; + if (R->SymbolTableIndex >= getNumberOfSymbols()) + return symbol_end(); + if (SymbolTable16) + Ref.p = reinterpret_cast<uintptr_t>(SymbolTable16 + R->SymbolTableIndex); + else if (SymbolTable32) + Ref.p = reinterpret_cast<uintptr_t>(SymbolTable32 + R->SymbolTableIndex); + else + llvm_unreachable("no symbol table pointer!"); + return symbol_iterator(SymbolRef(Ref, this)); +} + +uint64_t COFFObjectFile::getRelocationType(DataRefImpl Rel) const { + const coff_relocation* R = toRel(Rel); + return R->Type; +} + +const coff_section * +COFFObjectFile::getCOFFSection(const SectionRef &Section) const { + return toSec(Section.getRawDataRefImpl()); +} + +COFFSymbolRef COFFObjectFile::getCOFFSymbol(const DataRefImpl &Ref) const { + if (SymbolTable16) + return toSymb<coff_symbol16>(Ref); + if (SymbolTable32) + return toSymb<coff_symbol32>(Ref); + llvm_unreachable("no symbol table pointer!"); +} + +COFFSymbolRef COFFObjectFile::getCOFFSymbol(const SymbolRef &Symbol) const { + return getCOFFSymbol(Symbol.getRawDataRefImpl()); +} + +const coff_relocation * +COFFObjectFile::getCOFFRelocation(const RelocationRef &Reloc) const { + return toRel(Reloc.getRawDataRefImpl()); +} + +iterator_range<const coff_relocation *> +COFFObjectFile::getRelocations(const coff_section *Sec) const { + const coff_relocation *I = getFirstReloc(Sec, Data, base()); + const coff_relocation *E = I; + if (I) + E += getNumberOfRelocations(Sec, Data, base()); + return make_range(I, E); +} + +#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type) \ + case COFF::reloc_type: \ + Res = #reloc_type; \ + break; + +void COFFObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + const coff_relocation *Reloc = toRel(Rel); + StringRef Res; + switch (getMachine()) { + case COFF::IMAGE_FILE_MACHINE_AMD64: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_1); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_2); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_3); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_4); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_5); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SREL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32); + default: + Res = "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH11); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T); + default: + Res = "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_I386: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SEG12); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32); + default: + Res = "Unknown"; + } + break; + default: + Res = "Unknown"; + } + Result.append(Res.begin(), Res.end()); +} + +#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME + +bool COFFObjectFile::isRelocatableObject() const { + return !DataDirectory; +} + +bool ImportDirectoryEntryRef:: +operator==(const ImportDirectoryEntryRef &Other) const { + return ImportTable == Other.ImportTable && Index == Other.Index; +} + +void ImportDirectoryEntryRef::moveNext() { + ++Index; + if (ImportTable[Index].isNull()) { + Index = -1; + ImportTable = nullptr; + } +} + +std::error_code ImportDirectoryEntryRef::getImportTableEntry( + const coff_import_directory_table_entry *&Result) const { + return getObject(Result, OwningObject->Data, ImportTable + Index); +} + +static imported_symbol_iterator +makeImportedSymbolIterator(const COFFObjectFile *Object, + uintptr_t Ptr, int Index) { + if (Object->getBytesInAddress() == 4) { + auto *P = reinterpret_cast<const import_lookup_table_entry32 *>(Ptr); + return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object)); + } + auto *P = reinterpret_cast<const import_lookup_table_entry64 *>(Ptr); + return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object)); +} + +static imported_symbol_iterator +importedSymbolBegin(uint32_t RVA, const COFFObjectFile *Object) { + uintptr_t IntPtr = 0; + Object->getRvaPtr(RVA, IntPtr); + return makeImportedSymbolIterator(Object, IntPtr, 0); +} + +static imported_symbol_iterator +importedSymbolEnd(uint32_t RVA, const COFFObjectFile *Object) { + uintptr_t IntPtr = 0; + Object->getRvaPtr(RVA, IntPtr); + // Forward the pointer to the last entry which is null. + int Index = 0; + if (Object->getBytesInAddress() == 4) { + auto *Entry = reinterpret_cast<ulittle32_t *>(IntPtr); + while (*Entry++) + ++Index; + } else { + auto *Entry = reinterpret_cast<ulittle64_t *>(IntPtr); + while (*Entry++) + ++Index; + } + return makeImportedSymbolIterator(Object, IntPtr, Index); +} + +imported_symbol_iterator +ImportDirectoryEntryRef::imported_symbol_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportAddressTableRVA, + OwningObject); +} + +imported_symbol_iterator +ImportDirectoryEntryRef::imported_symbol_end() const { + return importedSymbolEnd(ImportTable[Index].ImportAddressTableRVA, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::imported_symbols() const { + return make_range(imported_symbol_begin(), imported_symbol_end()); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_end() const { + return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::lookup_table_symbols() const { + return make_range(lookup_table_begin(), lookup_table_end()); +} + +std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +std::error_code +ImportDirectoryEntryRef::getImportLookupTableRVA(uint32_t &Result) const { + Result = ImportTable[Index].ImportLookupTableRVA; + return std::error_code(); +} + +std::error_code +ImportDirectoryEntryRef::getImportAddressTableRVA(uint32_t &Result) const { + Result = ImportTable[Index].ImportAddressTableRVA; + return std::error_code(); +} + +bool DelayImportDirectoryEntryRef:: +operator==(const DelayImportDirectoryEntryRef &Other) const { + return Table == Other.Table && Index == Other.Index; +} + +void DelayImportDirectoryEntryRef::moveNext() { + ++Index; +} + +imported_symbol_iterator +DelayImportDirectoryEntryRef::imported_symbol_begin() const { + return importedSymbolBegin(Table[Index].DelayImportNameTable, + OwningObject); +} + +imported_symbol_iterator +DelayImportDirectoryEntryRef::imported_symbol_end() const { + return importedSymbolEnd(Table[Index].DelayImportNameTable, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +DelayImportDirectoryEntryRef::imported_symbols() const { + return make_range(imported_symbol_begin(), imported_symbol_end()); +} + +std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(Table[Index].Name, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +std::error_code DelayImportDirectoryEntryRef:: +getDelayImportTable(const delay_import_directory_table_entry *&Result) const { + Result = Table; + return std::error_code(); +} + +std::error_code DelayImportDirectoryEntryRef:: +getImportAddress(int AddrIndex, uint64_t &Result) const { + uint32_t RVA = Table[Index].DelayImportAddressTable + + AddrIndex * (OwningObject->is64() ? 8 : 4); + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + if (OwningObject->is64()) + Result = *reinterpret_cast<const ulittle64_t *>(IntPtr); + else + Result = *reinterpret_cast<const ulittle32_t *>(IntPtr); + return std::error_code(); +} + +bool ExportDirectoryEntryRef:: +operator==(const ExportDirectoryEntryRef &Other) const { + return ExportTable == Other.ExportTable && Index == Other.Index; +} + +void ExportDirectoryEntryRef::moveNext() { + ++Index; +} + +// Returns the name of the current export symbol. If the symbol is exported only +// by ordinal, the empty string is set as a result. +std::error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +// Returns the starting ordinal number. +std::error_code +ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const { + Result = ExportTable->OrdinalBase; + return std::error_code(); +} + +// Returns the export ordinal of the current export symbol. +std::error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { + Result = ExportTable->OrdinalBase + Index; + return std::error_code(); +} + +// Returns the address of the current export symbol. +std::error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr)) + return EC; + const export_address_table_entry *entry = + reinterpret_cast<const export_address_table_entry *>(IntPtr); + Result = entry[Index].ExportRVA; + return std::error_code(); +} + +// Returns the name of the current export symbol. If the symbol is exported only +// by ordinal, the empty string is set as a result. +std::error_code +ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr)) + return EC; + const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr); + + uint32_t NumEntries = ExportTable->NumberOfNamePointers; + int Offset = 0; + for (const ulittle16_t *I = Start, *E = Start + NumEntries; + I < E; ++I, ++Offset) { + if (*I != Index) + continue; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr)) + return EC; + const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr); + if (std::error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); + } + Result = ""; + return std::error_code(); +} + +std::error_code ExportDirectoryEntryRef::isForwarder(bool &Result) const { + const data_directory *DataEntry; + if (auto EC = OwningObject->getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) + return EC; + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uint32_t Begin = DataEntry->RelativeVirtualAddress; + uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size; + Result = (Begin <= RVA && RVA < End); + return std::error_code(); +} + +std::error_code ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const { + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uintptr_t IntPtr = 0; + if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +bool ImportedSymbolRef:: +operator==(const ImportedSymbolRef &Other) const { + return Entry32 == Other.Entry32 && Entry64 == Other.Entry64 + && Index == Other.Index; +} + +void ImportedSymbolRef::moveNext() { + ++Index; +} + +std::error_code +ImportedSymbolRef::getSymbolName(StringRef &Result) const { + uint32_t RVA; + if (Entry32) { + // If a symbol is imported only by ordinal, it has no name. + if (Entry32[Index].isOrdinal()) + return std::error_code(); + RVA = Entry32[Index].getHintNameRVA(); + } else { + if (Entry64[Index].isOrdinal()) + return std::error_code(); + RVA = Entry64[Index].getHintNameRVA(); + } + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + // +2 because the first two bytes is hint. + Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2)); + return std::error_code(); +} + +std::error_code ImportedSymbolRef::isOrdinal(bool &Result) const { + if (Entry32) + Result = Entry32[Index].isOrdinal(); + else + Result = Entry64[Index].isOrdinal(); + return std::error_code(); +} + +std::error_code ImportedSymbolRef::getHintNameRVA(uint32_t &Result) const { + if (Entry32) + Result = Entry32[Index].getHintNameRVA(); + else + Result = Entry64[Index].getHintNameRVA(); + return std::error_code(); +} + +std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const { + uint32_t RVA; + if (Entry32) { + if (Entry32[Index].isOrdinal()) { + Result = Entry32[Index].getOrdinal(); + return std::error_code(); + } + RVA = Entry32[Index].getHintNameRVA(); + } else { + if (Entry64[Index].isOrdinal()) { + Result = Entry64[Index].getOrdinal(); + return std::error_code(); + } + RVA = Entry64[Index].getHintNameRVA(); + } + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = *reinterpret_cast<const ulittle16_t *>(IntPtr); + return std::error_code(); +} + +ErrorOr<std::unique_ptr<COFFObjectFile>> +ObjectFile::createCOFFObjectFile(MemoryBufferRef Object) { + std::error_code EC; + std::unique_ptr<COFFObjectFile> Ret(new COFFObjectFile(Object, EC)); + if (EC) + return EC; + return std::move(Ret); +} + +bool BaseRelocRef::operator==(const BaseRelocRef &Other) const { + return Header == Other.Header && Index == Other.Index; +} + +void BaseRelocRef::moveNext() { + // Header->BlockSize is the size of the current block, including the + // size of the header itself. + uint32_t Size = sizeof(*Header) + + sizeof(coff_base_reloc_block_entry) * (Index + 1); + if (Size == Header->BlockSize) { + // .reloc contains a list of base relocation blocks. Each block + // consists of the header followed by entries. The header contains + // how many entories will follow. When we reach the end of the + // current block, proceed to the next block. + Header = reinterpret_cast<const coff_base_reloc_block_header *>( + reinterpret_cast<const uint8_t *>(Header) + Size); + Index = 0; + } else { + ++Index; + } +} + +std::error_code BaseRelocRef::getType(uint8_t &Type) const { + auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); + Type = Entry[Index].getType(); + return std::error_code(); +} + +std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { + auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); + Result = Header->PageRVA + Entry[Index].getOffset(); + return std::error_code(); +} diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp new file mode 100644 index 000000000000..23682e1fabfd --- /dev/null +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -0,0 +1,144 @@ +//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELF.h" + +namespace llvm { +namespace object { + +#define ELF_RELOC(name, value) \ + case ELF::name: \ + return #name; \ + +StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { + switch (Machine) { + case ELF::EM_X86_64: + switch (Type) { +#include "llvm/Support/ELFRelocs/x86_64.def" + default: + break; + } + break; + case ELF::EM_386: + case ELF::EM_IAMCU: + switch (Type) { +#include "llvm/Support/ELFRelocs/i386.def" + default: + break; + } + break; + case ELF::EM_MIPS: + switch (Type) { +#include "llvm/Support/ELFRelocs/Mips.def" + default: + break; + } + break; + case ELF::EM_AARCH64: + switch (Type) { +#include "llvm/Support/ELFRelocs/AArch64.def" + default: + break; + } + break; + case ELF::EM_ARM: + switch (Type) { +#include "llvm/Support/ELFRelocs/ARM.def" + default: + break; + } + break; + case ELF::EM_AVR: + switch (Type) { +#include "llvm/Support/ELFRelocs/AVR.def" + default: + break; + } + break; + case ELF::EM_HEXAGON: + switch (Type) { +#include "llvm/Support/ELFRelocs/Hexagon.def" + default: + break; + } + break; + case ELF::EM_LANAI: + switch (Type) { +#include "llvm/Support/ELFRelocs/Lanai.def" + default: + break; + } + break; + case ELF::EM_PPC: + switch (Type) { +#include "llvm/Support/ELFRelocs/PowerPC.def" + default: + break; + } + break; + case ELF::EM_PPC64: + switch (Type) { +#include "llvm/Support/ELFRelocs/PowerPC64.def" + default: + break; + } + break; + case ELF::EM_RISCV: + switch (Type) { +#include "llvm/Support/ELFRelocs/RISCV.def" + default: + break; + } + break; + case ELF::EM_S390: + switch (Type) { +#include "llvm/Support/ELFRelocs/SystemZ.def" + default: + break; + } + break; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + case ELF::EM_SPARCV9: + switch (Type) { +#include "llvm/Support/ELFRelocs/Sparc.def" + default: + break; + } + break; + case ELF::EM_WEBASSEMBLY: + switch (Type) { +#include "llvm/Support/ELFRelocs/WebAssembly.def" + default: + break; + } + break; + case ELF::EM_AMDGPU: + switch (Type) { +#include "llvm/Support/ELFRelocs/AMDGPU.def" + default: + break; + } + case ELF::EM_BPF: + switch (Type) { +#include "llvm/Support/ELFRelocs/BPF.def" + default: + break; + } + break; + default: + break; + } + return "Unknown"; +} + +#undef ELF_RELOC + +} // end namespace object +} // end namespace llvm diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp new file mode 100644 index 000000000000..4bd69e34e3c3 --- /dev/null +++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp @@ -0,0 +1,125 @@ +//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the ELFObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/MathExtras.h" + +namespace llvm { +using namespace object; + +ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) + : ObjectFile(Type, Source) {} + +ErrorOr<std::unique_ptr<ObjectFile>> +ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { + std::pair<unsigned char, unsigned char> Ident = + getElfArchType(Obj.getBuffer()); + std::size_t MaxAlignment = + 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart())); + + if (MaxAlignment < 2) + return object_error::parse_failed; + + std::error_code EC; + std::unique_ptr<ObjectFile> R; + if (Ident.first == ELF::ELFCLASS32) { + if (Ident.second == ELF::ELFDATA2LSB) + R.reset(new ELFObjectFile<ELFType<support::little, false>>(Obj, EC)); + else if (Ident.second == ELF::ELFDATA2MSB) + R.reset(new ELFObjectFile<ELFType<support::big, false>>(Obj, EC)); + else + return object_error::parse_failed; + } else if (Ident.first == ELF::ELFCLASS64) { + if (Ident.second == ELF::ELFDATA2LSB) + R.reset(new ELFObjectFile<ELFType<support::little, true>>(Obj, EC)); + else if (Ident.second == ELF::ELFDATA2MSB) + R.reset(new ELFObjectFile<ELFType<support::big, true>>(Obj, EC)); + else + return object_error::parse_failed; + } else { + return object_error::parse_failed; + } + + if (EC) + return EC; + return std::move(R); +} + +SubtargetFeatures ELFObjectFileBase::getFeatures() const { + switch (getEMachine()) { + case ELF::EM_MIPS: { + SubtargetFeatures Features; + unsigned PlatformFlags; + getPlatformFlags(PlatformFlags); + + switch (PlatformFlags & ELF::EF_MIPS_ARCH) { + case ELF::EF_MIPS_ARCH_1: + break; + case ELF::EF_MIPS_ARCH_2: + Features.AddFeature("mips2"); + break; + case ELF::EF_MIPS_ARCH_3: + Features.AddFeature("mips3"); + break; + case ELF::EF_MIPS_ARCH_4: + Features.AddFeature("mips4"); + break; + case ELF::EF_MIPS_ARCH_5: + Features.AddFeature("mips5"); + break; + case ELF::EF_MIPS_ARCH_32: + Features.AddFeature("mips32"); + break; + case ELF::EF_MIPS_ARCH_64: + Features.AddFeature("mips64"); + break; + case ELF::EF_MIPS_ARCH_32R2: + Features.AddFeature("mips32r2"); + break; + case ELF::EF_MIPS_ARCH_64R2: + Features.AddFeature("mips64r2"); + break; + case ELF::EF_MIPS_ARCH_32R6: + Features.AddFeature("mips32r6"); + break; + case ELF::EF_MIPS_ARCH_64R6: + Features.AddFeature("mips64r6"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + switch (PlatformFlags & ELF::EF_MIPS_MACH) { + case ELF::EF_MIPS_MACH_NONE: + // No feature associated with this value. + break; + case ELF::EF_MIPS_MACH_OCTEON: + Features.AddFeature("cnmips"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) + Features.AddFeature("mips16"); + if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) + Features.AddFeature("micromips"); + + return Features; + } + default: + return SubtargetFeatures(); + } +} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp new file mode 100644 index 000000000000..7d43a84f3e0e --- /dev/null +++ b/contrib/llvm/lib/Object/Error.cpp @@ -0,0 +1,95 @@ +//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines a new error_category for the Object library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace object; + +namespace { +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. +class _object_error_category : public std::error_category { +public: + const char* name() const noexcept override; + std::string message(int ev) const override; +}; +} + +const char *_object_error_category::name() const noexcept { + return "llvm.object"; +} + +std::string _object_error_category::message(int EV) const { + object_error E = static_cast<object_error>(EV); + switch (E) { + case object_error::arch_not_found: + return "No object file for requested architecture"; + case object_error::invalid_file_type: + return "The file was not recognized as a valid object file"; + case object_error::parse_failed: + return "Invalid data was encountered while parsing the file"; + case object_error::unexpected_eof: + return "The end of the file was unexpectedly encountered"; + case object_error::string_table_non_null_end: + return "String table must end with a null terminator"; + case object_error::invalid_section_index: + return "Invalid section index"; + case object_error::bitcode_section_not_found: + return "Bitcode section not found in object file"; + case object_error::invalid_symbol_index: + return "Invalid symbol index"; + } + llvm_unreachable("An enumerator of object_error does not have a message " + "defined."); +} + +char BinaryError::ID = 0; +char GenericBinaryError::ID = 0; + +GenericBinaryError::GenericBinaryError(Twine Msg) : Msg(Msg.str()) {} + +GenericBinaryError::GenericBinaryError(Twine Msg, object_error ECOverride) + : Msg(Msg.str()) { + setErrorCode(make_error_code(ECOverride)); +} + +void GenericBinaryError::log(raw_ostream &OS) const { + OS << Msg; +} + +static ManagedStatic<_object_error_category> error_category; + +const std::error_category &object::object_category() { + return *error_category; +} + +llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) { + if (auto Err2 = + handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error { + // Try to handle 'M'. If successful, return a success value from + // the handler. + if (M->convertToErrorCode() == object_error::invalid_file_type) + return Error::success(); + + // We failed to handle 'M' - return it from the handler. + // This value will be passed back from catchErrors and + // wind up in Err2, where it will be returned from this function. + return Error(std::move(M)); + })) + return Err2; + return Err; +} diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp new file mode 100644 index 000000000000..adbf0de6d1bc --- /dev/null +++ b/contrib/llvm/lib/Object/IRObjectFile.cpp @@ -0,0 +1,140 @@ +//===- IRObjectFile.cpp - IR object file implementation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the IRObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRObjectFile.h" +#include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +IRObjectFile::IRObjectFile(MemoryBufferRef Object, + std::vector<std::unique_ptr<Module>> Mods) + : SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) { + for (auto &M : this->Mods) + SymTab.addModule(M.get()); +} + +IRObjectFile::~IRObjectFile() {} + +static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) { + return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p); +} + +void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + Symb.p += sizeof(ModuleSymbolTable::Symbol); +} + +std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + SymTab.printSymbolName(OS, getSym(Symb)); + return std::error_code(); +} + +uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { + return SymTab.getSymbolFlags(getSym(Symb)); +} + +basic_symbol_iterator IRObjectFile::symbol_begin() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data()); + return basic_symbol_iterator(BasicSymbolRef(Ret, this)); +} + +basic_symbol_iterator IRObjectFile::symbol_end() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data() + + SymTab.symbols().size()); + return basic_symbol_iterator(BasicSymbolRef(Ret, this)); +} + +StringRef IRObjectFile::getTargetTriple() const { + // Each module must have the same target triple, so we arbitrarily access the + // first one. + return Mods[0]->getTargetTriple(); +} + +ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + if (Sec.isBitcode()) { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) + return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: + return Object; + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return errorToErrorCode(ObjFile.takeError()); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + +Expected<std::unique_ptr<IRObjectFile>> +IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { + ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected<std::vector<BitcodeModule>> BMsOrErr = + getBitcodeModuleList(*BCOrErr); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + std::vector<std::unique_ptr<Module>> Mods; + for (auto BM : *BMsOrErr) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Context, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(std::move(*MOrErr)); + } + + return std::unique_ptr<IRObjectFile>( + new IRObjectFile(*BCOrErr, std::move(Mods))); +} diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp new file mode 100644 index 000000000000..5b018676eba3 --- /dev/null +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -0,0 +1,3601 @@ +//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOObjectFile class, which binds the MachOObject +// class to the generic ObjectFile wrapper. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachO.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> +#include <cstring> +#include <limits> +#include <list> + +using namespace llvm; +using namespace object; + +namespace { + struct section_base { + char sectname[16]; + char segname[16]; + }; +} + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed object (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +// FIXME: Replace all uses of this function with getStructOrErr. +template <typename T> +static T getStruct(const MachOObjectFile &O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) + report_fatal_error("Malformed MachO file."); + + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; +} + +template <typename T> +static Expected<T> getStructOrErr(const MachOObjectFile &O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) + return malformedError("Structure read out-of-range"); + + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; +} + +static const char * +getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L, + unsigned Sec) { + uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr); + + bool Is64 = O.is64Bit(); + unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) : + sizeof(MachO::segment_command); + unsigned SectionSize = Is64 ? sizeof(MachO::section_64) : + sizeof(MachO::section); + + uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize; + return reinterpret_cast<const char*>(SectionAddr); +} + +static const char *getPtr(const MachOObjectFile &O, size_t Offset) { + return O.getData().substr(Offset, 1).data(); +} + +static MachO::nlist_base +getSymbolTableEntryBase(const MachOObjectFile &O, DataRefImpl DRI) { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist_base>(O, P); +} + +static StringRef parseSegmentOrSectionName(const char *P) { + if (P[15] == 0) + // Null terminated. + return P; + // Not null terminated, so this is a 16 char string. + return StringRef(P, 16); +} + +// Helper to advance a section or symbol iterator multiple increments at a time. +template<class T> +static void advance(T &it, size_t Val) { + while (Val--) + ++it; +} + +static unsigned getCPUType(const MachOObjectFile &O) { + return O.getHeader().cputype; +} + +static uint32_t +getPlainRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0; +} + +static unsigned +getScatteredRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0 & 0xffffff; +} + +static bool getPlainRelocationPCRel(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return (RE.r_word1 >> 24) & 1; + return (RE.r_word1 >> 7) & 1; +} + +static bool +getScatteredRelocationPCRel(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 30) & 1; +} + +static unsigned getPlainRelocationLength(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return (RE.r_word1 >> 25) & 3; + return (RE.r_word1 >> 5) & 3; +} + +static unsigned +getScatteredRelocationLength(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 28) & 3; +} + +static unsigned getPlainRelocationType(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return RE.r_word1 >> 28; + return RE.r_word1 & 0xf; +} + +static uint32_t getSectionFlags(const MachOObjectFile &O, + DataRefImpl Sec) { + if (O.is64Bit()) { + MachO::section_64 Sect = O.getSection64(Sec); + return Sect.flags; + } + MachO::section Sect = O.getSection(Sec); + return Sect.flags; +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr, + uint32_t LoadCommandIndex) { + if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) { + if (CmdOrErr->cmdsize < 8) + return malformedError("load command " + Twine(LoadCommandIndex) + + " with size less than 8 bytes"); + return MachOObjectFile::LoadCommandInfo({Ptr, *CmdOrErr}); + } else + return CmdOrErr.takeError(); +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getFirstLoadCommandInfo(const MachOObjectFile &Obj) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds) + return malformedError("load command 0 extends past the end all load " + "commands in the file"); + return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0); +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex, + const MachOObjectFile::LoadCommandInfo &L) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) > + Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds) + return malformedError("load command " + Twine(LoadCommandIndex + 1) + + " extends past the end all load commands in the file"); + return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1); +} + +template <typename T> +static void parseHeader(const MachOObjectFile &Obj, T &Header, + Error &Err) { + if (sizeof(T) > Obj.getData().size()) { + Err = malformedError("the mach header extends past the end of the " + "file"); + return; + } + if (auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0))) + Header = *HeaderOrErr; + else + Err = HeaderOrErr.takeError(); +} + +// This is used to check for overlapping of Mach-O elements. +struct MachOElement { + uint64_t Offset; + uint64_t Size; + const char *Name; +}; + +static Error checkOverlappingElement(std::list<MachOElement> &Elements, + uint64_t Offset, uint64_t Size, + const char *Name) { + if (Size == 0) + return Error::success(); + + for (auto it=Elements.begin() ; it != Elements.end(); ++it) { + auto E = *it; + if ((Offset >= E.Offset && Offset < E.Offset + E.Size) || + (Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) || + (Offset <= E.Offset && Offset + Size >= E.Offset + E.Size)) + return malformedError(Twine(Name) + " at offset " + Twine(Offset) + + " with a size of " + Twine(Size) + ", overlaps " + + E.Name + " at offset " + Twine(E.Offset) + " with " + "a size of " + Twine(E.Size)); + auto nt = it; + nt++; + if (nt != Elements.end()) { + auto N = *nt; + if (Offset + Size <= N.Offset) { + Elements.insert(nt, {Offset, Size, Name}); + return Error::success(); + } + } + } + Elements.push_back({Offset, Size, Name}); + return Error::success(); +} + +// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all +// sections to \param Sections, and optionally sets +// \param IsPageZeroSegment to true. +template <typename Segment, typename Section> +static Error parseSegmentLoadCommand( + const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment, + uint32_t LoadCommandIndex, const char *CmdName, uint64_t SizeOfHeaders, + std::list<MachOElement> &Elements) { + const unsigned SegmentLoadSize = sizeof(Segment); + if (Load.C.cmdsize < SegmentLoadSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " " + CmdName + " cmdsize too small"); + if (auto SegOrErr = getStructOrErr<Segment>(Obj, Load.Ptr)) { + Segment S = SegOrErr.get(); + const unsigned SectionSize = sizeof(Section); + uint64_t FileSize = Obj.getData().size(); + if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize || + S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " inconsistent cmdsize in " + CmdName + + " for the number of sections"); + for (unsigned J = 0; J < S.nsects; ++J) { + const char *Sec = getSectionPtr(Obj, Load, J); + Sections.push_back(Sec); + Section s = getStruct<Section>(Obj, Sec); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.offset > FileSize) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && S.fileoff == 0 && + s.offset < SizeOfHeaders && s.size != 0) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " not past the headers of the file"); + uint64_t BigSize = s.offset; + BigSize += s.size; + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + BigSize > FileSize) + return malformedError("offset field plus size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.size > S.filesize) + return malformedError("size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than the segment"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && s.size != 0 && + s.addr < S.vmaddr) + return malformedError("addr field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " less than the segment's vmaddr"); + BigSize = s.addr; + BigSize += s.size; + uint64_t BigEnd = S.vmaddr; + BigEnd += S.vmsize; + if (S.vmsize != 0 && s.size != 0 && BigSize > BigEnd) + return malformedError("addr field plus size of section " + Twine(J) + + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than than " + "the segment's vmaddr plus vmsize"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL) + if (Error Err = checkOverlappingElement(Elements, s.offset, s.size, + "section contents")) + return Err; + if (s.reloff > FileSize) + return malformedError("reloff field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + BigSize = s.nreloc; + BigSize *= sizeof(struct MachO::relocation_info); + BigSize += s.reloff; + if (BigSize > FileSize) + return malformedError("reloff field plus nreloc field times sizeof(" + "struct relocation_info) of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, s.reloff, s.nreloc * + sizeof(struct + MachO::relocation_info), + "section relocation entries")) + return Err; + } + if (S.fileoff > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field in " + CmdName + + " extends past the end of the file"); + uint64_t BigSize = S.fileoff; + BigSize += S.filesize; + if (BigSize > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field plus filesize field in " + + CmdName + " extends past the end of the file"); + if (S.vmsize != 0 && S.filesize > S.vmsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field in " + CmdName + + " greater than vmsize field"); + IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); + } else + return SegOrErr.takeError(); + + return Error::success(); +} + +static Error checkSymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **SymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::symtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_SYMTAB cmdsize too small"); + if (*SymtabLoadCmd != nullptr) + return malformedError("more than one LC_SYMTAB command"); + MachO::symtab_command Symtab = + getStruct<MachO::symtab_command>(Obj, Load.Ptr); + if (Symtab.cmdsize != sizeof(MachO::symtab_command)) + return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Symtab.symoff > FileSize) + return malformedError("symoff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + uint64_t SymtabSize = Symtab.nsyms; + const char *struct_nlist_name; + if (Obj.is64Bit()) { + SymtabSize *= sizeof(MachO::nlist_64); + struct_nlist_name = "struct nlist_64"; + } else { + SymtabSize *= sizeof(MachO::nlist); + struct_nlist_name = "struct nlist"; + } + uint64_t BigSize = SymtabSize; + BigSize += Symtab.symoff; + if (BigSize > FileSize) + return malformedError("symoff field plus nsyms field times sizeof(" + + Twine(struct_nlist_name) + ") of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.symoff, SymtabSize, + "symbol table")) + return Err; + if (Symtab.stroff > FileSize) + return malformedError("stroff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + BigSize = Symtab.stroff; + BigSize += Symtab.strsize; + if (BigSize > FileSize) + return malformedError("stroff field plus strsize field of LC_SYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.stroff, + Symtab.strsize, "string table")) + return Err; + *SymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDysymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **DysymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dysymtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_DYSYMTAB cmdsize too small"); + if (*DysymtabLoadCmd != nullptr) + return malformedError("more than one LC_DYSYMTAB command"); + MachO::dysymtab_command Dysymtab = + getStruct<MachO::dysymtab_command>(Obj, Load.Ptr); + if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command)) + return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Dysymtab.tocoff > FileSize) + return malformedError("tocoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Dysymtab.ntoc; + BigSize *= sizeof(MachO::dylib_table_of_contents); + BigSize += Dysymtab.tocoff; + if (BigSize > FileSize) + return malformedError("tocoff field plus ntoc field times sizeof(struct " + "dylib_table_of_contents) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff, + Dysymtab.ntoc * sizeof(struct + MachO::dylib_table_of_contents), + "table of contents")) + return Err; + if (Dysymtab.modtaboff > FileSize) + return malformedError("modtaboff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nmodtab; + const char *struct_dylib_module_name; + uint64_t sizeof_modtab; + if (Obj.is64Bit()) { + sizeof_modtab = sizeof(MachO::dylib_module_64); + struct_dylib_module_name = "struct dylib_module_64"; + } else { + sizeof_modtab = sizeof(MachO::dylib_module); + struct_dylib_module_name = "struct dylib_module"; + } + BigSize *= sizeof_modtab; + BigSize += Dysymtab.modtaboff; + if (BigSize > FileSize) + return malformedError("modtaboff field plus nmodtab field times sizeof(" + + Twine(struct_dylib_module_name) + ") of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.modtaboff, + Dysymtab.nmodtab * sizeof_modtab, + "module table")) + return Err; + if (Dysymtab.extrefsymoff > FileSize) + return malformedError("extrefsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrefsyms; + BigSize *= sizeof(MachO::dylib_reference); + BigSize += Dysymtab.extrefsymoff; + if (BigSize > FileSize) + return malformedError("extrefsymoff field plus nextrefsyms field times " + "sizeof(struct dylib_reference) of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extrefsymoff, + Dysymtab.nextrefsyms * + sizeof(MachO::dylib_reference), + "reference table")) + return Err; + if (Dysymtab.indirectsymoff > FileSize) + return malformedError("indirectsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nindirectsyms; + BigSize *= sizeof(uint32_t); + BigSize += Dysymtab.indirectsymoff; + if (BigSize > FileSize) + return malformedError("indirectsymoff field plus nindirectsyms field times " + "sizeof(uint32_t) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff, + Dysymtab.nindirectsyms * + sizeof(uint32_t), + "indirect table")) + return Err; + if (Dysymtab.extreloff > FileSize) + return malformedError("extreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.extreloff; + if (BigSize > FileSize) + return malformedError("extreloff field plus nextrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extreloff, + Dysymtab.nextrel * + sizeof(MachO::relocation_info), + "external relocation table")) + return Err; + if (Dysymtab.locreloff > FileSize) + return malformedError("locreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nlocrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.locreloff; + if (BigSize > FileSize) + return malformedError("locreloff field plus nlocrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.locreloff, + Dysymtab.nlocrel * + sizeof(MachO::relocation_info), + "local relocation table")) + return Err; + *DysymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkeditDataCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements, + const char *ElementName) { + if (Load.C.cmdsize < sizeof(MachO::linkedit_data_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one " + Twine(CmdName) + " command"); + MachO::linkedit_data_command LinkData = + getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr); + if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (LinkData.dataoff > FileSize) + return malformedError("dataoff field of " + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = LinkData.dataoff; + BigSize += LinkData.datasize; + if (BigSize > FileSize) + return malformedError("dataoff field plus datasize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, LinkData.dataoff, + LinkData.datasize, ElementName)) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldInfoCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dyld_info_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY " + "command"); + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(Obj, Load.Ptr); + if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (DyldInfo.rebase_off > FileSize) + return malformedError("rebase_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = DyldInfo.rebase_off; + BigSize += DyldInfo.rebase_size; + if (BigSize > FileSize) + return malformedError("rebase_off field plus rebase_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.rebase_off, + DyldInfo.rebase_size, + "dyld rebase info")) + return Err; + if (DyldInfo.bind_off > FileSize) + return malformedError("bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.bind_off; + BigSize += DyldInfo.bind_size; + if (BigSize > FileSize) + return malformedError("bind_off field plus bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.bind_off, + DyldInfo.bind_size, + "dyld bind info")) + return Err; + if (DyldInfo.weak_bind_off > FileSize) + return malformedError("weak_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.weak_bind_off; + BigSize += DyldInfo.weak_bind_size; + if (BigSize > FileSize) + return malformedError("weak_bind_off field plus weak_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.weak_bind_off, + DyldInfo.weak_bind_size, + "dyld weak bind info")) + return Err; + if (DyldInfo.lazy_bind_off > FileSize) + return malformedError("lazy_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.lazy_bind_off; + BigSize += DyldInfo.lazy_bind_size; + if (BigSize > FileSize) + return malformedError("lazy_bind_off field plus lazy_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.lazy_bind_off, + DyldInfo.lazy_bind_size, + "dyld lazy bind info")) + return Err; + if (DyldInfo.export_off > FileSize) + return malformedError("export_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.export_off; + BigSize += DyldInfo.export_size; + if (BigSize > FileSize) + return malformedError("export_off field plus export_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.export_off, + DyldInfo.export_size, + "dyld export info")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDylibCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr); + if (D.dylib.name < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylib_command struct"); + if (D.dylib.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.dylib.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkDylibIdCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd) { + if (Error Err = checkDylibCommand(Obj, Load, LoadCommandIndex, + "LC_ID_DYLIB")) + return Err; + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ID_DYLIB command"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB && + Obj.getHeader().filetype != MachO::MH_DYLIB_STUB) + return malformedError("LC_ID_DYLIB load command in non-dynamic library " + "file type"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr); + if (D.name < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylinker_command struct"); + if (D.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " dyld name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkVersCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName) { + if (Load.C.cmdsize != sizeof(MachO::version_min_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_VERSION_MIN_MACOSX, " + "LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS or " + "LC_VERSION_MIN_WATCHOS command"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkRpathCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH cmdsize too small"); + MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr); + if (R.path < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field too small, not past " + "the end of the rpath_command struct"); + if (R.path >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = R.path; i < R.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkEncryptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + uint64_t cryptoff, uint64_t cryptsize, + const char **LoadCmd, const char *CmdName) { + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ENCRYPTION_INFO and or " + "LC_ENCRYPTION_INFO_64 command"); + uint64_t FileSize = Obj.getData().size(); + if (cryptoff > FileSize) + return malformedError("cryptoff field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = cryptoff; + BigSize += cryptsize; + if (BigSize > FileSize) + return malformedError("cryptoff field plus cryptsize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkerOptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::linker_option_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION cmdsize too small"); + MachO::linker_option_command L = + getStruct<MachO::linker_option_command>(Obj, Load.Ptr); + // Make sure the count of strings is correct. + const char *string = (const char *)Load.Ptr + + sizeof(struct MachO::linker_option_command); + uint32_t left = L.cmdsize - sizeof(struct MachO::linker_option_command); + uint32_t i = 0; + while (left > 0) { + while (*string == '\0' && left > 0) { + string++; + left--; + } + if (left > 0) { + i++; + uint32_t NullPos = StringRef(string, left).find('\0'); + uint32_t len = std::min(NullPos, left) + 1; + string += len; + left -= len; + } + } + if (L.count != i) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION string count " + Twine(L.count) + + " does not match number of strings"); + return Error::success(); +} + +static Error checkSubCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName, + size_t SizeOfCmd, const char *CmdStructName, + uint32_t PathOffset, const char *PathFieldName) { + if (PathOffset < SizeOfCmd) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field too " + "small, not past the end of the " + CmdStructName); + if (PathOffset >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field " + "extends past the end of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = PathOffset; i < Load.C.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + " name extends past " + "the end of the load command"); + return Error::success(); +} + +static Error checkThreadCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::thread_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + CmdName + " cmdsize too small"); + MachO::thread_command T = + getStruct<MachO::thread_command>(Obj, Load.Ptr); + const char *state = Load.Ptr + sizeof(MachO::thread_command); + const char *end = Load.Ptr + T.cmdsize; + uint32_t nflavor = 0; + uint32_t cputype = getCPUType(Obj); + while (state < end) { + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + "flavor in " + CmdName + " extends past end of " + "command"); + uint32_t flavor; + memcpy(&flavor, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(flavor); + state += sizeof(uint32_t); + + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count in " + CmdName + " extends past end of " + "command"); + uint32_t count; + memcpy(&count, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(count); + state += sizeof(uint32_t); + + if (cputype == MachO::CPU_TYPE_X86_64) { + if (flavor == MachO::x86_THREAD_STATE64) { + if (count != MachO::x86_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM) { + if (flavor == MachO::ARM_THREAD_STATE) { + if (count != MachO::ARM_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM64) { + if (flavor == MachO::ARM_THREAD_STATE64) { + if (count != MachO::ARM_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_POWERPC) { + if (flavor == MachO::PPC_THREAD_STATE) { + if (count != MachO::PPC_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not PPC_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a PPC_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::ppc_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " PPC_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::ppc_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else { + return malformedError("unknown cputype (" + Twine(cputype) + ") load " + "command " + Twine(LoadCommandIndex) + " for " + + CmdName + " command can't be checked"); + } + nflavor++; + } + return Error::success(); +} + +static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo + &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::twolevel_hints_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_TWOLEVEL_HINTS has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_TWOLEVEL_HINTS command"); + MachO::twolevel_hints_command Hints = + getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Hints.offset > FileSize) + return malformedError("offset field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Hints.nhints; + BigSize *= Hints.nhints * sizeof(MachO::twolevel_hint); + BigSize += Hints.offset; + if (BigSize > FileSize) + return malformedError("offset field plus nhints times sizeof(struct " + "twolevel_hint) field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Hints.offset, Hints.nhints * + sizeof(MachO::twolevel_hint), + "two level hints")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +// Returns true if the libObject code does not support the load command and its +// contents. The cmd value it is treated as an unknown load command but with +// an error message that says the cmd value is obsolete. +static bool isLoadCommandObsolete(uint32_t cmd) { + if (cmd == MachO::LC_SYMSEG || + cmd == MachO::LC_LOADFVMLIB || + cmd == MachO::LC_IDFVMLIB || + cmd == MachO::LC_IDENT || + cmd == MachO::LC_FVMFILE || + cmd == MachO::LC_PREPAGE || + cmd == MachO::LC_PREBOUND_DYLIB || + cmd == MachO::LC_TWOLEVEL_HINTS || + cmd == MachO::LC_PREBIND_CKSUM) + return true; + return false; +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian, + bool Is64Bits, uint32_t UniversalCputype, + uint32_t UniversalIndex) { + Error Err = Error::success(); + std::unique_ptr<MachOObjectFile> Obj( + new MachOObjectFile(std::move(Object), IsLittleEndian, + Is64Bits, Err, UniversalCputype, + UniversalIndex)); + if (Err) + return std::move(Err); + return std::move(Obj); +} + +MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, + bool Is64bits, Error &Err, + uint32_t UniversalCputype, + uint32_t UniversalIndex) + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), + SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), + DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), + DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), + HasPageZeroSegment(false) { + ErrorAsOutParameter ErrAsOutParam(&Err); + uint64_t SizeOfHeaders; + uint32_t cputype; + if (is64Bit()) { + parseHeader(*this, Header64, Err); + SizeOfHeaders = sizeof(MachO::mach_header_64); + cputype = Header64.cputype; + } else { + parseHeader(*this, Header, Err); + SizeOfHeaders = sizeof(MachO::mach_header); + cputype = Header.cputype; + } + if (Err) + return; + SizeOfHeaders += getHeader().sizeofcmds; + if (getData().data() + SizeOfHeaders > getData().end()) { + Err = malformedError("load commands extend past the end of the file"); + return; + } + if (UniversalCputype != 0 && cputype != UniversalCputype) { + Err = malformedError("universal header architecture: " + + Twine(UniversalIndex) + "'s cputype does not match " + "object file's mach header"); + return; + } + std::list<MachOElement> Elements; + Elements.push_back({0, SizeOfHeaders, "Mach-O headers"}); + + uint32_t LoadCommandCount = getHeader().ncmds; + LoadCommandInfo Load; + if (LoadCommandCount != 0) { + if (auto LoadOrErr = getFirstLoadCommandInfo(*this)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } + } + + const char *DyldIdLoadCmd = nullptr; + const char *FuncStartsLoadCmd = nullptr; + const char *SplitInfoLoadCmd = nullptr; + const char *CodeSignDrsLoadCmd = nullptr; + const char *CodeSignLoadCmd = nullptr; + const char *VersLoadCmd = nullptr; + const char *SourceLoadCmd = nullptr; + const char *EntryPointLoadCmd = nullptr; + const char *EncryptLoadCmd = nullptr; + const char *RoutinesLoadCmd = nullptr; + const char *UnixThreadLoadCmd = nullptr; + const char *TwoLevelHintsLoadCmd = nullptr; + for (unsigned I = 0; I < LoadCommandCount; ++I) { + if (is64Bit()) { + if (Load.C.cmdsize % 8 != 0) { + // We have a hack here to allow 64-bit Mach-O core files to have + // LC_THREAD commands that are only a multiple of 4 and not 8 to be + // allowed since the macOS kernel produces them. + if (getHeader().filetype != MachO::MH_CORE || + Load.C.cmd != MachO::LC_THREAD || Load.C.cmdsize % 4) { + Err = malformedError("load command " + Twine(I) + " cmdsize not a " + "multiple of 8"); + return; + } + } + } else { + if (Load.C.cmdsize % 4 != 0) { + Err = malformedError("load command " + Twine(I) + " cmdsize not a " + "multiple of 4"); + return; + } + } + LoadCommands.push_back(Load); + if (Load.C.cmd == MachO::LC_SYMTAB) { + if ((Err = checkSymtabCommand(*this, Load, I, &SymtabLoadCmd, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYSYMTAB) { + if ((Err = checkDysymtabCommand(*this, Load, I, &DysymtabLoadCmd, + Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &DataInCodeLoadCmd, + "LC_DATA_IN_CODE", Elements, + "data in code info"))) + return; + } else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &LinkOptHintsLoadCmd, + "LC_LINKER_OPTIMIZATION_HINT", + Elements, "linker optimization " + "hints"))) + return; + } else if (Load.C.cmd == MachO::LC_FUNCTION_STARTS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &FuncStartsLoadCmd, + "LC_FUNCTION_STARTS", Elements, + "function starts data"))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &SplitInfoLoadCmd, + "LC_SEGMENT_SPLIT_INFO", Elements, + "split info data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignDrsLoadCmd, + "LC_DYLIB_CODE_SIGN_DRS", Elements, + "code signing RDs data"))) + return; + } else if (Load.C.cmd == MachO::LC_CODE_SIGNATURE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignLoadCmd, + "LC_CODE_SIGNATURE", Elements, + "code signature data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO_ONLY", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_UUID) { + if (Load.C.cmdsize != sizeof(MachO::uuid_command)) { + Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect " + "cmdsize"); + return; + } + if (UuidLoadCmd) { + Err = malformedError("more than one LC_UUID command"); + return; + } + UuidLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_SEGMENT_64) { + if ((Err = parseSegmentLoadCommand<MachO::segment_command_64, + MachO::section_64>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT_64", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + if ((Err = parseSegmentLoadCommand<MachO::segment_command, + MachO::section>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_ID_DYLIB) { + if ((Err = checkDylibIdCommand(*this, Load, I, &DyldIdLoadCmd))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_WEAK_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LAZY_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_REEXPORT_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_UPWARD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_ID_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_ID_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_LOAD_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_ENVIRONMENT) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_DYLD_ENVIRONMENT"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_MACOSX) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_MACOSX"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_IPHONEOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_TVOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_TVOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_WATCHOS"))) + return; + } else if (Load.C.cmd == MachO::LC_RPATH) { + if ((Err = checkRpathCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SOURCE_VERSION) { + if (Load.C.cmdsize != sizeof(MachO::source_version_command)) { + Err = malformedError("LC_SOURCE_VERSION command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (SourceLoadCmd) { + Err = malformedError("more than one LC_SOURCE_VERSION command"); + return; + } + SourceLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_MAIN) { + if (Load.C.cmdsize != sizeof(MachO::entry_point_command)) { + Err = malformedError("LC_MAIN command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (EntryPointLoadCmd) { + Err = malformedError("more than one LC_MAIN command"); + return; + } + EntryPointLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command)) { + Err = malformedError("LC_ENCRYPTION_INFO command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command E = + getStruct<MachO::encryption_info_command>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO"))) + return; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO_64) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command_64)) { + Err = malformedError("LC_ENCRYPTION_INFO_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command_64 E = + getStruct<MachO::encryption_info_command_64>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO_64"))) + return; + } else if (Load.C.cmd == MachO::LC_LINKER_OPTION) { + if ((Err = checkLinkerOptCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_FRAMEWORK) { + if (Load.C.cmdsize < sizeof(MachO::sub_framework_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_FRAMEWORK cmdsize too small"); + return; + } + MachO::sub_framework_command S = + getStruct<MachO::sub_framework_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_FRAMEWORK", + sizeof(MachO::sub_framework_command), + "sub_framework_command", S.umbrella, + "umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_UMBRELLA) { + if (Load.C.cmdsize < sizeof(MachO::sub_umbrella_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_UMBRELLA cmdsize too small"); + return; + } + MachO::sub_umbrella_command S = + getStruct<MachO::sub_umbrella_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_UMBRELLA", + sizeof(MachO::sub_umbrella_command), + "sub_umbrella_command", S.sub_umbrella, + "sub_umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_LIBRARY) { + if (Load.C.cmdsize < sizeof(MachO::sub_library_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_LIBRARY cmdsize too small"); + return; + } + MachO::sub_library_command S = + getStruct<MachO::sub_library_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_LIBRARY", + sizeof(MachO::sub_library_command), + "sub_library_command", S.sub_library, + "sub_library"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_CLIENT) { + if (Load.C.cmdsize < sizeof(MachO::sub_client_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_CLIENT cmdsize too small"); + return; + } + MachO::sub_client_command S = + getStruct<MachO::sub_client_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_CLIENT", + sizeof(MachO::sub_client_command), + "sub_client_command", S.client, "client"))) + return; + } else if (Load.C.cmd == MachO::LC_ROUTINES) { + if (Load.C.cmdsize != sizeof(MachO::routines_command)) { + Err = malformedError("LC_ROUTINES command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES and or LC_ROUTINES_64 " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ROUTINES_64) { + if (Load.C.cmdsize != sizeof(MachO::routines_command_64)) { + Err = malformedError("LC_ROUTINES_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES_64 and or LC_ROUTINES " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_UNIXTHREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_UNIXTHREAD"))) + return; + if (UnixThreadLoadCmd) { + Err = malformedError("more than one LC_UNIXTHREAD command"); + return; + } + UnixThreadLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_THREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_THREAD"))) + return; + // Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported. + } else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) { + if ((Err = checkTwoLevelHintsCommand(*this, Load, I, + &TwoLevelHintsLoadCmd, Elements))) + return; + } else if (isLoadCommandObsolete(Load.C.cmd)) { + Err = malformedError("load command " + Twine(I) + " for cmd value of: " + + Twine(Load.C.cmd) + " is obsolete and not " + "supported"); + return; + } + // TODO: generate a error for unknown load commands by default. But still + // need work out an approach to allow or not allow unknown values like this + // as an option for some uses like lldb. + if (I < LoadCommandCount - 1) { + if (auto LoadOrErr = getNextLoadCommandInfo(*this, I, Load)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } + } + } + if (!SymtabLoadCmd) { + if (DysymtabLoadCmd) { + Err = malformedError("contains LC_DYSYMTAB load command without a " + "LC_SYMTAB load command"); + return; + } + } else if (DysymtabLoadCmd) { + MachO::symtab_command Symtab = + getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); + MachO::dysymtab_command Dysymtab = + getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); + if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + Err = malformedError("ilocalsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + uint64_t BigSize = Dysymtab.ilocalsym; + BigSize += Dysymtab.nlocalsym; + if (Dysymtab.nlocalsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("ilocalsym plus nlocalsym in LC_DYSYMTAB load " + "command extends past the end of the symbol table"); + return; + } + if (Dysymtab.nextdefsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + Err = malformedError("nextdefsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + BigSize = Dysymtab.iextdefsym; + BigSize += Dysymtab.nextdefsym; + if (Dysymtab.nextdefsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("iextdefsym plus nextdefsym in LC_DYSYMTAB " + "load command extends past the end of the symbol " + "table"); + return; + } + if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) { + Err = malformedError("nundefsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + BigSize = Dysymtab.iundefsym; + BigSize += Dysymtab.nundefsym; + if (Dysymtab.nundefsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("iundefsym plus nundefsym in LC_DYSYMTAB load " + " command extends past the end of the symbol table"); + return; + } + } + if ((getHeader().filetype == MachO::MH_DYLIB || + getHeader().filetype == MachO::MH_DYLIB_STUB) && + DyldIdLoadCmd == nullptr) { + Err = malformedError("no LC_ID_DYLIB load command in dynamic library " + "filetype"); + return; + } + assert(LoadCommands.size() == LoadCommandCount); + + Err = Error::success(); +} + +Error MachOObjectFile::checkSymbolTable() const { + uint32_t Flags = 0; + if (is64Bit()) { + MachO::mach_header_64 H_64 = MachOObjectFile::getHeader64(); + Flags = H_64.flags; + } else { + MachO::mach_header H = MachOObjectFile::getHeader(); + Flags = H.flags; + } + uint8_t NType = 0; + uint8_t NSect = 0; + uint16_t NDesc = 0; + uint32_t NStrx = 0; + uint64_t NValue = 0; + uint32_t SymbolIndex = 0; + MachO::symtab_command S = getSymtabLoadCommand(); + for (const SymbolRef &Symbol : symbols()) { + DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); + if (is64Bit()) { + MachO::nlist_64 STE_64 = getSymbol64TableEntry(SymDRI); + NType = STE_64.n_type; + NSect = STE_64.n_sect; + NDesc = STE_64.n_desc; + NStrx = STE_64.n_strx; + NValue = STE_64.n_value; + } else { + MachO::nlist STE = getSymbolTableEntry(SymDRI); + NType = STE.n_type; + NType = STE.n_type; + NSect = STE.n_sect; + NDesc = STE.n_desc; + NStrx = STE.n_strx; + NValue = STE.n_value; + } + if ((NType & MachO::N_STAB) == 0 && + (NType & MachO::N_TYPE) == MachO::N_SECT) { + if (NSect == 0 || NSect > Sections.size()) + return malformedError("bad section index: " + Twine((int)NSect) + + " for symbol at index " + Twine(SymbolIndex)); + } + if ((NType & MachO::N_STAB) == 0 && + (NType & MachO::N_TYPE) == MachO::N_INDR) { + if (NValue >= S.strsize) + return malformedError("bad n_value: " + Twine((int)NValue) + " past " + "the end of string table, for N_INDR symbol at " + "index " + Twine(SymbolIndex)); + } + if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL && + (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) || + (NType & MachO::N_TYPE) == MachO::N_PBUD)) { + uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc); + if (LibraryOrdinal != 0 && + LibraryOrdinal != MachO::EXECUTABLE_ORDINAL && + LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL && + LibraryOrdinal - 1 >= Libraries.size() ) { + return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) + + " for symbol at index " + Twine(SymbolIndex)); + } + } + if (NStrx >= S.strsize) + return malformedError("bad string table index: " + Twine((int)NStrx) + + " past the end of string table, for symbol at " + "index " + Twine(SymbolIndex)); + SymbolIndex++; + } + return Error::success(); +} + +void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + Symb.p += SymbolTableEntrySize; +} + +Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const { + StringRef StringTable = getStringTableData(); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + const char *Start = &StringTable.data()[Entry.n_strx]; + if (Start < getData().begin() || Start >= getData().end()) { + return malformedError("bad string index: " + Twine(Entry.n_strx) + + " for symbol at index " + Twine(getSymbolIndex(Symb))); + } + return StringRef(Start); +} + +unsigned MachOObjectFile::getSectionType(SectionRef Sec) const { + DataRefImpl DRI = Sec.getRawDataRefImpl(); + uint32_t Flags = getSectionFlags(*this, DRI); + return Flags & MachO::SECTION_TYPE; +} + +uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const { + if (is64Bit()) { + MachO::nlist_64 Entry = getSymbol64TableEntry(Sym); + return Entry.n_value; + } + MachO::nlist Entry = getSymbolTableEntry(Sym); + return Entry.n_value; +} + +// getIndirectName() returns the name of the alias'ed symbol who's string table +// index is in the n_value field. +std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, + StringRef &Res) const { + StringRef StringTable = getStringTableData(); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR) + return object_error::parse_failed; + uint64_t NValue = getNValue(Symb); + if (NValue >= StringTable.size()) + return object_error::parse_failed; + const char *Start = &StringTable.data()[NValue]; + Res = StringRef(Start); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const { + return getNValue(Sym); +} + +Expected<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const { + return getSymbolValue(Sym); +} + +uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { + uint32_t flags = getSymbolFlags(DRI); + if (flags & SymbolRef::SF_Common) { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); + return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); + } + return 0; +} + +uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { + return getNValue(DRI); +} + +Expected<SymbolRef::Type> +MachOObjectFile::getSymbolType(DataRefImpl Symb) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + uint8_t n_type = Entry.n_type; + + // If this is a STAB debugging symbol, we can do nothing more. + if (n_type & MachO::N_STAB) + return SymbolRef::ST_Debug; + + switch (n_type & MachO::N_TYPE) { + case MachO::N_UNDF : + return SymbolRef::ST_Unknown; + case MachO::N_SECT : + Expected<section_iterator> SecOrError = getSymbolSection(Symb); + if (!SecOrError) + return SecOrError.takeError(); + section_iterator Sec = *SecOrError; + if (Sec->isData() || Sec->isBSS()) + return SymbolRef::ST_Data; + return SymbolRef::ST_Function; + } + return SymbolRef::ST_Other; +} + +uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); + + uint8_t MachOType = Entry.n_type; + uint16_t MachOFlags = Entry.n_desc; + + uint32_t Result = SymbolRef::SF_None; + + if ((MachOType & MachO::N_TYPE) == MachO::N_INDR) + Result |= SymbolRef::SF_Indirect; + + if (MachOType & MachO::N_STAB) + Result |= SymbolRef::SF_FormatSpecific; + + if (MachOType & MachO::N_EXT) { + Result |= SymbolRef::SF_Global; + if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { + if (getNValue(DRI)) + Result |= SymbolRef::SF_Common; + else + Result |= SymbolRef::SF_Undefined; + } + + if (!(MachOType & MachO::N_PEXT)) + Result |= SymbolRef::SF_Exported; + } + + if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) + Result |= SymbolRef::SF_Weak; + + if (MachOFlags & (MachO::N_ARM_THUMB_DEF)) + Result |= SymbolRef::SF_Thumb; + + if ((MachOType & MachO::N_TYPE) == MachO::N_ABS) + Result |= SymbolRef::SF_Absolute; + + return Result; +} + +Expected<section_iterator> +MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + uint8_t index = Entry.n_sect; + + if (index == 0) + return section_end(); + DataRefImpl DRI; + DRI.d.a = index - 1; + if (DRI.d.a >= Sections.size()){ + return malformedError("bad section index: " + Twine((int)index) + + " for symbol at index " + Twine(getSymbolIndex(Symb))); + } + return section_iterator(SectionRef(DRI, this)); +} + +unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const { + MachO::nlist_base Entry = + getSymbolTableEntryBase(*this, Sym.getRawDataRefImpl()); + return Entry.n_sect - 1; +} + +void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const { + Sec.d.a++; +} + +std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Result) const { + ArrayRef<char> Raw = getSectionRawName(Sec); + Result = parseSegmentOrSectionName(Raw.data()); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { + if (is64Bit()) + return getSection64(Sec).addr; + return getSection(Sec).addr; +} + +uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { + // In the case if a malformed Mach-O file where the section offset is past + // the end of the file or some part of the section size is past the end of + // the file return a size of zero or a size that covers the rest of the file + // but does not extend past the end of the file. + uint32_t SectOffset, SectType; + uint64_t SectSize; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } else { + MachO::section Sect = getSection(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } + if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL) + return SectSize; + uint64_t FileSize = getData().size(); + if (SectOffset > FileSize) + return 0; + if (FileSize - SectOffset < SectSize) + return FileSize - SectOffset; + return SectSize; +} + +std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Res) const { + uint32_t Offset; + uint64_t Size; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.offset; + Size = Sect.size; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.offset; + Size = Sect.size; + } + + Res = this->getData().substr(Offset, Size); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { + uint32_t Align; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Align = Sect.align; + } else { + MachO::section Sect = getSection(Sec); + Align = Sect.align; + } + + return uint64_t(1) << Align; +} + +bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool MachOObjectFile::isSectionText(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; +} + +bool MachOObjectFile::isSectionData(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && + !(SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL); +} + +bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && + (SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL); +} + +unsigned MachOObjectFile::getSectionID(SectionRef Sec) const { + return Sec.getRawDataRefImpl().d.a; +} + +bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const { + // FIXME: Unimplemented. + return false; +} + +bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const { + StringRef SegmentName = getSectionFinalSegmentName(Sec); + StringRef SectName; + if (!getSectionName(Sec, SectName)) + return (SegmentName == "__LLVM" && SectName == "__bitcode"); + return false; +} + +relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const { + DataRefImpl Ret; + Ret.d.a = Sec.d.a; + Ret.d.b = 0; + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator +MachOObjectFile::section_rel_end(DataRefImpl Sec) const { + uint32_t Num; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Num = Sect.nreloc; + } else { + MachO::section Sect = getSection(Sec); + Num = Sect.nreloc; + } + + DataRefImpl Ret; + Ret.d.a = Sec.d.a; + Ret.d.b = Num; + return relocation_iterator(RelocationRef(Ret, this)); +} + +void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + ++Rel.d.b; +} + +uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { + assert(getHeader().filetype == MachO::MH_OBJECT && + "Only implemented for MH_OBJECT"); + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationAddress(RE); +} + +symbol_iterator +MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + if (isRelocationScattered(RE)) + return symbol_end(); + + uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); + bool isExtern = getPlainRelocationExternal(RE); + if (!isExtern) + return symbol_end(); + + MachO::symtab_command S = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize; + DataRefImpl Sym; + Sym.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return symbol_iterator(SymbolRef(Sym, this)); +} + +section_iterator +MachOObjectFile::getRelocationSection(DataRefImpl Rel) const { + return section_iterator(getAnyRelocationSection(getRelocation(Rel))); +} + +uint64_t MachOObjectFile::getRelocationType(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationType(RE); +} + +void MachOObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + StringRef res; + uint64_t RType = getRelocationType(Rel); + + unsigned Arch = this->getArch(); + + switch (Arch) { + case Triple::x86: { + static const char *const Table[] = { + "GENERIC_RELOC_VANILLA", + "GENERIC_RELOC_PAIR", + "GENERIC_RELOC_SECTDIFF", + "GENERIC_RELOC_PB_LA_PTR", + "GENERIC_RELOC_LOCAL_SECTDIFF", + "GENERIC_RELOC_TLV" }; + + if (RType > 5) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::x86_64: { + static const char *const Table[] = { + "X86_64_RELOC_UNSIGNED", + "X86_64_RELOC_SIGNED", + "X86_64_RELOC_BRANCH", + "X86_64_RELOC_GOT_LOAD", + "X86_64_RELOC_GOT", + "X86_64_RELOC_SUBTRACTOR", + "X86_64_RELOC_SIGNED_1", + "X86_64_RELOC_SIGNED_2", + "X86_64_RELOC_SIGNED_4", + "X86_64_RELOC_TLV" }; + + if (RType > 9) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::arm: { + static const char *const Table[] = { + "ARM_RELOC_VANILLA", + "ARM_RELOC_PAIR", + "ARM_RELOC_SECTDIFF", + "ARM_RELOC_LOCAL_SECTDIFF", + "ARM_RELOC_PB_LA_PTR", + "ARM_RELOC_BR24", + "ARM_THUMB_RELOC_BR22", + "ARM_THUMB_32BIT_BRANCH", + "ARM_RELOC_HALF", + "ARM_RELOC_HALF_SECTDIFF" }; + + if (RType > 9) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::aarch64: { + static const char *const Table[] = { + "ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR", + "ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21", + "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT", + "ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + "ARM64_RELOC_ADDEND" + }; + + if (RType >= array_lengthof(Table)) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::ppc: { + static const char *const Table[] = { + "PPC_RELOC_VANILLA", + "PPC_RELOC_PAIR", + "PPC_RELOC_BR14", + "PPC_RELOC_BR24", + "PPC_RELOC_HI16", + "PPC_RELOC_LO16", + "PPC_RELOC_HA16", + "PPC_RELOC_LO14", + "PPC_RELOC_SECTDIFF", + "PPC_RELOC_PB_LA_PTR", + "PPC_RELOC_HI16_SECTDIFF", + "PPC_RELOC_LO16_SECTDIFF", + "PPC_RELOC_HA16_SECTDIFF", + "PPC_RELOC_JBSR", + "PPC_RELOC_LO14_SECTDIFF", + "PPC_RELOC_LOCAL_SECTDIFF" }; + + if (RType > 15) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::UnknownArch: + res = "Unknown"; + break; + } + Result.append(res.begin(), res.end()); +} + +uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationLength(RE); +} + +// +// guessLibraryShortName() is passed a name of a dynamic library and returns a +// guess on what the short name is. Then name is returned as a substring of the +// StringRef Name passed in. The name of the dynamic library is recognized as +// a framework if it has one of the two following forms: +// Foo.framework/Versions/A/Foo +// Foo.framework/Foo +// Where A and Foo can be any string. And may contain a trailing suffix +// starting with an underbar. If the Name is recognized as a framework then +// isFramework is set to true else it is set to false. If the Name has a +// suffix then Suffix is set to the substring in Name that contains the suffix +// else it is set to a NULL StringRef. +// +// The Name of the dynamic library is recognized as a library name if it has +// one of the two following forms: +// libFoo.A.dylib +// libFoo.dylib +// The library may have a suffix trailing the name Foo of the form: +// libFoo_profile.A.dylib +// libFoo_profile.dylib +// +// The Name of the dynamic library is also recognized as a library name if it +// has the following form: +// Foo.qtx +// +// If the Name of the dynamic library is none of the forms above then a NULL +// StringRef is returned. +// +StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, + bool &isFramework, + StringRef &Suffix) { + StringRef Foo, F, DotFramework, V, Dylib, Lib, Dot, Qtx; + size_t a, b, c, d, Idx; + + isFramework = false; + Suffix = StringRef(); + + // Pull off the last component and make Foo point to it + a = Name.rfind('/'); + if (a == Name.npos || a == 0) + goto guess_library; + Foo = Name.slice(a+1, Name.npos); + + // Look for a suffix starting with a '_' + Idx = Foo.rfind('_'); + if (Idx != Foo.npos && Foo.size() >= 2) { + Suffix = Foo.slice(Idx, Foo.npos); + Foo = Foo.slice(0, Idx); + } + + // First look for the form Foo.framework/Foo + b = Name.rfind('/', a); + if (b == Name.npos) + Idx = 0; + else + Idx = b+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + + // Next look for the form Foo.framework/Versions/A/Foo + if (b == Name.npos) + goto guess_library; + c = Name.rfind('/', b); + if (c == Name.npos || c == 0) + goto guess_library; + V = Name.slice(c+1, Name.npos); + if (!V.startswith("Versions/")) + goto guess_library; + d = Name.rfind('/', c); + if (d == Name.npos) + Idx = 0; + else + Idx = d+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + +guess_library: + // pull off the suffix after the "." and make a point to it + a = Name.rfind('.'); + if (a == Name.npos || a == 0) + return StringRef(); + Dylib = Name.slice(a, Name.npos); + if (Dylib != ".dylib") + goto guess_qtx; + + // First pull off the version letter for the form Foo.A.dylib if any. + if (a >= 3) { + Dot = Name.slice(a-2, a-1); + if (Dot == ".") + a = a - 2; + } + + b = Name.rfind('/', a); + if (b == Name.npos) + b = 0; + else + b = b+1; + // ignore any suffix after an underbar like Foo_profile.A.dylib + Idx = Name.find('_', b); + if (Idx != Name.npos && Idx != b) { + Lib = Name.slice(b, Idx); + Suffix = Name.slice(Idx, a); + } + else + Lib = Name.slice(b, a); + // There are incorrect library names of the form: + // libATS.A_profile.dylib so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; + +guess_qtx: + Qtx = Name.slice(a, Name.npos); + if (Qtx != ".qtx") + return StringRef(); + b = Name.rfind('/', a); + if (b == Name.npos) + Lib = Name.slice(0, a); + else + Lib = Name.slice(b+1, a); + // There are library names of the form: QT.A.qtx so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; +} + +// getLibraryShortNameByIndex() is used to get the short name of the library +// for an undefined symbol in a linked Mach-O binary that was linked with the +// normal two-level namespace default (that is MH_TWOLEVEL in the header). +// It is passed the index (0 - based) of the library as translated from +// GET_LIBRARY_ORDINAL (1 - based). +std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, + StringRef &Res) const { + if (Index >= Libraries.size()) + return object_error::parse_failed; + + // If the cache of LibrariesShortNames is not built up do that first for + // all the Libraries. + if (LibrariesShortNames.size() == 0) { + for (unsigned i = 0; i < Libraries.size(); i++) { + MachO::dylib_command D = + getStruct<MachO::dylib_command>(*this, Libraries[i]); + if (D.dylib.name >= D.cmdsize) + return object_error::parse_failed; + const char *P = (const char *)(Libraries[i]) + D.dylib.name; + StringRef Name = StringRef(P); + if (D.dylib.name+Name.size() >= D.cmdsize) + return object_error::parse_failed; + StringRef Suffix; + bool isFramework; + StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix); + if (shortName.empty()) + LibrariesShortNames.push_back(Name); + else + LibrariesShortNames.push_back(shortName); + } + } + + Res = LibrariesShortNames[Index]; + return std::error_code(); +} + +section_iterator +MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { + DataRefImpl Sec; + Sec.d.a = Rel->getRawDataRefImpl().d.a; + return section_iterator(SectionRef(Sec, this)); +} + +basic_symbol_iterator MachOObjectFile::symbol_begin() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + + return getSymbolByIndex(0); +} + +basic_symbol_iterator MachOObjectFile::symbol_end() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + unsigned Offset = Symtab.symoff + + Symtab.nsyms * SymbolTableEntrySize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return basic_symbol_iterator(SymbolRef(DRI, this)); +} + +basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const { + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Index >= Symtab.nsyms) + report_fatal_error("Requested symbol index is out of range."); + unsigned SymbolTableEntrySize = + is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRI; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); + DRI.p += Index * SymbolTableEntrySize; + return basic_symbol_iterator(SymbolRef(DRI, this)); +} + +uint64_t MachOObjectFile::getSymbolIndex(DataRefImpl Symb) const { + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd) + report_fatal_error("getSymbolIndex() called with no symbol table symbol"); + unsigned SymbolTableEntrySize = + is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRIstart; + DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); + uint64_t Index = (Symb.p - DRIstart.p) / SymbolTableEntrySize; + return Index; +} + +section_iterator MachOObjectFile::section_begin() const { + DataRefImpl DRI; + return section_iterator(SectionRef(DRI, this)); +} + +section_iterator MachOObjectFile::section_end() const { + DataRefImpl DRI; + DRI.d.a = Sections.size(); + return section_iterator(SectionRef(DRI, this)); +} + +uint8_t MachOObjectFile::getBytesInAddress() const { + return is64Bit() ? 8 : 4; +} + +StringRef MachOObjectFile::getFileFormatName() const { + unsigned CPUType = getCPUType(*this); + if (!is64Bit()) { + switch (CPUType) { + case llvm::MachO::CPU_TYPE_I386: + return "Mach-O 32-bit i386"; + case llvm::MachO::CPU_TYPE_ARM: + return "Mach-O arm"; + case llvm::MachO::CPU_TYPE_POWERPC: + return "Mach-O 32-bit ppc"; + default: + return "Mach-O 32-bit unknown"; + } + } + + switch (CPUType) { + case llvm::MachO::CPU_TYPE_X86_64: + return "Mach-O 64-bit x86-64"; + case llvm::MachO::CPU_TYPE_ARM64: + return "Mach-O arm64"; + case llvm::MachO::CPU_TYPE_POWERPC64: + return "Mach-O 64-bit ppc64"; + default: + return "Mach-O 64-bit unknown"; + } +} + +Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { + switch (CPUType) { + case llvm::MachO::CPU_TYPE_I386: + return Triple::x86; + case llvm::MachO::CPU_TYPE_X86_64: + return Triple::x86_64; + case llvm::MachO::CPU_TYPE_ARM: + return Triple::arm; + case llvm::MachO::CPU_TYPE_ARM64: + return Triple::aarch64; + case llvm::MachO::CPU_TYPE_POWERPC: + return Triple::ppc; + case llvm::MachO::CPU_TYPE_POWERPC64: + return Triple::ppc64; + default: + return Triple::UnknownArch; + } +} + +Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, + const char **McpuDefault, + const char **ArchFlag) { + if (McpuDefault) + *McpuDefault = nullptr; + if (ArchFlag) + *ArchFlag = nullptr; + + switch (CPUType) { + case MachO::CPU_TYPE_I386: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_I386_ALL: + if (ArchFlag) + *ArchFlag = "i386"; + return Triple("i386-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_X86_64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_X86_64_ALL: + if (ArchFlag) + *ArchFlag = "x86_64"; + return Triple("x86_64-apple-darwin"); + case MachO::CPU_SUBTYPE_X86_64_H: + if (ArchFlag) + *ArchFlag = "x86_64h"; + return Triple("x86_64h-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM_V4T: + if (ArchFlag) + *ArchFlag = "armv4t"; + return Triple("armv4t-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V5TEJ: + if (ArchFlag) + *ArchFlag = "armv5e"; + return Triple("armv5e-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_XSCALE: + if (ArchFlag) + *ArchFlag = "xscale"; + return Triple("xscale-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6: + if (ArchFlag) + *ArchFlag = "armv6"; + return Triple("armv6-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6M: + if (McpuDefault) + *McpuDefault = "cortex-m0"; + if (ArchFlag) + *ArchFlag = "armv6m"; + return Triple("armv6m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7: + if (ArchFlag) + *ArchFlag = "armv7"; + return Triple("armv7-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7EM: + if (McpuDefault) + *McpuDefault = "cortex-m4"; + if (ArchFlag) + *ArchFlag = "armv7em"; + return Triple("thumbv7em-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7K: + if (ArchFlag) + *ArchFlag = "armv7k"; + return Triple("armv7k-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7M: + if (McpuDefault) + *McpuDefault = "cortex-m3"; + if (ArchFlag) + *ArchFlag = "armv7m"; + return Triple("thumbv7m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7S: + if (ArchFlag) + *ArchFlag = "armv7s"; + return Triple("armv7s-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM64_ALL: + if (ArchFlag) + *ArchFlag = "arm64"; + return Triple("arm64-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc"; + return Triple("ppc-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc64"; + return Triple("ppc64-apple-darwin"); + default: + return Triple(); + } + default: + return Triple(); + } +} + +Triple MachOObjectFile::getHostArch() { + return Triple(sys::getDefaultTargetTriple()); +} + +bool MachOObjectFile::isValidArch(StringRef ArchFlag) { + return StringSwitch<bool>(ArchFlag) + .Case("i386", true) + .Case("x86_64", true) + .Case("x86_64h", true) + .Case("armv4t", true) + .Case("arm", true) + .Case("armv5e", true) + .Case("armv6", true) + .Case("armv6m", true) + .Case("armv7", true) + .Case("armv7em", true) + .Case("armv7k", true) + .Case("armv7m", true) + .Case("armv7s", true) + .Case("arm64", true) + .Case("ppc", true) + .Case("ppc64", true) + .Default(false); +} + +unsigned MachOObjectFile::getArch() const { + return getArch(getCPUType(*this)); +} + +Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const { + return getArchTriple(Header.cputype, Header.cpusubtype, McpuDefault); +} + +relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return section_rel_begin(DRI); +} + +relocation_iterator MachOObjectFile::section_rel_end(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return section_rel_end(DRI); +} + +dice_iterator MachOObjectFile::begin_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, DicLC.dataoff)); + return dice_iterator(DiceRef(DRI, this)); +} + +dice_iterator MachOObjectFile::end_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + unsigned Offset = DicLC.dataoff + DicLC.datasize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return dice_iterator(DiceRef(DRI, this)); +} + +ExportEntry::ExportEntry(ArrayRef<uint8_t> T) + : Trie(T), Malformed(false), Done(false) {} + +void ExportEntry::moveToFirst() { + pushNode(0); + pushDownUntilBottom(); +} + +void ExportEntry::moveToEnd() { + Stack.clear(); + Done = true; +} + +bool ExportEntry::operator==(const ExportEntry &Other) const { + // Common case, one at end, other iterating from begin. + if (Done || Other.Done) + return (Done == Other.Done); + // Not equal if different stack sizes. + if (Stack.size() != Other.Stack.size()) + return false; + // Not equal if different cumulative strings. + if (!CumulativeString.equals(Other.CumulativeString)) + return false; + // Equal if all nodes in both stacks match. + for (unsigned i=0; i < Stack.size(); ++i) { + if (Stack[i].Start != Other.Stack[i].Start) + return false; + } + return true; +} + +uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + if (Ptr > Trie.end()) { + Ptr = Trie.end(); + Malformed = true; + } + return Result; +} + +StringRef ExportEntry::name() const { + return CumulativeString; +} + +uint64_t ExportEntry::flags() const { + return Stack.back().Flags; +} + +uint64_t ExportEntry::address() const { + return Stack.back().Address; +} + +uint64_t ExportEntry::other() const { + return Stack.back().Other; +} + +StringRef ExportEntry::otherName() const { + const char* ImportName = Stack.back().ImportName; + if (ImportName) + return StringRef(ImportName); + return StringRef(); +} + +uint32_t ExportEntry::nodeOffset() const { + return Stack.back().Start - Trie.begin(); +} + +ExportEntry::NodeState::NodeState(const uint8_t *Ptr) + : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), + ImportName(nullptr), ChildCount(0), NextChildIndex(0), + ParentStringLength(0), IsExportNode(false) {} + +void ExportEntry::pushNode(uint64_t offset) { + const uint8_t *Ptr = Trie.begin() + offset; + NodeState State(Ptr); + uint64_t ExportInfoSize = readULEB128(State.Current); + State.IsExportNode = (ExportInfoSize != 0); + const uint8_t* Children = State.Current + ExportInfoSize; + if (State.IsExportNode) { + State.Flags = readULEB128(State.Current); + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + State.Address = 0; + State.Other = readULEB128(State.Current); // dylib ordinal + State.ImportName = reinterpret_cast<const char*>(State.Current); + } else { + State.Address = readULEB128(State.Current); + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + State.Other = readULEB128(State.Current); + } + } + State.ChildCount = *Children; + State.Current = Children + 1; + State.NextChildIndex = 0; + State.ParentStringLength = CumulativeString.size(); + Stack.push_back(State); +} + +void ExportEntry::pushDownUntilBottom() { + while (Stack.back().NextChildIndex < Stack.back().ChildCount) { + NodeState &Top = Stack.back(); + CumulativeString.resize(Top.ParentStringLength); + for (;*Top.Current != 0; Top.Current++) { + char C = *Top.Current; + CumulativeString.push_back(C); + } + Top.Current += 1; + uint64_t childNodeIndex = readULEB128(Top.Current); + Top.NextChildIndex += 1; + pushNode(childNodeIndex); + } + if (!Stack.back().IsExportNode) { + Malformed = true; + moveToEnd(); + } +} + +// We have a trie data structure and need a way to walk it that is compatible +// with the C++ iterator model. The solution is a non-recursive depth first +// traversal where the iterator contains a stack of parent nodes along with a +// string that is the accumulation of all edge strings along the parent chain +// to this point. +// +// There is one "export" node for each exported symbol. But because some +// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export +// node may have child nodes too. +// +// The algorithm for moveNext() is to keep moving down the leftmost unvisited +// child until hitting a node with no children (which is an export node or +// else the trie is malformed). On the way down, each node is pushed on the +// stack ivar. If there is no more ways down, it pops up one and tries to go +// down a sibling path until a childless node is reached. +void ExportEntry::moveNext() { + if (Stack.empty() || !Stack.back().IsExportNode) { + Malformed = true; + moveToEnd(); + return; + } + + Stack.pop_back(); + while (!Stack.empty()) { + NodeState &Top = Stack.back(); + if (Top.NextChildIndex < Top.ChildCount) { + pushDownUntilBottom(); + // Now at the next export node. + return; + } else { + if (Top.IsExportNode) { + // This node has no children but is itself an export node. + CumulativeString.resize(Top.ParentStringLength); + return; + } + Stack.pop_back(); + } + } + Done = true; +} + +iterator_range<export_iterator> +MachOObjectFile::exports(ArrayRef<uint8_t> Trie) { + ExportEntry Start(Trie); + if (Trie.size() == 0) + Start.moveToEnd(); + else + Start.moveToFirst(); + + ExportEntry Finish(Trie); + Finish.moveToEnd(); + + return make_range(export_iterator(Start), export_iterator(Finish)); +} + +iterator_range<export_iterator> MachOObjectFile::exports() const { + return exports(getDyldInfoExportsTrie()); +} + +MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit) + : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), + RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), + PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {} + +void MachORebaseEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachORebaseEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachORebaseEntry::moveNext() { + // If in the middle of some loop, move to next rebasing in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More && !Malformed) { + // Parse next opcode and set up next loop. + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + switch (Opcode) { + case MachO::REBASE_OPCODE_DONE: + More = false; + Done = true; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + break; + case MachO::REBASE_OPCODE_SET_TYPE_IMM: + RebaseType = ImmValue; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); + break; + case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(); + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: + SegmentOffset += readULEB128(); + DEBUG_WITH_TYPE("mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + SegmentOffset += ImmValue * PointerSize; + DEBUG_WITH_TYPE("mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + AdvanceAmount = PointerSize; + RemainingLoopCount = ImmValue - 1; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + AdvanceAmount = PointerSize; + RemainingLoopCount = readULEB128() - 1; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: + AdvanceAmount = readULEB128() + PointerSize; + RemainingLoopCount = 0; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: + RemainingLoopCount = readULEB128() - 1; + AdvanceAmount = readULEB128() + PointerSize; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + Malformed = true; + } + } +} + +uint64_t MachORebaseEntry::readULEB128() { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + if (Ptr > Opcodes.end()) { + Ptr = Opcodes.end(); + Malformed = true; + } + return Result; +} + +uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachORebaseEntry::typeName() const { + switch (RebaseType) { + case MachO::REBASE_TYPE_POINTER: + return "pointer"; + case MachO::REBASE_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case MachO::REBASE_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { +#ifdef EXPENSIVE_CHECKS + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +iterator_range<rebase_iterator> +MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) { + MachORebaseEntry Start(Opcodes, is64); + Start.moveToFirst(); + + MachORebaseEntry Finish(Opcodes, is64); + Finish.moveToEnd(); + + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); +} + +iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const { + return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); +} + +MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) + : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), + Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0), + BindType(0), PointerSize(is64Bit ? 8 : 4), + TableKind(BK), Malformed(false), Done(false) {} + +void MachOBindEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachOBindEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachOBindEntry::moveNext() { + // If in the middle of some loop, move to next binding in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More && !Malformed) { + // Parse next opcode and set up next loop. + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK; + int8_t SignExtended; + const uint8_t *SymStart; + switch (Opcode) { + case MachO::BIND_OPCODE_DONE: + if (TableKind == Kind::Lazy) { + // Lazying bindings have a DONE opcode between entries. Need to ignore + // it to advance to next entry. But need not if this is last entry. + bool NotLastEntry = false; + for (const uint8_t *P = Ptr; P < Opcodes.end(); ++P) { + if (*P) { + NotLastEntry = true; + } + } + if (NotLastEntry) + break; + } + More = false; + Done = true; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + Ordinal = ImmValue; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: + Ordinal = readULEB128(); + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (ImmValue) { + SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; + Ordinal = SignExtended; + } else + Ordinal = 0; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: + Flags = ImmValue; + SymStart = Ptr; + while (*Ptr) { + ++Ptr; + } + SymbolName = StringRef(reinterpret_cast<const char*>(SymStart), + Ptr-SymStart); + ++Ptr; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); + if (TableKind == Kind::Weak) { + if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) + return; + } + break; + case MachO::BIND_OPCODE_SET_TYPE_IMM: + BindType = ImmValue; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); + break; + case MachO::BIND_OPCODE_SET_ADDEND_SLEB: + Addend = readSLEB128(); + if (TableKind == Kind::Lazy) + Malformed = true; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); + break; + case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(); + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::BIND_OPCODE_ADD_ADDR_ULEB: + SegmentOffset += readULEB128(); + DEBUG_WITH_TYPE("mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::BIND_OPCODE_DO_BIND: + AdvanceAmount = PointerSize; + RemainingLoopCount = 0; + DEBUG_WITH_TYPE("mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: + AdvanceAmount = readULEB128() + PointerSize; + RemainingLoopCount = 0; + if (TableKind == Kind::Lazy) + Malformed = true; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + AdvanceAmount = ImmValue * PointerSize + PointerSize; + RemainingLoopCount = 0; + if (TableKind == Kind::Lazy) + Malformed = true; + DEBUG_WITH_TYPE("mach-o-bind", + llvm::dbgs() + << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: + RemainingLoopCount = readULEB128() - 1; + AdvanceAmount = readULEB128() + PointerSize; + if (TableKind == Kind::Lazy) + Malformed = true; + DEBUG_WITH_TYPE( + "mach-o-bind", + llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + Malformed = true; + } + } +} + +uint64_t MachOBindEntry::readULEB128() { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + if (Ptr > Opcodes.end()) { + Ptr = Opcodes.end(); + Malformed = true; + } + return Result; +} + +int64_t MachOBindEntry::readSLEB128() { + unsigned Count; + int64_t Result = decodeSLEB128(Ptr, &Count); + Ptr += Count; + if (Ptr > Opcodes.end()) { + Ptr = Opcodes.end(); + Malformed = true; + } + return Result; +} + +uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachOBindEntry::typeName() const { + switch (BindType) { + case MachO::BIND_TYPE_POINTER: + return "pointer"; + case MachO::BIND_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case MachO::BIND_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +StringRef MachOBindEntry::symbolName() const { return SymbolName; } + +int64_t MachOBindEntry::addend() const { return Addend; } + +uint32_t MachOBindEntry::flags() const { return Flags; } + +int MachOBindEntry::ordinal() const { return Ordinal; } + +bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { +#ifdef EXPENSIVE_CHECKS + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +iterator_range<bind_iterator> +MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64, + MachOBindEntry::Kind BKind) { + MachOBindEntry Start(Opcodes, is64, BKind); + Start.moveToFirst(); + + MachOBindEntry Finish(Opcodes, is64, BKind); + Finish.moveToEnd(); + + return make_range(bind_iterator(Start), bind_iterator(Finish)); +} + +iterator_range<bind_iterator> MachOObjectFile::bindTable() const { + return bindTable(getDyldInfoBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Regular); +} + +iterator_range<bind_iterator> MachOObjectFile::lazyBindTable() const { + return bindTable(getDyldInfoLazyBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Lazy); +} + +iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const { + return bindTable(getDyldInfoWeakBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Weak); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::begin_load_commands() const { + return LoadCommands.begin(); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::end_load_commands() const { + return LoadCommands.end(); +} + +iterator_range<MachOObjectFile::load_command_iterator> +MachOObjectFile::load_commands() const { + return make_range(begin_load_commands(), end_load_commands()); +} + +StringRef +MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { + ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec); + return parseSegmentOrSectionName(Raw.data()); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawName(DataRefImpl Sec) const { + assert(Sec.d.a < Sections.size() && "Should have detected this earlier"); + const section_base *Base = + reinterpret_cast<const section_base *>(Sections[Sec.d.a]); + return makeArrayRef(Base->sectname); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { + assert(Sec.d.a < Sections.size() && "Should have detected this earlier"); + const section_base *Base = + reinterpret_cast<const section_base *>(Sections[Sec.d.a]); + return makeArrayRef(Base->segname); +} + +bool +MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE) + const { + if (getCPUType(*this) == MachO::CPU_TYPE_X86_64) + return false; + return getPlainRelocationAddress(RE) & MachO::R_SCATTERED; +} + +unsigned MachOObjectFile::getPlainRelocationSymbolNum( + const MachO::any_relocation_info &RE) const { + if (isLittleEndian()) + return RE.r_word1 & 0xffffff; + return RE.r_word1 >> 8; +} + +bool MachOObjectFile::getPlainRelocationExternal( + const MachO::any_relocation_info &RE) const { + if (isLittleEndian()) + return (RE.r_word1 >> 27) & 1; + return (RE.r_word1 >> 4) & 1; +} + +bool MachOObjectFile::getScatteredRelocationScattered( + const MachO::any_relocation_info &RE) const { + return RE.r_word0 >> 31; +} + +uint32_t MachOObjectFile::getScatteredRelocationValue( + const MachO::any_relocation_info &RE) const { + return RE.r_word1; +} + +uint32_t MachOObjectFile::getScatteredRelocationType( + const MachO::any_relocation_info &RE) const { + return (RE.r_word0 >> 24) & 0xf; +} + +unsigned MachOObjectFile::getAnyRelocationAddress( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationAddress(RE); + return getPlainRelocationAddress(RE); +} + +unsigned MachOObjectFile::getAnyRelocationPCRel( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationPCRel(RE); + return getPlainRelocationPCRel(*this, RE); +} + +unsigned MachOObjectFile::getAnyRelocationLength( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationLength(RE); + return getPlainRelocationLength(*this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationType( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationType(RE); + return getPlainRelocationType(*this, RE); +} + +SectionRef +MachOObjectFile::getAnyRelocationSection( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE) || getPlainRelocationExternal(RE)) + return *section_end(); + unsigned SecNum = getPlainRelocationSymbolNum(RE); + if (SecNum == MachO::R_ABS || SecNum > Sections.size()) + return *section_end(); + DataRefImpl DRI; + DRI.d.a = SecNum - 1; + return SectionRef(DRI, this); +} + +MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const { + assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); + return getStruct<MachO::section>(*this, Sections[DRI.d.a]); +} + +MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const { + assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); + return getStruct<MachO::section_64>(*this, Sections[DRI.d.a]); +} + +MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section>(*this, Sec); +} + +MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section_64>(*this, Sec); +} + +MachO::nlist +MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist>(*this, P); +} + +MachO::nlist_64 +MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist_64>(*this, P); +} + +MachO::linkedit_data_command +MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::linkedit_data_command>(*this, L.Ptr); +} + +MachO::segment_command +MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::segment_command>(*this, L.Ptr); +} + +MachO::segment_command_64 +MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::segment_command_64>(*this, L.Ptr); +} + +MachO::linker_option_command +MachOObjectFile::getLinkerOptionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::linker_option_command>(*this, L.Ptr); +} + +MachO::version_min_command +MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::version_min_command>(*this, L.Ptr); +} + +MachO::dylib_command +MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dylib_command>(*this, L.Ptr); +} + +MachO::dyld_info_command +MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dyld_info_command>(*this, L.Ptr); +} + +MachO::dylinker_command +MachOObjectFile::getDylinkerCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dylinker_command>(*this, L.Ptr); +} + +MachO::uuid_command +MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::uuid_command>(*this, L.Ptr); +} + +MachO::rpath_command +MachOObjectFile::getRpathCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::rpath_command>(*this, L.Ptr); +} + +MachO::source_version_command +MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::source_version_command>(*this, L.Ptr); +} + +MachO::entry_point_command +MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::entry_point_command>(*this, L.Ptr); +} + +MachO::encryption_info_command +MachOObjectFile::getEncryptionInfoCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::encryption_info_command>(*this, L.Ptr); +} + +MachO::encryption_info_command_64 +MachOObjectFile::getEncryptionInfoCommand64(const LoadCommandInfo &L) const { + return getStruct<MachO::encryption_info_command_64>(*this, L.Ptr); +} + +MachO::sub_framework_command +MachOObjectFile::getSubFrameworkCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_framework_command>(*this, L.Ptr); +} + +MachO::sub_umbrella_command +MachOObjectFile::getSubUmbrellaCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_umbrella_command>(*this, L.Ptr); +} + +MachO::sub_library_command +MachOObjectFile::getSubLibraryCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_library_command>(*this, L.Ptr); +} + +MachO::sub_client_command +MachOObjectFile::getSubClientCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_client_command>(*this, L.Ptr); +} + +MachO::routines_command +MachOObjectFile::getRoutinesCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::routines_command>(*this, L.Ptr); +} + +MachO::routines_command_64 +MachOObjectFile::getRoutinesCommand64(const LoadCommandInfo &L) const { + return getStruct<MachO::routines_command_64>(*this, L.Ptr); +} + +MachO::thread_command +MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::thread_command>(*this, L.Ptr); +} + +MachO::any_relocation_info +MachOObjectFile::getRelocation(DataRefImpl Rel) const { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + uint32_t Offset; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } + + auto P = reinterpret_cast<const MachO::any_relocation_info *>( + getPtr(*this, Offset)) + Rel.d.b; + return getStruct<MachO::any_relocation_info>( + *this, reinterpret_cast<const char *>(P)); +} + +MachO::data_in_code_entry +MachOObjectFile::getDice(DataRefImpl Rel) const { + const char *P = reinterpret_cast<const char *>(Rel.p); + return getStruct<MachO::data_in_code_entry>(*this, P); +} + +const MachO::mach_header &MachOObjectFile::getHeader() const { + return Header; +} + +const MachO::mach_header_64 &MachOObjectFile::getHeader64() const { + assert(is64Bit()); + return Header64; +} + +uint32_t MachOObjectFile::getIndirectSymbolTableEntry( + const MachO::dysymtab_command &DLC, + unsigned Index) const { + uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t); + return getStruct<uint32_t>(*this, getPtr(*this, Offset)); +} + +MachO::data_in_code_entry +MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, + unsigned Index) const { + uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry); + return getStruct<MachO::data_in_code_entry>(*this, getPtr(*this, Offset)); +} + +MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const { + if (SymtabLoadCmd) + return getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); + + // If there is no SymtabLoadCmd return a load command with zero'ed fields. + MachO::symtab_command Cmd; + Cmd.cmd = MachO::LC_SYMTAB; + Cmd.cmdsize = sizeof(MachO::symtab_command); + Cmd.symoff = 0; + Cmd.nsyms = 0; + Cmd.stroff = 0; + Cmd.strsize = 0; + return Cmd; +} + +MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const { + if (DysymtabLoadCmd) + return getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); + + // If there is no DysymtabLoadCmd return a load command with zero'ed fields. + MachO::dysymtab_command Cmd; + Cmd.cmd = MachO::LC_DYSYMTAB; + Cmd.cmdsize = sizeof(MachO::dysymtab_command); + Cmd.ilocalsym = 0; + Cmd.nlocalsym = 0; + Cmd.iextdefsym = 0; + Cmd.nextdefsym = 0; + Cmd.iundefsym = 0; + Cmd.nundefsym = 0; + Cmd.tocoff = 0; + Cmd.ntoc = 0; + Cmd.modtaboff = 0; + Cmd.nmodtab = 0; + Cmd.extrefsymoff = 0; + Cmd.nextrefsyms = 0; + Cmd.indirectsymoff = 0; + Cmd.nindirectsyms = 0; + Cmd.extreloff = 0; + Cmd.nextrel = 0; + Cmd.locreloff = 0; + Cmd.nlocrel = 0; + return Cmd; +} + +MachO::linkedit_data_command +MachOObjectFile::getDataInCodeLoadCommand() const { + if (DataInCodeLoadCmd) + return getStruct<MachO::linkedit_data_command>(*this, DataInCodeLoadCmd); + + // If there is no DataInCodeLoadCmd return a load command with zero'ed fields. + MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_DATA_IN_CODE; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; +} + +MachO::linkedit_data_command +MachOObjectFile::getLinkOptHintsLoadCommand() const { + if (LinkOptHintsLoadCmd) + return getStruct<MachO::linkedit_data_command>(*this, LinkOptHintsLoadCmd); + + // If there is no LinkOptHintsLoadCmd return a load command with zero'ed + // fields. + MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_LINKER_OPTIMIZATION_HINT; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off)); + return makeArrayRef(Ptr, DyldInfo.rebase_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off)); + return makeArrayRef(Ptr, DyldInfo.bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off)); + return makeArrayRef(Ptr, DyldInfo.weak_bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off)); + return makeArrayRef(Ptr, DyldInfo.lazy_bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off)); + return makeArrayRef(Ptr, DyldInfo.export_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getUuid() const { + if (!UuidLoadCmd) + return None; + // Returning a pointer is fine as uuid doesn't need endian swapping. + const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid); + return makeArrayRef(reinterpret_cast<const uint8_t *>(Ptr), 16); +} + +StringRef MachOObjectFile::getStringTableData() const { + MachO::symtab_command S = getSymtabLoadCommand(); + return getData().substr(S.stroff, S.strsize); +} + +bool MachOObjectFile::is64Bit() const { + return getType() == getMachOType(false, true) || + getType() == getMachOType(true, true); +} + +void MachOObjectFile::ReadULEB128s(uint64_t Index, + SmallVectorImpl<uint64_t> &Out) const { + DataExtractor extractor(ObjectFile::getData(), true, 0); + + uint32_t offset = Index; + uint64_t data = 0; + while (uint64_t delta = extractor.getULEB128(&offset)) { + data += delta; + Out.push_back(data); + } +} + +bool MachOObjectFile::isRelocatableObject() const { + return getHeader().filetype == MachO::MH_OBJECT; +} + +Expected<std::unique_ptr<MachOObjectFile>> +ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, + uint32_t UniversalCputype, + uint32_t UniversalIndex) { + StringRef Magic = Buffer.getBuffer().slice(0, 4); + if (Magic == "\xFE\xED\xFA\xCE") + return MachOObjectFile::create(Buffer, false, false, + UniversalCputype, UniversalIndex); + if (Magic == "\xCE\xFA\xED\xFE") + return MachOObjectFile::create(Buffer, true, false, + UniversalCputype, UniversalIndex); + if (Magic == "\xFE\xED\xFA\xCF") + return MachOObjectFile::create(Buffer, false, true, + UniversalCputype, UniversalIndex); + if (Magic == "\xCF\xFA\xED\xFE") + return MachOObjectFile::create(Buffer, true, true, + UniversalCputype, UniversalIndex); + return make_error<GenericBinaryError>("Unrecognized MachO magic number", + object_error::invalid_file_type); +} diff --git a/contrib/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm/lib/Object/MachOUniversal.cpp new file mode 100644 index 000000000000..309708e9b37c --- /dev/null +++ b/contrib/llvm/lib/Object/MachOUniversal.cpp @@ -0,0 +1,228 @@ +//===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOUniversalBinary class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace object; + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed fat file (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +template<typename T> +static T getUniversalBinaryStruct(const char *Ptr) { + T Res; + memcpy(&Res, Ptr, sizeof(T)); + // Universal binary headers have big-endian byte order. + if (sys::IsLittleEndianHost) + swapStruct(Res); + return Res; +} + +MachOUniversalBinary::ObjectForArch::ObjectForArch( + const MachOUniversalBinary *Parent, uint32_t Index) + : Parent(Parent), Index(Index) { + // The iterators use Parent as a nullptr and an Index+1 == NumberOfObjects. + if (!Parent || Index >= Parent->getNumberOfObjects()) { + clear(); + } else { + // Parse object header. + StringRef ParentData = Parent->getData(); + if (Parent->getMagic() == MachO::FAT_MAGIC) { + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch); + Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos); + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch_64); + Header64 = getUniversalBinaryStruct<MachO::fat_arch_64>(HeaderPos); + } + } +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsObjectFile() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + uint32_t cputype; + if (Parent->getMagic() == MachO::FAT_MAGIC) { + ObjectData = ParentData.substr(Header.offset, Header.size); + cputype = Header.cputype; + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + cputype = Header64.cputype; + } + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return ObjectFile::createMachOObjectFile(ObjBuffer, cputype, Index); +} + +Expected<std::unique_ptr<Archive>> +MachOUniversalBinary::ObjectForArch::getAsArchive() const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsArchive() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + if (Parent->getMagic() == MachO::FAT_MAGIC) + ObjectData = ParentData.substr(Header.offset, Header.size); + else // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return Archive::create(ObjBuffer); +} + +void MachOUniversalBinary::anchor() { } + +Expected<std::unique_ptr<MachOUniversalBinary>> +MachOUniversalBinary::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<MachOUniversalBinary> Ret( + new MachOUniversalBinary(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} + +MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) + : Binary(Binary::ID_MachOUniversalBinary, Source), Magic(0), + NumberOfObjects(0) { + ErrorAsOutParameter ErrAsOutParam(&Err); + if (Data.getBufferSize() < sizeof(MachO::fat_header)) { + Err = make_error<GenericBinaryError>("File too small to be a Mach-O " + "universal file", + object_error::invalid_file_type); + return; + } + // Check for magic value and sufficient header size. + StringRef Buf = getData(); + MachO::fat_header H = + getUniversalBinaryStruct<MachO::fat_header>(Buf.begin()); + Magic = H.magic; + NumberOfObjects = H.nfat_arch; + if (NumberOfObjects == 0) { + Err = malformedError("contains zero architecture types"); + return; + } + uint32_t MinSize = sizeof(MachO::fat_header); + if (Magic == MachO::FAT_MAGIC) + MinSize += sizeof(MachO::fat_arch) * NumberOfObjects; + else if (Magic == MachO::FAT_MAGIC_64) + MinSize += sizeof(MachO::fat_arch_64) * NumberOfObjects; + else { + Err = malformedError("bad magic number"); + return; + } + if (Buf.size() < MinSize) { + Err = malformedError("fat_arch" + + Twine(Magic == MachO::FAT_MAGIC ? "" : "_64") + + " structs would extend past the end of the file"); + return; + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + uint64_t bigSize = A.getOffset(); + bigSize += A.getSize(); + if (bigSize > Buf.size()) { + Err = malformedError("offset plus size of cputype (" + + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") extends past the end of the file"); + return; + } +#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */ + if (A.getAlign() > MAXSECTALIGN) { + Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large " + "for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") (maximum 2^" + Twine(MAXSECTALIGN) + ")"); + return; + } + if(A.getOffset() % (1 << A.getAlign()) != 0){ + Err = malformedError("offset: " + Twine(A.getOffset()) + + " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") not aligned on it's alignment (2^" + Twine(A.getAlign()) + ")"); + return; + } + if (A.getOffset() < MinSize) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") offset " + Twine(A.getOffset()) + " overlaps universal headers"); + return; + } + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + for (uint32_t j = i + 1; j < NumberOfObjects; j++) { + ObjectForArch B(this, j); + if (A.getCPUType() == B.getCPUType() && + (A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) == + (B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK)) { + Err = malformedError("contains two of the same architecture (cputype " + "(" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + "))"); + return; + } + if ((A.getOffset() >= B.getOffset() && + A.getOffset() < B.getOffset() + B.getSize()) || + (A.getOffset() + A.getSize() > B.getOffset() && + A.getOffset() + A.getSize() < B.getOffset() + B.getSize()) || + (A.getOffset() <= B.getOffset() && + A.getOffset() + A.getSize() >= B.getOffset() + B.getSize())) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(A.getOffset()) + " with a size of " + + Twine(A.getSize()) + ", overlaps cputype (" + Twine(B.getCPUType()) + + ") cpusubtype (" + Twine(B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(B.getOffset()) + " with a size of " + + Twine(B.getSize())); + return; + } + } + } + Err = Error::success(); +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOUniversalBinary::getObjectForArch(StringRef ArchName) const { + if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch) + return make_error<GenericBinaryError>("Unknown architecture " + "named: " + + ArchName, + object_error::arch_not_found); + + for (auto &Obj : objects()) + if (Obj.getArchFlagName() == ArchName) + return Obj.getAsObjectFile(); + return make_error<GenericBinaryError>("fat file does not " + "contain " + + ArchName, + object_error::arch_not_found); +} diff --git a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp new file mode 100644 index 000000000000..11ace84b9ceb --- /dev/null +++ b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp @@ -0,0 +1,115 @@ +//===- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation ==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the ModuleSummaryIndexObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile( + "ignore-empty-index-file", llvm::cl::ZeroOrMore, + llvm::cl::desc( + "Ignore an empty index file and perform non-ThinLTO compilation"), + llvm::cl::init(false)); + +ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( + MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I) + : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { +} + +ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() {} + +std::unique_ptr<ModuleSummaryIndex> ModuleSummaryIndexObjectFile::takeIndex() { + return std::move(Index); +} + +ErrorOr<MemoryBufferRef> +ModuleSummaryIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + if (Sec.isBitcode()) { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) + return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr<MemoryBufferRef> +ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: + return Object; + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return errorToErrorCode(ObjFile.takeError()); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + +// Parse module summary index in the given memory buffer. +// Return new ModuleSummaryIndexObjectFile instance containing parsed +// module summary/index. +Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>> +ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) { + ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected<std::unique_ptr<ModuleSummaryIndex>> IOrErr = + getModuleSummaryIndex(BCOrErr.get()); + + if (!IOrErr) + return IOrErr.takeError(); + + std::unique_ptr<ModuleSummaryIndex> Index = std::move(IOrErr.get()); + return llvm::make_unique<ModuleSummaryIndexObjectFile>(Object, + std::move(Index)); +} + +// Parse the module summary index out of an IR file and return the summary +// index object if found, or nullptr if not. +Expected<std::unique_ptr<ModuleSummaryIndex>> +llvm::getModuleSummaryIndexForFile(StringRef Path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + std::error_code EC = FileOrErr.getError(); + if (EC) + return errorCodeToError(EC); + MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); + if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) + return nullptr; + Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = + object::ModuleSummaryIndexObjectFile::create(BufferRef); + if (!ObjOrErr) + return ObjOrErr.takeError(); + + object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr; + return Obj.takeIndex(); +} diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp new file mode 100644 index 000000000000..90488007ff59 --- /dev/null +++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp @@ -0,0 +1,189 @@ +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a symbol table built from in-memory IR. It provides +// access to GlobalValues and should only be used if such access is required +// (e.g. in the LTO implementation). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRObjectFile.h" +#include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +void ModuleSymbolTable::addModule(Module *M) { + if (FirstMod) + assert(FirstMod->getTargetTriple() == M->getTargetTriple()); + else + FirstMod = M; + + for (Function &F : *M) + SymTab.push_back(&F); + for (GlobalVariable &GV : M->globals()) + SymTab.push_back(&GV); + for (GlobalAlias &GA : M->aliases()) + SymTab.push_back(&GA); + + CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(), + [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) + AsmSymbol(Name, Flags)); + }); +} + +void ModuleSymbolTable::CollectAsmSymbols( + const Triple &TT, StringRef InlineAsm, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + if (InlineAsm.empty()) + return; + + std::string Err; + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + assert(T && T->hasMCAsmParser()); + + std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); + if (!MRI) + return; + + std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str())); + if (!MAI) + return; + + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(TT.str(), "", "")); + if (!STI) + return; + + std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); + if (!MCII) + return; + + MCObjectFileInfo MOFI; + MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); + MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx); + RecordStreamer Streamer(MCCtx); + T->createNullTargetStreamer(Streamer); + + std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI)); + + MCTargetOptions MCOptions; + std::unique_ptr<MCTargetAsmParser> TAP( + T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + return; + + Parser->setTargetParser(*TAP); + if (Parser->Run(false)) + return; + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. + uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + } +} + +void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { + if (S.is<AsmSymbol *>()) { + OS << S.get<AsmSymbol *>()->first; + return; + } + + auto *GV = S.get<GlobalValue *>(); + if (GV->hasDLLImportStorageClass()) + OS << "__imp_"; + + Mang.getNameWithPrefix(OS, GV, false); +} + +uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const { + if (S.is<AsmSymbol *>()) + return S.get<AsmSymbol *>()->second; + + auto *GV = S.get<GlobalValue *>(); + + uint32_t Res = BasicSymbolRef::SF_None; + if (GV->isDeclarationForLinker()) + Res |= BasicSymbolRef::SF_Undefined; + else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Hidden; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (GVar->isConstant()) + Res |= BasicSymbolRef::SF_Const; + } + if (dyn_cast_or_null<Function>(GV->getBaseObject())) + Res |= BasicSymbolRef::SF_Executable; + if (isa<GlobalAlias>(GV)) + Res |= BasicSymbolRef::SF_Indirect; + if (GV->hasPrivateLinkage()) + Res |= BasicSymbolRef::SF_FormatSpecific; + if (!GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Global; + if (GV->hasCommonLinkage()) + Res |= BasicSymbolRef::SF_Common; + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasExternalWeakLinkage()) + Res |= BasicSymbolRef::SF_Weak; + + if (GV->getName().startswith("llvm.")) + Res |= BasicSymbolRef::SF_FormatSpecific; + else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { + if (Var->getSection() == "llvm.metadata") + Res |= BasicSymbolRef::SF_FormatSpecific; + } + + return Res; +} diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp new file mode 100644 index 000000000000..6df481b060e1 --- /dev/null +++ b/contrib/llvm/lib/Object/Object.cpp @@ -0,0 +1,240 @@ +//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings to the file-format-independent object +// library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm-c/Object.h" +#include "llvm/Object/ObjectFile.h" + +using namespace llvm; +using namespace object; + +inline OwningBinary<ObjectFile> *unwrap(LLVMObjectFileRef OF) { + return reinterpret_cast<OwningBinary<ObjectFile> *>(OF); +} + +inline LLVMObjectFileRef wrap(const OwningBinary<ObjectFile> *OF) { + return reinterpret_cast<LLVMObjectFileRef>( + const_cast<OwningBinary<ObjectFile> *>(OF)); +} + +inline section_iterator *unwrap(LLVMSectionIteratorRef SI) { + return reinterpret_cast<section_iterator*>(SI); +} + +inline LLVMSectionIteratorRef +wrap(const section_iterator *SI) { + return reinterpret_cast<LLVMSectionIteratorRef> + (const_cast<section_iterator*>(SI)); +} + +inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) { + return reinterpret_cast<symbol_iterator*>(SI); +} + +inline LLVMSymbolIteratorRef +wrap(const symbol_iterator *SI) { + return reinterpret_cast<LLVMSymbolIteratorRef> + (const_cast<symbol_iterator*>(SI)); +} + +inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) { + return reinterpret_cast<relocation_iterator*>(SI); +} + +inline LLVMRelocationIteratorRef +wrap(const relocation_iterator *SI) { + return reinterpret_cast<LLVMRelocationIteratorRef> + (const_cast<relocation_iterator*>(SI)); +} + +// ObjectFile creation +LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { + std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf)); + Expected<std::unique_ptr<ObjectFile>> ObjOrErr( + ObjectFile::createObjectFile(Buf->getMemBufferRef())); + std::unique_ptr<ObjectFile> Obj; + if (!ObjOrErr) { + // TODO: Actually report errors helpfully. + consumeError(ObjOrErr.takeError()); + return nullptr; + } + + auto *Ret = new OwningBinary<ObjectFile>(std::move(ObjOrErr.get()), std::move(Buf)); + return wrap(Ret); +} + +void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { + delete unwrap(ObjectFile); +} + +// ObjectFile Section iterators +LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef OF) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + section_iterator SI = OB->getBinary()->section_begin(); + return wrap(new section_iterator(SI)); +} + +void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef OF, + LLVMSectionIteratorRef SI) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + return (*unwrap(SI) == OB->getBinary()->section_end()) ? 1 : 0; +} + +void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { + ++(*unwrap(SI)); +} + +void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, + LLVMSymbolIteratorRef Sym) { + Expected<section_iterator> SecOrErr = (*unwrap(Sym))->getSection(); + if (!SecOrErr) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(SecOrErr.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } + *unwrap(Sect) = *SecOrErr; +} + +// ObjectFile Symbol iterators +LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef OF) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + symbol_iterator SI = OB->getBinary()->symbol_begin(); + return wrap(new symbol_iterator(SI)); +} + +void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef OF, + LLVMSymbolIteratorRef SI) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + return (*unwrap(SI) == OB->getBinary()->symbol_end()) ? 1 : 0; +} + +void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { + ++(*unwrap(SI)); +} + +// SectionRef accessors +const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { + StringRef ret; + if (std::error_code ec = (*unwrap(SI))->getName(ret)) + report_fatal_error(ec.message()); + return ret.data(); +} + +uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getSize(); +} + +const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { + StringRef ret; + if (std::error_code ec = (*unwrap(SI))->getContents(ret)) + report_fatal_error(ec.message()); + return ret.data(); +} + +uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getAddress(); +} + +LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, + LLVMSymbolIteratorRef Sym) { + return (*unwrap(SI))->containsSymbol(**unwrap(Sym)); +} + +// Section Relocation iterators +LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) { + relocation_iterator SI = (*unwrap(Section))->relocation_begin(); + return wrap(new relocation_iterator(SI)); +} + +void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section, + LLVMRelocationIteratorRef SI) { + return (*unwrap(SI) == (*unwrap(Section))->relocation_end()) ? 1 : 0; +} + +void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) { + ++(*unwrap(SI)); +} + + +// SymbolRef accessors +const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { + Expected<StringRef> Ret = (*unwrap(SI))->getName(); + if (!Ret) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(Ret.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } + return Ret->data(); +} + +uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { + Expected<uint64_t> Ret = (*unwrap(SI))->getAddress(); + if (!Ret) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(Ret.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } + return *Ret; +} + +uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { + return (*unwrap(SI))->getCommonSize(); +} + +// RelocationRef accessors +uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { + return (*unwrap(RI))->getOffset(); +} + +LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { + symbol_iterator ret = (*unwrap(RI))->getSymbol(); + return wrap(new symbol_iterator(ret)); +} + +uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { + return (*unwrap(RI))->getType(); +} + +// NOTE: Caller takes ownership of returned string. +const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { + SmallVector<char, 0> ret; + (*unwrap(RI))->getTypeName(ret); + char *str = static_cast<char*>(malloc(ret.size())); + std::copy(ret.begin(), ret.end(), str); + return str; +} + +// NOTE: Caller takes ownership of returned string. +const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) { + return strdup(""); +} + diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp new file mode 100644 index 000000000000..f36388b677f3 --- /dev/null +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -0,0 +1,130 @@ +//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent ObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + +using namespace llvm; +using namespace object; + +void ObjectFile::anchor() { } + +ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) + : SymbolicFile(Type, Source) {} + +bool SectionRef::containsSymbol(SymbolRef S) const { + Expected<section_iterator> SymSec = S.getSection(); + if (!SymSec) { + // TODO: Actually report errors helpfully. + consumeError(SymSec.takeError()); + return false; + } + return *this == **SymSec; +} + +uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const { + uint32_t Flags = getSymbolFlags(Ref); + if (Flags & SymbolRef::SF_Undefined) + return 0; + if (Flags & SymbolRef::SF_Common) + return getCommonSymbolSize(Ref); + return getSymbolValueImpl(Ref); +} + +std::error_code ObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + Expected<StringRef> Name = getSymbolName(Symb); + if (!Name) + return errorToErrorCode(Name.takeError()); + OS << *Name; + return std::error_code(); +} + +uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; } + +bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const { + StringRef SectName; + if (!getSectionName(Sec, SectName)) + return SectName == ".llvmbc"; + return false; +} + +section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { + return section_iterator(SectionRef(Sec, this)); +} + +Expected<std::unique_ptr<ObjectFile>> +ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { + StringRef Data = Object.getBuffer(); + if (Type == sys::fs::file_magic::unknown) + Type = sys::fs::identify_magic(Data); + + switch (Type) { + case sys::fs::file_magic::unknown: + case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::coff_cl_gl_object: + case sys::fs::file_magic::archive: + case sys::fs::file_magic::macho_universal_binary: + case sys::fs::file_magic::windows_resource: + return errorCodeToError(object_error::invalid_file_type); + case sys::fs::file_magic::elf: + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: + return errorOrToExpected(createELFObjectFile(Object)); + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: + case sys::fs::file_magic::macho_kext_bundle: + return createMachOObjectFile(Object); + case sys::fs::file_magic::coff_object: + case sys::fs::file_magic::coff_import_library: + case sys::fs::file_magic::pecoff_executable: + return errorOrToExpected(createCOFFObjectFile(Object)); + case sys::fs::file_magic::wasm_object: + return createWasmObjectFile(Object); + } + llvm_unreachable("Unexpected Object File Type"); +} + +Expected<OwningBinary<ObjectFile>> +ObjectFile::createObjectFile(StringRef ObjectPath) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFile(ObjectPath); + if (std::error_code EC = FileOrErr.getError()) + return errorCodeToError(EC); + std::unique_ptr<MemoryBuffer> Buffer = std::move(FileOrErr.get()); + + Expected<std::unique_ptr<ObjectFile>> ObjOrErr = + createObjectFile(Buffer->getMemBufferRef()); + if (Error Err = ObjOrErr.takeError()) + return std::move(Err); + std::unique_ptr<ObjectFile> Obj = std::move(ObjOrErr.get()); + + return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer)); +} diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp new file mode 100644 index 000000000000..572b960bc85f --- /dev/null +++ b/contrib/llvm/lib/Object/RecordStreamer.cpp @@ -0,0 +1,112 @@ +//===-- RecordStreamer.cpp - Record asm definde and used symbols ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "RecordStreamer.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +void RecordStreamer::markDefined(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Global: + S = DefinedGlobal; + break; + case NeverSeen: + case Defined: + case Used: + S = Defined; + break; + case DefinedWeak: + break; + case UndefinedWeak: + S = DefinedWeak; + } +} + +void RecordStreamer::markGlobal(const MCSymbol &Symbol, + MCSymbolAttr Attribute) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + S = (Attribute == MCSA_Weak) ? DefinedWeak : DefinedGlobal; + break; + + case NeverSeen: + case Global: + case Used: + S = (Attribute == MCSA_Weak) ? UndefinedWeak : Global; + break; + case UndefinedWeak: + case DefinedWeak: + break; + } +} + +void RecordStreamer::markUsed(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + case Global: + case DefinedWeak: + case UndefinedWeak: + break; + + case NeverSeen: + case Used: + S = Used; + break; + } +} + +void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } + +RecordStreamer::const_iterator RecordStreamer::begin() { + return Symbols.begin(); +} + +RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } + +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + +void RecordStreamer::EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCStreamer::EmitInstruction(Inst, STI); +} + +void RecordStreamer::EmitLabel(MCSymbol *Symbol) { + MCStreamer::EmitLabel(Symbol); + markDefined(*Symbol); +} + +void RecordStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + markDefined(*Symbol); + MCStreamer::EmitAssignment(Symbol, Value); +} + +bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + if (Attribute == MCSA_Global || Attribute == MCSA_Weak) + markGlobal(*Symbol, Attribute); + if (Attribute == MCSA_LazyReference) + markUsed(*Symbol); + return true; +} + +void RecordStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + markDefined(*Symbol); +} + +void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + markDefined(*Symbol); +} diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h new file mode 100644 index 000000000000..617d8a43fbd2 --- /dev/null +++ b/contrib/llvm/lib/Object/RecordStreamer.h @@ -0,0 +1,43 @@ +//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H +#define LLVM_LIB_OBJECT_RECORDSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class RecordStreamer : public MCStreamer { +public: + enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, + UndefinedWeak}; + +private: + StringMap<State> Symbols; + void markDefined(const MCSymbol &Symbol); + void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); + void markUsed(const MCSymbol &Symbol); + void visitUsedSymbol(const MCSymbol &Sym) override; + +public: + typedef StringMap<State>::const_iterator const_iterator; + const_iterator begin(); + const_iterator end(); + RecordStreamer(MCContext &Context); + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void EmitLabel(MCSymbol *Symbol) override; + void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; + bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; + void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; + void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; +}; +} +#endif diff --git a/contrib/llvm/lib/Object/SymbolSize.cpp b/contrib/llvm/lib/Object/SymbolSize.cpp new file mode 100644 index 000000000000..dd49d5f116b3 --- /dev/null +++ b/contrib/llvm/lib/Object/SymbolSize.cpp @@ -0,0 +1,94 @@ +//===- SymbolSize.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/SymbolSize.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" + +using namespace llvm; +using namespace object; + +// Orders increasingly by (SectionID, Address). +int llvm::object::compareAddress(const SymEntry *A, const SymEntry *B) { + if (A->SectionID != B->SectionID) + return A->SectionID < B->SectionID ? -1 : 1; + if (A->Address != B->Address) + return A->Address < B->Address ? -1 : 1; + return 0; +} + +static unsigned getSectionID(const ObjectFile &O, SectionRef Sec) { + if (auto *M = dyn_cast<MachOObjectFile>(&O)) + return M->getSectionID(Sec); + return cast<COFFObjectFile>(O).getSectionID(Sec); +} + +static unsigned getSymbolSectionID(const ObjectFile &O, SymbolRef Sym) { + if (auto *M = dyn_cast<MachOObjectFile>(&O)) + return M->getSymbolSectionID(Sym); + return cast<COFFObjectFile>(O).getSymbolSectionID(Sym); +} + +std::vector<std::pair<SymbolRef, uint64_t>> +llvm::object::computeSymbolSizes(const ObjectFile &O) { + std::vector<std::pair<SymbolRef, uint64_t>> Ret; + + if (const auto *E = dyn_cast<ELFObjectFileBase>(&O)) { + auto Syms = E->symbols(); + if (Syms.begin() == Syms.end()) + Syms = E->getDynamicSymbolIterators(); + for (ELFSymbolRef Sym : Syms) + Ret.push_back({Sym, Sym.getSize()}); + return Ret; + } + + // Collect sorted symbol addresses. Include dummy addresses for the end + // of each section. + std::vector<SymEntry> Addresses; + unsigned SymNum = 0; + for (symbol_iterator I = O.symbol_begin(), E = O.symbol_end(); I != E; ++I) { + SymbolRef Sym = *I; + uint64_t Value = Sym.getValue(); + Addresses.push_back({I, Value, SymNum, getSymbolSectionID(O, Sym)}); + ++SymNum; + } + for (SectionRef Sec : O.sections()) { + uint64_t Address = Sec.getAddress(); + uint64_t Size = Sec.getSize(); + Addresses.push_back( + {O.symbol_end(), Address + Size, 0, getSectionID(O, Sec)}); + } + array_pod_sort(Addresses.begin(), Addresses.end(), compareAddress); + + // Compute the size as the gap to the next symbol + for (unsigned I = 0, N = Addresses.size() - 1; I < N; ++I) { + auto &P = Addresses[I]; + if (P.I == O.symbol_end()) + continue; + + // If multiple symbol have the same address, give both the same size. + unsigned NextI = I + 1; + while (NextI < N && Addresses[NextI].Address == P.Address) + ++NextI; + + uint64_t Size = Addresses[NextI].Address - P.Address; + P.Address = Size; + } + + // Assign the sorted symbols in the original order. + Ret.resize(SymNum); + for (SymEntry &P : Addresses) { + if (P.I == O.symbol_end()) + continue; + Ret[P.Number] = {*P.I, P.Address}; + } + return Ret; +} diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp new file mode 100644 index 000000000000..4b51a49cf342 --- /dev/null +++ b/contrib/llvm/lib/Object/SymbolicFile.cpp @@ -0,0 +1,84 @@ +//===- SymbolicFile.cpp - Interface that only provides symbols --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent SymbolicFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace object; + +SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) + : Binary(Type, Source) {} + +SymbolicFile::~SymbolicFile() {} + +Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile( + MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) { + StringRef Data = Object.getBuffer(); + if (Type == sys::fs::file_magic::unknown) + Type = sys::fs::identify_magic(Data); + + switch (Type) { + case sys::fs::file_magic::bitcode: + if (Context) + return IRObjectFile::create(Object, *Context); + LLVM_FALLTHROUGH; + case sys::fs::file_magic::unknown: + case sys::fs::file_magic::archive: + case sys::fs::file_magic::coff_cl_gl_object: + case sys::fs::file_magic::macho_universal_binary: + case sys::fs::file_magic::windows_resource: + return errorCodeToError(object_error::invalid_file_type); + case sys::fs::file_magic::elf: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: + case sys::fs::file_magic::macho_kext_bundle: + case sys::fs::file_magic::pecoff_executable: + case sys::fs::file_magic::wasm_object: + return ObjectFile::createObjectFile(Object, Type); + case sys::fs::file_magic::coff_import_library: + return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object)); + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> Obj = + ObjectFile::createObjectFile(Object, Type); + if (!Obj || !Context) + return std::move(Obj); + + ErrorOr<MemoryBufferRef> BCData = + IRObjectFile::findBitcodeInObject(*Obj->get()); + if (!BCData) + return std::move(Obj); + + return IRObjectFile::create( + MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), + *Context); + } + } + llvm_unreachable("Unexpected Binary File Type"); +} diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp new file mode 100644 index 000000000000..2b61a8a034f6 --- /dev/null +++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp @@ -0,0 +1,313 @@ +//===- WasmObjectFile.cpp - Wasm object file implementation -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" + +namespace llvm { +namespace object { + +Expected<std::unique_ptr<WasmObjectFile>> +ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { + Error Err = Error::success(); + auto ObjectFile = llvm::make_unique<WasmObjectFile>(Buffer, Err); + if (Err) + return std::move(Err); + + return std::move(ObjectFile); +} + +namespace { + +uint32_t readUint32(const uint8_t *&Ptr) { + uint32_t Result = support::endian::read32le(Ptr); + Ptr += sizeof(Result); + return Result; +} + +uint64_t readULEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + return Result; +} + +StringRef readString(const uint8_t *&Ptr) { + uint32_t StringLen = readULEB128(Ptr); + StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen); + Ptr += StringLen; + return Return; +} + +Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr, + const uint8_t *Start) { + // TODO(sbc): Avoid reading past EOF in the case of malformed files. + Section.Offset = Ptr - Start; + Section.Type = readULEB128(Ptr); + uint32_t Size = readULEB128(Ptr); + if (Size == 0) + return make_error<StringError>("Zero length section", + object_error::parse_failed); + Section.Content = ArrayRef<uint8_t>(Ptr, Size); + Ptr += Size; + return Error::success(); +} +} + +WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) + : ObjectFile(Binary::ID_Wasm, Buffer) { + ErrorAsOutParameter ErrAsOutParam(&Err); + Header.Magic = getData().substr(0, 4); + if (Header.Magic != StringRef("\0asm", 4)) { + Err = make_error<StringError>("Bad magic number", + object_error::parse_failed); + return; + } + const uint8_t *Ptr = getPtr(4); + Header.Version = readUint32(Ptr); + if (Header.Version != wasm::WasmVersion) { + Err = make_error<StringError>("Bad version number", + object_error::parse_failed); + return; + } + + const uint8_t *Eof = getPtr(getData().size()); + wasm::WasmSection Sec; + while (Ptr < Eof) { + if ((Err = readSection(Sec, Ptr, getPtr(0)))) + return; + if (Sec.Type == wasm::WASM_SEC_USER) { + if ((Err = parseUserSection(Sec, Sec.Content.data(), Sec.Content.size()))) + return; + } + Sections.push_back(Sec); + } +} + +Error WasmObjectFile::parseUserSection(wasm::WasmSection &Sec, + const uint8_t *Ptr, size_t Length) { + Sec.Name = readString(Ptr); + return Error::success(); +} + +const uint8_t *WasmObjectFile::getPtr(size_t Offset) const { + return reinterpret_cast<const uint8_t *>(getData().substr(Offset, 1).data()); +} + +const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { + return Header; +} + +void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + llvm_unreachable("not yet implemented"); +} + +std::error_code WasmObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return object_error::invalid_symbol_index; +} + +uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +basic_symbol_iterator WasmObjectFile::symbol_begin() const { + return BasicSymbolRef(DataRefImpl(), this); +} + +basic_symbol_iterator WasmObjectFile::symbol_end() const { + return BasicSymbolRef(DataRefImpl(), this); +} + +Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +Expected<SymbolRef::Type> +WasmObjectFile::getSymbolType(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +Expected<section_iterator> +WasmObjectFile::getSymbolSection(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } + +std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Res) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; +#define ECase(X) \ + case wasm::WASM_SEC_##X: \ + Res = #X; \ + break + switch (S.Type) { + ECase(TYPE); + ECase(IMPORT); + ECase(FUNCTION); + ECase(TABLE); + ECase(MEMORY); + ECase(GLOBAL); + ECase(EXPORT); + ECase(START); + ECase(ELEM); + ECase(CODE); + ECase(DATA); + case wasm::WASM_SEC_USER: + Res = S.Name; + break; + default: + return object_error::invalid_section_index; + } +#undef ECase + return std::error_code(); +} + +uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } + +uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + return S.Content.size(); +} + +std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Res) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + // This will never fail since wasm sections can never be empty (user-sections + // must have a name and non-user sections each have a defined structure). + Res = StringRef(reinterpret_cast<const char *>(S.Content.data()), + S.Content.size()); + return std::error_code(); +} + +uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const { + return 1; +} + +bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool WasmObjectFile::isSectionText(DataRefImpl Sec) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + return S.Type == wasm::WASM_SEC_CODE; +} + +bool WasmObjectFile::isSectionData(DataRefImpl Sec) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + return S.Type == wasm::WASM_SEC_DATA; +} + +bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; } + +bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; } + +bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; } + +relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Sec) const { + llvm_unreachable("not yet implemented"); + RelocationRef Rel; + return relocation_iterator(Rel); +} + +relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Sec) const { + llvm_unreachable("not yet implemented"); + RelocationRef Rel; + return relocation_iterator(Rel); +} + +section_iterator WasmObjectFile::getRelocatedSection(DataRefImpl Sec) const { + llvm_unreachable("not yet implemented"); + SectionRef Ref; + return section_iterator(Ref); +} + +void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + llvm_unreachable("not yet implemented"); +} + +uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + SymbolRef Ref; + return symbol_iterator(Ref); +} + +uint64_t WasmObjectFile::getRelocationType(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +void WasmObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + llvm_unreachable("not yet implemented"); +} + +section_iterator WasmObjectFile::section_begin() const { + DataRefImpl Ref; + Ref.d.a = 0; + return section_iterator(SectionRef(Ref, this)); +} + +section_iterator WasmObjectFile::section_end() const { + DataRefImpl Ref; + Ref.d.a = Sections.size(); + return section_iterator(SectionRef(Ref, this)); +} + +uint8_t WasmObjectFile::getBytesInAddress() const { return 4; } + +StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; } + +unsigned WasmObjectFile::getArch() const { return Triple::wasm32; } + +SubtargetFeatures WasmObjectFile::getFeatures() const { + return SubtargetFeatures(); +} + +bool WasmObjectFile::isRelocatableObject() const { return false; } + +const wasm::WasmSection * +WasmObjectFile::getWasmSection(const SectionRef &Section) const { + return &Sections[Section.getRawDataRefImpl().d.a]; +} + +} // end namespace object +} // end namespace llvm |
