diff options
Diffstat (limited to 'contrib/llvm/lib/Object')
23 files changed, 12628 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp new file mode 100644 index 000000000000..977cccc11dcd --- /dev/null +++ b/contrib/llvm/lib/Object/Archive.cpp @@ -0,0 +1,993 @@ +//===- Archive.cpp - ar File Format implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ArchiveObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Archive.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Chrono.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <system_error> + +using namespace llvm; +using namespace object; +using namespace llvm::support::endian; + +static const char *const Magic = "!<arch>\n"; +static const char *const ThinMagic = "!<thin>\n"; + +void Archive::anchor() {} + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : Parent(Parent), + ArMemHdr(reinterpret_cast<const ArMemHdrType 
*>(RawHeaderPtr)) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + if (Size < sizeof(ArMemHdrType)) { + if (Err) { + std::string Msg("remaining size of archive too small for next archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } + if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { + if (Err) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(StringRef(ArMemHdr->Terminator, + sizeof(ArMemHdr->Terminator))); + OS.flush(); + std::string Msg("terminator characters in archive member \"" + Buf + + "\" not the correct \"`\\n\" values for the archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } +} + +// This gets the raw name from the ArMemHdr->Name field and checks that it is +// valid for the kind of archive. If it is not valid it returns an Error. 
+Expected<StringRef> ArchiveMemberHeader::getRawName() const { + char EndCond; + auto Kind = Parent->kind(); + if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { + if (ArMemHdr->Name[0] == ' ') { + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("name contains a leading space for archive member " + "header at offset " + Twine(Offset)); + } + EndCond = ' '; + } + else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') + EndCond = ' '; + else + EndCond = '/'; + StringRef::size_type end = + StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); + if (end == StringRef::npos) + end = sizeof(ArMemHdr->Name); + assert(end <= sizeof(ArMemHdr->Name) && end > 0); + // Don't include the EndCond if there is one. + return StringRef(ArMemHdr->Name, end); +} + +// This gets the name looking up long names. Size is the size of the archive +// member including the header, so the size of any name following the header +// is checked to make sure it does not overflow. +Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { + + // This can be called from the ArchiveMemberHeader constructor when the + // archive header is truncated to produce an error message with the name. + // Make sure the name field is not truncated. + if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("archive header truncated before the name field " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + + // The raw name itself can be invalid. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + + // Check if it's a special name. + if (Name[0] == '/') { + if (Name.size() == 1) // Linker member. + return Name; + if (Name.size() == 2 && Name[1] == '/') // String table. 
+ return Name; + // It's a long name. + // Get the string table offset. + std::size_t StringOffset; + if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(1).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name offset characters after the '/' are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(ArchiveOffset)); + } + + // Verify it. + if (StringOffset >= Parent->getStringTable().size()) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name offset " + Twine(StringOffset) + " past " + "the end of the string table for archive member " + "header at offset " + Twine(ArchiveOffset)); + } + const char *addr = Parent->getStringTable().begin() + StringOffset; + + // GNU long file names end with a "/\n". 
+ if (Parent->kind() == Archive::K_GNU || + Parent->kind() == Archive::K_MIPS64) { + StringRef::size_type End = StringRef(addr).find('\n'); + return StringRef(addr, End - 1); + } + return addr; + } + + if (Name.startswith("#1/")) { + uint64_t NameLength; + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(ArchiveOffset)); + } + if (getSizeOf() + NameLength > Size) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name length: " + Twine(NameLength) + + " extends past the end of the member or archive " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), + NameLength).rtrim('\0'); + } + + // It is not a long name so trim the blanks at the end of the name. + if (Name[Name.size() - 1] != '/') + return Name.rtrim(' '); + + // It's a simple name. 
+ return Name.drop_back(1); +} + +Expected<uint32_t> ArchiveMemberHeader::getSize() const { + uint32_t Ret; + if (StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in size field in archive header are not " + "all decimal numbers: '" + Buf + "' for archive " + "member header at offset " + Twine(Offset)); + } + return Ret; +} + +Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const { + unsigned Ret; + if (StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in AccessMode field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + return static_cast<sys::fs::perms>(Ret); +} + +Expected<sys::TimePoint<std::chrono::seconds>> +ArchiveMemberHeader::getLastModified() const { + unsigned Seconds; + if (StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(' ') + .getAsInteger(10, Seconds)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in LastModified field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + 
} + + return sys::toTimePoint(Seconds); +} + +Expected<unsigned> ArchiveMemberHeader::getUID() const { + unsigned Ret; + StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); + if (User.empty()) + return 0; + if (User.getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(User); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in UID field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + return Ret; +} + +Expected<unsigned> ArchiveMemberHeader::getGID() const { + unsigned Ret; + StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); + if (Group.empty()) + return 0; + if (Group.getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Group); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in GID field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } + return Ret; +} + +Archive::Child::Child(const Archive *Parent, StringRef Data, + uint16_t StartOfFile) + : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr), + Data(Data), StartOfFile(StartOfFile) { +} + +Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) + : Parent(Parent), + Header(Parent, Start, + Parent + ? Parent->getData().size() - (Start - Parent->getData().data()) + : 0, Err) { + if (!Start) + return; + + // If we are pointed to real data, Start is not a nullptr, then there must be + // a non-null Err pointer available to report malformed data on. Only in + // the case sentinel value is being constructed is Err is permitted to be a + // nullptr. 
+ assert(Err && "Err can't be nullptr if Start is not a nullptr"); + + ErrorAsOutParameter ErrAsOutParam(Err); + + // If there was an error in the construction of the Header + // then just return with the error now set. + if (*Err) + return; + + uint64_t Size = Header.getSizeOf(); + Data = StringRef(Start, Size); + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) { + *Err = isThinOrErr.takeError(); + return; + } + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint64_t> MemberSize = getRawSize(); + if (!MemberSize) { + *Err = MemberSize.takeError(); + return; + } + Size += MemberSize.get(); + Data = StringRef(Start, Size); + } + + // Setup StartOfFile and PaddingBytes. + StartOfFile = Header.getSizeOf(); + // Don't include attached name. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr){ + *Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); + if (Name.startswith("#1/")) { + uint64_t NameSize; + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t Offset = Start - Parent->getData().data(); + *Err = malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(Offset)); + return; + } + StartOfFile += NameSize; + } +} + +Expected<uint64_t> Archive::Child::getSize() const { + if (Parent->IsThin) { + Expected<uint32_t> Size = Header.getSize(); + if (!Size) + return Size.takeError(); + return Size.get(); + } + return Data.size() - StartOfFile; +} + +Expected<uint64_t> Archive::Child::getRawSize() const { + return Header.getSize(); +} + +Expected<bool> Archive::Child::isThinMember() const { + Expected<StringRef> NameOrErr = Header.getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Parent->IsThin && Name != "/" && Name != 
"//"; +} + +Expected<std::string> Archive::Child::getFullName() const { + Expected<bool> isThin = isThinMember(); + if (!isThin) + return isThin.takeError(); + assert(isThin.get()); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; + if (sys::path::is_absolute(Name)) + return Name; + + SmallString<128> FullName = sys::path::parent_path( + Parent->getMemoryBufferRef().getBufferIdentifier()); + sys::path::append(FullName, Name); + return StringRef(FullName); +} + +Expected<StringRef> Archive::Child::getBuffer() const { + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) + return isThinOrErr.takeError(); + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint32_t> Size = getSize(); + if (!Size) + return Size.takeError(); + return StringRef(Data.data() + StartOfFile, Size.get()); + } + Expected<std::string> FullNameOrErr = getFullName(); + if (!FullNameOrErr) + return FullNameOrErr.takeError(); + const std::string &FullName = *FullNameOrErr; + ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); + if (std::error_code EC = Buf.getError()) + return errorCodeToError(EC); + Parent->ThinBuffers.push_back(std::move(*Buf)); + return Parent->ThinBuffers.back()->getBuffer(); +} + +Expected<Archive::Child> Archive::Child::getNext() const { + size_t SpaceToSkip = Data.size(); + // If it's odd, add 1 to make it even. + if (SpaceToSkip & 1) + ++SpaceToSkip; + + const char *NextLoc = Data.data() + SpaceToSkip; + + // Check to see if this is at the end of the archive. + if (NextLoc == Parent->Data.getBufferEnd()) + return Child(nullptr, nullptr, nullptr); + + // Check to see if this is past the end of the archive. 
+ if (NextLoc > Parent->Data.getBufferEnd()) { + std::string Msg("offset to next archive member past the end of the archive " + "after member "); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = Data.data() - Parent->getData().data(); + return malformedError(Msg + "at offset " + Twine(Offset)); + } else + return malformedError(Msg + NameOrErr.get()); + } + + Error Err = Error::success(); + Child Ret(Parent, NextLoc, &Err); + if (Err) + return std::move(Err); + return Ret; +} + +uint64_t Archive::Child::getChildOffset() const { + const char *a = Parent->Data.getBuffer().data(); + const char *c = Data.data(); + uint64_t offset = c - a; + return offset; +} + +Expected<StringRef> Archive::Child::getName() const { + Expected<uint64_t> RawSizeOrErr = getRawSize(); + if (!RawSizeOrErr) + return RawSizeOrErr.takeError(); + uint64_t RawSize = RawSizeOrErr.get(); + Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Name; +} + +Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + Expected<StringRef> Buf = getBuffer(); + if (!Buf) + return Buf.takeError(); + return MemoryBufferRef(*Buf, Name); +} + +Expected<std::unique_ptr<Binary>> +Archive::Child::getAsBinary(LLVMContext *Context) const { + Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); + if (!BuffOrErr) + return BuffOrErr.takeError(); + + auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); + if (BinaryOrErr) + return std::move(*BinaryOrErr); + return BinaryOrErr.takeError(); +} + +Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<Archive> Ret(new Archive(Source, Err)); + if (Err) + 
return std::move(Err); + return std::move(Ret); +} + +void Archive::setFirstRegular(const Child &C) { + FirstRegularData = C.Data; + FirstRegularStartOfFile = C.StartOfFile; +} + +Archive::Archive(MemoryBufferRef Source, Error &Err) + : Binary(Binary::ID_Archive, Source) { + ErrorAsOutParameter ErrAsOutParam(&Err); + StringRef Buffer = Data.getBuffer(); + // Check for sufficient magic. + if (Buffer.startswith(ThinMagic)) { + IsThin = true; + } else if (Buffer.startswith(Magic)) { + IsThin = false; + } else { + Err = make_error<GenericBinaryError>("File too small to be an archive", + object_error::invalid_file_type); + return; + } + + // Make sure Format is initialized before any call to + // ArchiveMemberHeader::getName() is made. This could be a valid empty + // archive which is the same in all formats. So claiming it to be gnu to is + // fine if not totally correct before we look for a string table or table of + // contents. + Format = K_GNU; + + // Get the special members. + child_iterator I = child_begin(Err, false); + if (Err) + return; + child_iterator E = child_end(); + + // See if this is a valid empty archive and if so return. 
+ if (I == E) { + Err = Error::success(); + return; + } + const Child *C = &*I; + + auto Increment = [&]() { + ++I; + if (Err) + return true; + C = &*I; + return false; + }; + + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); + + // Below is the pattern that is used to figure out the archive format + // GNU archive format + // First member : / (may exist, if it exists, points to the symbol table ) + // Second member : // (may exist, if it exists, points to the string table) + // Note : The string table is used if the filename exceeds 15 characters + // BSD archive format + // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) + // There is no string table, if the filename exceeds 15 characters or has a + // embedded space, the filename has #1/<size>, The size represents the size + // of the filename that needs to be read after the archive header + // COFF archive format + // First member : / + // Second member : / (provides a directory of symbols) + // Third member : // (may exist, if it exists, contains the string table) + // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present + // even if the string table is empty. However, lib.exe does not in fact + // seem to create the third member if there's no member whose filename + // exceeds 15 characters. So the third member is optional. + + if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") { + if (Name == "__.SYMDEF") + Format = K_BSD; + else // Name == "__.SYMDEF_64" + Format = K_DARWIN64; + // We know that the symbol table is not an external file, but we still must + // check any Expected<> return value. 
+ Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + setFirstRegular(*C); + + Err = Error::success(); + return; + } + + if (Name.startswith("#1/")) { + Format = K_BSD; + // We know this is BSD, so getName will work since there is no string table. + Expected<StringRef> NameOrErr = C->getName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + } + else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { + Format = K_DARWIN64; + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Increment()) + return; + } + setFirstRegular(*C); + return; + } + + // MIPS 64-bit ELF archives use a special format of a symbol table. + // This format is marked by `ar_name` field equals to "/SYM64/". + // For detailed description see page 96 in the following document: + // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf + + bool has64SymTable = false; + if (Name == "/" || Name == "/SYM64/") { + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. 
+ Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + if (Name == "/SYM64/") + has64SymTable = true; + + if (Increment()) + return; + if (I == E) { + Err = Error::success(); + return; + } + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + } + + if (Name == "//") { + Format = has64SymTable ? K_MIPS64 : K_GNU; + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); + if (Increment()) + return; + setFirstRegular(*C); + Err = Error::success(); + return; + } + + if (Name[0] != '/') { + Format = has64SymTable ? K_MIPS64 : K_GNU; + setFirstRegular(*C); + Err = Error::success(); + return; + } + + if (Name != "/") { + Err = errorCodeToError(object_error::parse_failed); + return; + } + + Format = K_COFF; + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); + + if (Increment()) + return; + + if (I == E) { + setFirstRegular(*C); + Err = Error::success(); + return; + } + + NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); + + if (Name == "//") { + // The string table is never an external member, but we still + // must check any Expected<> return value. 
+ Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); + if (Increment()) + return; + } + + setFirstRegular(*C); + Err = Error::success(); +} + +Archive::child_iterator Archive::child_begin(Error &Err, + bool SkipInternal) const { + if (isEmpty()) + return child_end(); + + if (SkipInternal) + return child_iterator(Child(this, FirstRegularData, + FirstRegularStartOfFile), + &Err); + + const char *Loc = Data.getBufferStart() + strlen(Magic); + Child C(this, Loc, &Err); + if (Err) + return child_end(); + return child_iterator(C, &Err); +} + +Archive::child_iterator Archive::child_end() const { + return child_iterator(Child(nullptr, nullptr, nullptr), nullptr); +} + +StringRef Archive::Symbol::getName() const { + return Parent->getSymbolTable().begin() + StringIndex; +} + +Expected<Archive::Child> Archive::Symbol::getMember() const { + const char *Buf = Parent->getSymbolTable().begin(); + const char *Offsets = Buf; + if (Parent->kind() == K_MIPS64 || Parent->kind() == K_DARWIN64) + Offsets += sizeof(uint64_t); + else + Offsets += sizeof(uint32_t); + uint32_t Offset = 0; + if (Parent->kind() == K_GNU) { + Offset = read32be(Offsets + SymbolIndex * 4); + } else if (Parent->kind() == K_MIPS64) { + Offset = read64be(Offsets + SymbolIndex * 8); + } else if (Parent->kind() == K_BSD) { + // The SymbolIndex is an index into the ranlib structs that start at + // Offsets (the first uint32_t is the number of bytes of the ranlib + // structs). The ranlib structs are a pair of uint32_t's the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol. Which is what + // is needed here. 
+ Offset = read32le(Offsets + SymbolIndex * 8 + 4); + } else if (Parent->kind() == K_DARWIN64) { + // The SymbolIndex is an index into the ranlib_64 structs that start at + // Offsets (the first uint64_t is the number of bytes of the ranlib_64 + // structs). The ranlib_64 structs are a pair of uint64_t's the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol. Which is what + // is needed here. + Offset = read64le(Offsets + SymbolIndex * 16 + 8); + } else { + // Skip offsets. + uint32_t MemberCount = read32le(Buf); + Buf += MemberCount * 4 + 4; + + uint32_t SymbolCount = read32le(Buf); + if (SymbolIndex >= SymbolCount) + return errorCodeToError(object_error::parse_failed); + + // Skip SymbolCount to get to the indices table. + const char *Indices = Buf + 4; + + // Get the index of the offset in the file member offset table for this + // symbol. + uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); + // Subtract 1 since OffsetIndex is 1 based. + --OffsetIndex; + + if (OffsetIndex >= MemberCount) + return errorCodeToError(object_error::parse_failed); + + Offset = read32le(Offsets + OffsetIndex * 4); + } + + const char *Loc = Parent->getData().begin() + Offset; + Error Err = Error::success(); + Child C(Parent, Loc, &Err); + if (Err) + return std::move(Err); + return C; +} + +Archive::Symbol Archive::Symbol::getNext() const { + Symbol t(*this); + if (Parent->kind() == K_BSD) { + // t.StringIndex is an offset from the start of the __.SYMDEF or + // "__.SYMDEF SORTED" member into the string table for the ranlib + // struct indexed by t.SymbolIndex . To change t.StringIndex to the + // offset in the string table for t.SymbolIndex+1 we subtract the + // its offset from the start of the string table for t.SymbolIndex + // and add the offset of the string table for t.SymbolIndex+1. 
+ + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's the first being a string table offset + // and the second being the offset into the archive of the member that + // define the symbol. After that the next uint32_t is the byte count of + // the string table followed by the string table. + const char *Buf = Parent->getSymbolTable().begin(); + uint32_t RanlibCount = 0; + RanlibCount = read32le(Buf) / 8; + // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) + // don't change the t.StringIndex as we don't want to reference a ranlib + // past RanlibCount. + if (t.SymbolIndex + 1 < RanlibCount) { + const char *Ranlibs = Buf + 4; + uint32_t CurRanStrx = 0; + uint32_t NextRanStrx = 0; + CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); + NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); + t.StringIndex -= CurRanStrx; + t.StringIndex += NextRanStrx; + } + } else { + // Go to one past next null. + t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; + } + ++t.SymbolIndex; + return t; +} + +Archive::symbol_iterator Archive::symbol_begin() const { + if (!hasSymbolTable()) + return symbol_iterator(Symbol(this, 0, 0)); + + const char *buf = getSymbolTable().begin(); + if (kind() == K_GNU) { + uint32_t symbol_count = 0; + symbol_count = read32be(buf); + buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); + } else if (kind() == K_MIPS64) { + uint64_t symbol_count = read64be(buf); + buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); + } else if (kind() == K_BSD) { + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. 
The ranlib + // structs are a pair of uint32_t's the first being a string table offset + // and the second being the offset into the archive of the member that + // define the symbol. After that the next uint32_t is the byte count of + // the string table followed by the string table. + uint32_t ranlib_count = 0; + ranlib_count = read32le(buf) / 8; + const char *ranlibs = buf + 4; + uint32_t ran_strx = 0; + ran_strx = read32le(ranlibs); + buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint32_t); + buf += ran_strx; + } else if (kind() == K_DARWIN64) { + // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t + // which is the number of bytes of ranlib_64 structs that follow. The + // ranlib_64 structs are a pair of uint64_t's the first being a string + // table offset and the second being the offset into the archive of the + // member that define the symbol. After that the next uint64_t is the byte + // count of the string table followed by the string table. + uint64_t ranlib_count = 0; + ranlib_count = read64le(buf) / 16; + const char *ranlibs = buf + 8; + uint64_t ran_strx = 0; + ran_strx = read64le(ranlibs); + buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint64_t); + buf += ran_strx; + } else { + uint32_t member_count = 0; + uint32_t symbol_count = 0; + member_count = read32le(buf); + buf += 4 + (member_count * 4); // Skip offsets. + symbol_count = read32le(buf); + buf += 4 + (symbol_count * 2); // Skip indices. 
+ } + uint32_t string_start_offset = buf - getSymbolTable().begin(); + return symbol_iterator(Symbol(this, 0, string_start_offset)); +} + +Archive::symbol_iterator Archive::symbol_end() const { + return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); +} + +uint32_t Archive::getNumberOfSymbols() const { + if (!hasSymbolTable()) + return 0; + const char *buf = getSymbolTable().begin(); + if (kind() == K_GNU) + return read32be(buf); + if (kind() == K_MIPS64) + return read64be(buf); + if (kind() == K_BSD) + return read32le(buf) / 8; + if (kind() == K_DARWIN64) + return read64le(buf) / 16; + uint32_t member_count = 0; + member_count = read32le(buf); + buf += 4 + (member_count * 4); // Skip offsets. + return read32le(buf); +} + +Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { + Archive::symbol_iterator bs = symbol_begin(); + Archive::symbol_iterator es = symbol_end(); + + for (; bs != es; ++bs) { + StringRef SymName = bs->getName(); + if (SymName == name) { + if (auto MemberOrErr = bs->getMember()) + return Child(*MemberOrErr); + else + return MemberOrErr.takeError(); + } + } + return Optional<Child>(); +} + +// Returns true if archive file contains no member file. +bool Archive::isEmpty() const { return Data.getBufferSize() == 8; } + +bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp new file mode 100644 index 000000000000..4034f9039dda --- /dev/null +++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp @@ -0,0 +1,468 @@ +//===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeArchive function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/raw_ostream.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; + +NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), + MemberName(BufRef.getBufferIdentifier()) {} + +Expected<NewArchiveMember> +NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, + bool Deterministic) { + Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); + if (!BufOrErr) + return BufOrErr.takeError(); + + NewArchiveMember M; + assert(M.IsNew == false); + M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + M.MemberName = M.Buf->getBufferIdentifier(); + if (!Deterministic) { + auto ModTimeOrErr = OldMember.getLastModified(); + if (!ModTimeOrErr) + return ModTimeOrErr.takeError(); + M.ModTime = ModTimeOrErr.get(); + Expected<unsigned> UIDOrErr = OldMember.getUID(); + if (!UIDOrErr) + return UIDOrErr.takeError(); + M.UID = UIDOrErr.get(); + Expected<unsigned> GIDOrErr = OldMember.getGID(); + if (!GIDOrErr) + return GIDOrErr.takeError(); + M.GID = GIDOrErr.get(); + Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); + if (!AccessModeOrErr) + return AccessModeOrErr.takeError(); + M.Perms = AccessModeOrErr.get(); + } + return std::move(M); +} + +Expected<NewArchiveMember> 
NewArchiveMember::getFile(StringRef FileName, + bool Deterministic) { + sys::fs::file_status Status; + int FD; + if (auto EC = sys::fs::openFileForRead(FileName, FD)) + return errorCodeToError(EC); + assert(FD != -1); + + if (auto EC = sys::fs::status(FD, Status)) + return errorCodeToError(EC); + + // Opening a directory doesn't make sense. Let it fail. + // Linux cannot open directories with open(2), although + // cygwin and *bsd can. + if (Status.type() == sys::fs::file_type::directory_file) + return errorCodeToError(make_error_code(errc::is_a_directory)); + + ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = + MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); + if (!MemberBufferOrErr) + return errorCodeToError(MemberBufferOrErr.getError()); + + if (close(FD) != 0) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + NewArchiveMember M; + M.IsNew = true; + M.Buf = std::move(*MemberBufferOrErr); + M.MemberName = M.Buf->getBufferIdentifier(); + if (!Deterministic) { + M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( + Status.getLastModificationTime()); + M.UID = Status.getUser(); + M.GID = Status.getGroup(); + M.Perms = Status.permissions(); + } + return std::move(M); +} + +template <typename T> +static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, + bool MayTruncate = false) { + uint64_t OldPos = OS.tell(); + OS << Data; + unsigned SizeSoFar = OS.tell() - OldPos; + if (Size > SizeSoFar) { + OS.indent(Size - SizeSoFar); + } else if (Size < SizeSoFar) { + assert(MayTruncate && "Data doesn't fit in Size"); + // Some of the data this is used for (like UID) can be larger than the + // space available in the archive format. Truncate in that case. 
+ OS.seek(OldPos + Size); + } +} + +static bool isBSDLike(object::Archive::Kind Kind) { + switch (Kind) { + case object::Archive::K_GNU: + return false; + case object::Archive::K_BSD: + case object::Archive::K_DARWIN: + return true; + case object::Archive::K_MIPS64: + case object::Archive::K_DARWIN64: + case object::Archive::K_COFF: + break; + } + llvm_unreachable("not supported for writting"); +} + +static void print32(raw_ostream &Out, object::Archive::Kind Kind, + uint32_t Val) { + if (isBSDLike(Kind)) + support::endian::Writer<support::little>(Out).write(Val); + else + support::endian::Writer<support::big>(Out).write(Val); +} + +static void printRestOfMemberHeader( + raw_fd_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); + printWithSpacePadding(Out, UID, 6, true); + printWithSpacePadding(Out, GID, 6, true); + printWithSpacePadding(Out, format("%o", Perms), 8); + printWithSpacePadding(Out, Size, 10); + Out << "`\n"; +} + +static void +printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + unsigned Size) { + printWithSpacePadding(Out, Twine(Name) + "/", 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +static void +printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + unsigned Size) { + uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); + // Pad so that even 64 bit object files are aligned. 
+ unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); + unsigned NameWithPadding = Name.size() + Pad; + printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, + NameWithPadding + Size); + Out << Name; + assert(PosAfterHeader == Out.tell()); + while (Pad--) + Out.write(uint8_t(0)); +} + +static bool useStringTable(bool Thin, StringRef Name) { + return Thin || Name.size() >= 16 || Name.contains('/'); +} + +static void +printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, + StringRef Name, + std::vector<unsigned>::iterator &StringMapIndexIter, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + if (isBSDLike(Kind)) + return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); + if (!useStringTable(Thin, Name)) + return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); + Out << '/'; + printWithSpacePadding(Out, *StringMapIndexIter++, 15); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +// Compute the relative path from From to To. +static std::string computeRelativePath(StringRef From, StringRef To) { + if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) + return To; + + StringRef DirFrom = sys::path::parent_path(From); + auto FromI = sys::path::begin(DirFrom); + auto ToI = sys::path::begin(To); + while (*FromI == *ToI) { + ++FromI; + ++ToI; + } + + SmallString<128> Relative; + for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) + sys::path::append(Relative, ".."); + + for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) + sys::path::append(Relative, *ToI); + +#ifdef LLVM_ON_WIN32 + // Replace backslashes with slashes so that the path is portable between *nix + // and Windows. 
+ std::replace(Relative.begin(), Relative.end(), '\\', '/'); +#endif + + return Relative.str(); +} + +static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, + ArrayRef<NewArchiveMember> Members, + std::vector<unsigned> &StringMapIndexes, + bool Thin) { + unsigned StartOffset = 0; + for (const NewArchiveMember &M : Members) { + StringRef Path = M.Buf->getBufferIdentifier(); + StringRef Name = M.MemberName; + if (!useStringTable(Thin, Name)) + continue; + if (StartOffset == 0) { + printWithSpacePadding(Out, "//", 58); + Out << "`\n"; + StartOffset = Out.tell(); + } + StringMapIndexes.push_back(Out.tell() - StartOffset); + + if (Thin) { + if (M.IsNew) + Out << computeRelativePath(ArcName, Path); + else + Out << M.Buf->getBufferIdentifier(); + } else + Out << Name; + + Out << "/\n"; + } + if (StartOffset == 0) + return; + if (Out.tell() % 2) + Out << '\n'; + int Pos = Out.tell(); + Out.seek(StartOffset - 12); + printWithSpacePadding(Out, Pos - StartOffset, 10); + Out.seek(Pos); +} + +static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { + using namespace std::chrono; + + if (!Deterministic) + return time_point_cast<seconds>(system_clock::now()); + return sys::TimePoint<seconds>(); +} + +// Returns the offset of the first reference to a member offset. 
+static ErrorOr<unsigned> +writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, + ArrayRef<NewArchiveMember> Members, + std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) { + unsigned HeaderStartOffset = 0; + unsigned BodyStartOffset = 0; + SmallString<128> NameBuf; + raw_svector_ostream NameOS(NameBuf); + LLVMContext Context; + for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) { + MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); + Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = + object::SymbolicFile::createSymbolicFile( + MemberBuffer, llvm::file_magic::unknown, &Context); + if (!ObjOrErr) { + // FIXME: check only for "not an object file" errors. + consumeError(ObjOrErr.takeError()); + continue; + } + object::SymbolicFile &Obj = *ObjOrErr.get(); + + if (!HeaderStartOffset) { + HeaderStartOffset = Out.tell(); + if (isBSDLike(Kind)) + printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); + else + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); + BodyStartOffset = Out.tell(); + print32(Out, Kind, 0); // number of entries or bytes + } + + for (const object::BasicSymbolRef &S : Obj.symbols()) { + uint32_t Symflags = S.getFlags(); + if (Symflags & object::SymbolRef::SF_FormatSpecific) + continue; + if (!(Symflags & object::SymbolRef::SF_Global)) + continue; + if (Symflags & object::SymbolRef::SF_Undefined) + continue; + + unsigned NameOffset = NameOS.tell(); + if (auto EC = S.printName(NameOS)) + return EC; + NameOS << '\0'; + MemberOffsetRefs.push_back(MemberNum); + if (isBSDLike(Kind)) + print32(Out, Kind, NameOffset); + print32(Out, Kind, 0); // member offset + } + } + + if (HeaderStartOffset == 0) + return 0; + + // ld64 prefers the cctools type archive which pads its string table to a + // boundary of sizeof(int32_t). 
+ if (isBSDLike(Kind)) + for (unsigned P = OffsetToAlignment(NameOS.tell(), sizeof(int32_t)); P--;) + NameOS << '\0'; + + StringRef StringTable = NameOS.str(); + if (isBSDLike(Kind)) + print32(Out, Kind, StringTable.size()); // byte count of the string table + Out << StringTable; + // If there are no symbols, emit an empty symbol table, to satisfy Solaris + // tools, older versions of which expect a symbol table in a non-empty + // archive, regardless of whether there are any symbols in it. + if (StringTable.size() == 0) + print32(Out, Kind, 0); + + // ld64 requires the next member header to start at an offset that is + // 4 bytes aligned. + unsigned Pad = OffsetToAlignment(Out.tell(), 4); + while (Pad--) + Out.write(uint8_t(0)); + + // Patch up the size of the symbol table now that we know how big it is. + unsigned Pos = Out.tell(); + const unsigned MemberHeaderSize = 60; + Out.seek(HeaderStartOffset + 48); // offset of the size field. + printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10); + + // Patch up the number of symbols. 
+ Out.seek(BodyStartOffset); + unsigned NumSyms = MemberOffsetRefs.size(); + if (isBSDLike(Kind)) + print32(Out, Kind, NumSyms * 8); + else + print32(Out, Kind, NumSyms); + + Out.seek(Pos); + return BodyStartOffset + 4; +} + +std::pair<StringRef, std::error_code> +llvm::writeArchive(StringRef ArcName, + std::vector<NewArchiveMember> &NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin, + std::unique_ptr<MemoryBuffer> OldArchiveBuf) { + assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); + SmallString<128> TmpArchive; + int TmpArchiveFD; + if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", + TmpArchiveFD, TmpArchive)) + return std::make_pair(ArcName, EC); + + tool_output_file Output(TmpArchive, TmpArchiveFD); + raw_fd_ostream &Out = Output.os(); + if (Thin) + Out << "!<thin>\n"; + else + Out << "!<arch>\n"; + + std::vector<unsigned> MemberOffsetRefs; + + unsigned MemberReferenceOffset = 0; + if (WriteSymtab) { + ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( + Out, Kind, NewMembers, MemberOffsetRefs, Deterministic); + if (auto EC = MemberReferenceOffsetOrErr.getError()) + return std::make_pair(ArcName, EC); + MemberReferenceOffset = MemberReferenceOffsetOrErr.get(); + } + + std::vector<unsigned> StringMapIndexes; + if (!isBSDLike(Kind)) + writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); + + std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); + std::vector<unsigned> MemberOffset; + for (const NewArchiveMember &M : NewMembers) { + MemoryBufferRef File = M.Buf->getMemBufferRef(); + unsigned Padding = 0; + + unsigned Pos = Out.tell(); + MemberOffset.push_back(Pos); + + // ld64 expects the members to be 8-byte aligned for 64-bit content and at + // least 4-byte aligned for 32-bit content. Opt for the larger encoding + // uniformly. 
This matches the behaviour with cctools and ensures that ld64 + // is happy with archives that we generate. + if (Kind == object::Archive::K_DARWIN) + Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); + + printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter, + M.ModTime, M.UID, M.GID, M.Perms, + M.Buf->getBufferSize() + Padding); + + if (!Thin) + Out << File.getBuffer(); + + while (Padding--) + Out << '\n'; + if (Out.tell() % 2) + Out << '\n'; + } + + if (MemberReferenceOffset) { + Out.seek(MemberReferenceOffset); + for (unsigned MemberNum : MemberOffsetRefs) { + if (isBSDLike(Kind)) + Out.seek(Out.tell() + 4); // skip over the string offset + print32(Out, Kind, MemberOffset[MemberNum]); + } + } + + Output.keep(); + Out.close(); + + // At this point, we no longer need whatever backing memory + // was used to generate the NewMembers. On Windows, this buffer + // could be a mapped view of the file we want to replace (if + // we're updating an existing archive, say). In that case, the + // rename would still succeed, but it would leave behind a + // temporary file (actually the original file renamed) because + // a file cannot be deleted while there's a handle open on it, + // only renamed. So by freeing this buffer, this ensures that + // the last open handle on the destination file, if any, is + // closed before we attempt to rename. + OldArchiveBuf.reset(); + + sys::fs::rename(TmpArchive, ArcName); + return std::make_pair("", std::error_code()); +} diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp new file mode 100644 index 000000000000..c4565db459e6 --- /dev/null +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -0,0 +1,100 @@ +//===- Binary.cpp - A generic binary file ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the Binary class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Binary.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/WindowsResource.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> +#include <system_error> + +using namespace llvm; +using namespace object; + +Binary::~Binary() = default; + +Binary::Binary(unsigned int Type, MemoryBufferRef Source) + : TypeID(Type), Data(Source) {} + +StringRef Binary::getData() const { return Data.getBuffer(); } + +StringRef Binary::getFileName() const { return Data.getBufferIdentifier(); } + +MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; } + +Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, + LLVMContext *Context) { + file_magic Type = identify_magic(Buffer.getBuffer()); + + switch (Type) { + case file_magic::archive: + return Archive::create(Buffer); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: 
+ case file_magic::macho_kext_bundle: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + case file_magic::bitcode: + case file_magic::wasm_object: + return ObjectFile::createSymbolicFile(Buffer, Type, Context); + case file_magic::macho_universal_binary: + return MachOUniversalBinary::create(Buffer); + case file_magic::windows_resource: + return WindowsResource::createWindowsResource(Buffer); + case file_magic::unknown: + case file_magic::coff_cl_gl_object: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); + } + llvm_unreachable("Unexpected Binary File Type"); +} + +Expected<OwningBinary<Binary>> object::createBinary(StringRef Path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = FileOrErr.getError()) + return errorCodeToError(EC); + std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get(); + + Expected<std::unique_ptr<Binary>> BinOrErr = + createBinary(Buffer->getMemBufferRef()); + if (!BinOrErr) + return BinOrErr.takeError(); + std::unique_ptr<Binary> &Bin = BinOrErr.get(); + + return OwningBinary<Binary>(std::move(Bin), std::move(Buffer)); +} diff --git a/contrib/llvm/lib/Object/COFFImportFile.cpp b/contrib/llvm/lib/Object/COFFImportFile.cpp new file mode 100644 index 000000000000..740bf94d40e0 --- /dev/null +++ b/contrib/llvm/lib/Object/COFFImportFile.cpp @@ -0,0 +1,527 @@ +//===- COFFImportFile.cpp - COFF short import file implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeImportLibrary function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFImportFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +#include <cstdint> +#include <map> +#include <set> +#include <string> +#include <vector> + +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +namespace llvm { +namespace object { + +static bool is32bit(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template <class T> static void append(std::vector<uint8_t> &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector<uint8_t> &B, + ArrayRef<const std::string> Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + size_t Pos = B.size(); + size_t Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. 
+ Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + support::endian::write32le(&B[Offset], Length); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName, + MachineTypes Machine) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static Expected<std::string> replace(StringRef S, StringRef From, + StringRef To) { + size_t Pos = S.find(From); + + // From and To may be mangled, but substrings in S may not. + if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) { + From = From.substr(1); + To = To.substr(1); + Pos = S.find(From); + } + + if (Pos == StringRef::npos) { + return make_error<StringError>( + StringRef(Twine(S + ": replacing '" + From + + "' with '" + To + "' failed").str()), object_error::parse_failed); + } + + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. 
+class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + MachineTypes Machine; + BumpPtrAllocator Alloc; + StringRef DLLName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S, MachineTypes M) + : Machine(M), DLLName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer); + + // Creates a NULL import descriptor. This is a small object file whcih + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer); + + // Create a NULL Thunk Entry. This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. 
+ NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportType Type, ImportNameType NameType); +}; +} // namespace + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 7; + static const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$4 + (DLLName.size() + 1)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(DLLName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + static const 
coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation(Machine))}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + DLLName.size() + 1); + memcpy(&Buffer[S], DLLName.data(), DLLName.size()); + Buffer[S + DLLName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + // TODO: Name.Offset.Offset here and in the all similar places below + // suggests a names refactoring. Maybe StringTableOffset.Value? 
+ SymbolTable[0].Name.Offset.Offset = + sizeof(uint32_t); + SymbolTable[5].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + SymbolTable[6].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 1; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit(Machine) ? 4 : 8; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + VASize + + // .idata$4 + VASize), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + VASize), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, DLLName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportType ImportType, + ImportNameType NameType) { + size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate<char>(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. 
+ auto *Imp = reinterpret_cast<coff_import_header *>(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (NameType << 2) | ImportType; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, DLLName.data(), DLLName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), DLLName)}; +} + +std::error_code writeImportLibrary(StringRef DLLName, StringRef Path, + ArrayRef<COFFShortExport> Exports, + MachineTypes Machine) { + + std::vector<NewArchiveMember> Members; + ObjectFactory OF(llvm::sys::path::filename(DLLName), Machine); + + std::vector<uint8_t> ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector<uint8_t> NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector<uint8_t> NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (COFFShortExport E : Exports) { + if (E.Private) + continue; + + ImportType ImportType = IMPORT_CODE; + if (E.Data) + ImportType = IMPORT_DATA; + if (E.Constant) + ImportType = IMPORT_CONST; + + StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name; + ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); + Expected<std::string> Name = E.ExtName.empty() + ? 
SymbolName + : replace(SymbolName, E.Name, E.ExtName); + + if (!Name) { + return errorToErrorCode(Name.takeError()); + } + + Members.push_back( + OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); + } + + std::pair<StringRef, std::error_code> Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + + return Result.second; +} + +} // namespace object +} // namespace llvm diff --git a/contrib/llvm/lib/Object/COFFModuleDefinition.cpp b/contrib/llvm/lib/Object/COFFModuleDefinition.cpp new file mode 100644 index 000000000000..0d69cb6b709c --- /dev/null +++ b/contrib/llvm/lib/Object/COFFModuleDefinition.cpp @@ -0,0 +1,319 @@ +//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). 
+// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFModuleDefinition.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::COFF; +using namespace llvm; + +namespace llvm { +namespace object { + +enum Kind { + Unknown, + Eof, + Identifier, + Comma, + Equal, + KwBase, + KwConstant, + KwData, + KwExports, + KwHeapsize, + KwLibrary, + KwName, + KwNoname, + KwPrivate, + KwStacksize, + KwVersion, +}; + +struct Token { + explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} + Kind K; + StringRef Value; +}; + +static bool isDecorated(StringRef Sym) { + return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +} + +static Error createError(const Twine &Err) { + return make_error<StringError>(StringRef(Err.str()), + object_error::parse_failed); +} + +class Lexer { +public: + Lexer(StringRef S) : Buf(S) {} + + Token lex() { + Buf = Buf.trim(); + if (Buf.empty()) + return Token(Eof); + + switch (Buf[0]) { + case '\0': + return Token(Eof); + case ';': { + size_t End = Buf.find('\n'); + Buf = (End == Buf.npos) ? 
"" : Buf.drop_front(End); + return lex(); + } + case '=': + Buf = Buf.drop_front(); + return Token(Equal, "="); + case ',': + Buf = Buf.drop_front(); + return Token(Comma, ","); + case '"': { + StringRef S; + std::tie(S, Buf) = Buf.substr(1).split('"'); + return Token(Identifier, S); + } + default: { + size_t End = Buf.find_first_of("=,\r\n \t\v"); + StringRef Word = Buf.substr(0, End); + Kind K = llvm::StringSwitch<Kind>(Word) + .Case("BASE", KwBase) + .Case("CONSTANT", KwConstant) + .Case("DATA", KwData) + .Case("EXPORTS", KwExports) + .Case("HEAPSIZE", KwHeapsize) + .Case("LIBRARY", KwLibrary) + .Case("NAME", KwName) + .Case("NONAME", KwNoname) + .Case("PRIVATE", KwPrivate) + .Case("STACKSIZE", KwStacksize) + .Case("VERSION", KwVersion) + .Default(Identifier); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S, MachineTypes M) : Lex(S), Machine(M) {} + + Expected<COFFModuleDefinition> parse() { + do { + if (Error Err = parseOne()) + return std::move(Err); + } while (Tok.K != Eof); + return Info; + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + Error readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + return createError("integer expected"); + return Error::success(); + } + + Error expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + return createError(Msg); + return Error::success(); + } + + void unget() { Stack.push_back(Tok); } + + Error parseOne() { + read(); + switch (Tok.K) { + case Eof: + return Error::success(); + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return Error::success(); + } + if (Error Err = parseExport()) + return Err; + } + case KwHeapsize: + return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); + case KwStacksize: + 
return parseNumbers(&Info.StackReserve, &Info.StackCommit); + case KwLibrary: + case KwName: { + bool IsDll = Tok.K == KwLibrary; // Check before parseName. + std::string Name; + if (Error Err = parseName(&Name, &Info.ImageBase)) + return Err; + // Append the appropriate file extension if not already present. + StringRef Ext = IsDll ? ".dll" : ".exe"; + if (!StringRef(Name).endswith_lower(Ext)) + Name += Ext; + + // Set the output file, but don't override /out if it was already passed. + if (Info.OutputFile.empty()) + Info.OutputFile = Name; + return Error::success(); + } + case KwVersion: + return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); + default: + return createError("unknown directive: " + Tok.Value); + } + } + + Error parseExport() { + COFFShortExport E; + E.Name = Tok.Value; + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = Tok.Value; + } else { + unget(); + } + + if (Machine == IMAGE_FILE_MACHINE_I386) { + if (!isDecorated(E.Name)) + E.Name = (std::string("_").append(E.Name)); + if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + E.ExtName = (std::string("_").append(E.ExtName)); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + Tok.Value.drop_front().getAsInteger(10, E.Ordinal); + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwConstant) { + E.Constant = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + unget(); + Info.Exports.push_back(E); + return Error::success(); + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + if (Error Err = readAsInt(Reserve)) + return Err; + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return Error::success(); + } + if 
(Error Err = readAsInt(Commit)) + return Err; + return Error::success(); + } + + // NAME outputPath [BASE=address] + Error parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = Tok.Value; + } else { + *Out = ""; + unget(); + return Error::success(); + } + read(); + if (Tok.K == KwBase) { + if (Error Err = expect(Equal, "'=' expected")) + return Err; + if (Error Err = readAsInt(Baseaddr)) + return Err; + } else { + unget(); + *Baseaddr = 0; + } + return Error::success(); + } + + // VERSION major[.minor] + Error parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + return createError("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + return createError("integer expected, but got " + Tok.Value); + return Error::success(); + } + + Lexer Lex; + Token Tok; + std::vector<Token> Stack; + MachineTypes Machine; + COFFModuleDefinition Info; +}; + +Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, + MachineTypes Machine) { + return Parser(MB.getBuffer(), Machine).parse(); +} + +} // namespace object +} // namespace llvm diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp new file mode 100644 index 000000000000..1e9b0c5b0454 --- /dev/null +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -0,0 +1,1679 @@ +//===- COFFObjectFile.cpp - COFF object file implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the COFFObjectFile class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> +#include <memory> +#include <system_error> + +using namespace llvm; +using namespace object; + +using support::ulittle16_t; +using support::ulittle32_t; +using support::ulittle64_t; +using support::little16_t; + +// Returns false if size is greater than the buffer size. And sets ec. +static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) { + if (M.getBufferSize() < Size) { + EC = object_error::unexpected_eof; + return false; + } + return true; +} + +static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr, + const uint64_t Size) { + if (Addr + Size < Addr || Addr + Size < Size || + Addr + Size > uintptr_t(M.getBufferEnd()) || + Addr < uintptr_t(M.getBufferStart())) { + return object_error::unexpected_eof; + } + return std::error_code(); +} + +// Sets Obj unless any bytes in [addr, addr + size) fall outsize of m. +// Returns unexpected_eof if error. 
+template <typename T> +static std::error_code getObject(const T *&Obj, MemoryBufferRef M, + const void *Ptr, + const uint64_t Size = sizeof(T)) { + uintptr_t Addr = uintptr_t(Ptr); + if (std::error_code EC = checkOffset(M, Addr, Size)) + return EC; + Obj = reinterpret_cast<const T *>(Addr); + return std::error_code(); +} + +// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without +// prefixed slashes. +static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) { + assert(Str.size() <= 6 && "String too long, possible overflow."); + if (Str.size() > 6) + return true; + + uint64_t Value = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= 'A' && Str[0] <= 'Z') // 0..25 + CharVal = Str[0] - 'A'; + else if (Str[0] >= 'a' && Str[0] <= 'z') // 26..51 + CharVal = Str[0] - 'a' + 26; + else if (Str[0] >= '0' && Str[0] <= '9') // 52..61 + CharVal = Str[0] - '0' + 52; + else if (Str[0] == '+') // 62 + CharVal = 62; + else if (Str[0] == '/') // 63 + CharVal = 63; + else + return true; + + Value = (Value * 64) + CharVal; + Str = Str.substr(1); + } + + if (Value > std::numeric_limits<uint32_t>::max()) + return true; + + Result = static_cast<uint32_t>(Value); + return false; +} + +template <typename coff_symbol_type> +const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const { + const coff_symbol_type *Addr = + reinterpret_cast<const coff_symbol_type *>(Ref.p); + + assert(!checkOffset(Data, uintptr_t(Addr), sizeof(*Addr))); +#ifndef NDEBUG + // Verify that the symbol points to a valid entry in the symbol table. 
+ uintptr_t Offset = uintptr_t(Addr) - uintptr_t(base()); + + assert((Offset - getPointerToSymbolTable()) % sizeof(coff_symbol_type) == 0 && + "Symbol did not point to the beginning of a symbol"); +#endif + + return Addr; +} + +const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { + const coff_section *Addr = reinterpret_cast<const coff_section*>(Ref.p); + +#ifndef NDEBUG + // Verify that the section points to a valid entry in the section table. + if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections())) + report_fatal_error("Section was outside of section table."); + + uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable); + assert(Offset % sizeof(coff_section) == 0 && + "Section did not point to the beginning of a section"); +#endif + + return Addr; +} + +void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const { + auto End = reinterpret_cast<uintptr_t>(StringTable); + if (SymbolTable16) { + const coff_symbol16 *Symb = toSymb<coff_symbol16>(Ref); + Symb += 1 + Symb->NumberOfAuxSymbols; + Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End); + } else if (SymbolTable32) { + const coff_symbol32 *Symb = toSymb<coff_symbol32>(Ref); + Symb += 1 + Symb->NumberOfAuxSymbols; + Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End); + } else { + llvm_unreachable("no symbol table pointer!"); + } +} + +Expected<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + StringRef Result; + if (std::error_code EC = getSymbolName(Symb, Result)) + return errorCodeToError(EC); + return Result; +} + +uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const { + return getCOFFSymbol(Ref).getValue(); +} + +uint32_t COFFObjectFile::getSymbolAlignment(DataRefImpl Ref) const { + // MSVC/link.exe seems to align symbols to the next-power-of-2 + // up to 32 bytes. 
+ COFFSymbolRef Symb = getCOFFSymbol(Ref); + return std::min(uint64_t(32), PowerOf2Ceil(Symb.getValue())); +} + +Expected<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { + uint64_t Result = getSymbolValue(Ref); + COFFSymbolRef Symb = getCOFFSymbol(Ref); + int32_t SectionNumber = Symb.getSectionNumber(); + + if (Symb.isAnyUndefined() || Symb.isCommon() || + COFF::isReservedSectionNumber(SectionNumber)) + return Result; + + const coff_section *Section = nullptr; + if (std::error_code EC = getSection(SectionNumber, Section)) + return errorCodeToError(EC); + Result += Section->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. + Result += getImageBase(); + + return Result; +} + +Expected<SymbolRef::Type> COFFObjectFile::getSymbolType(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + int32_t SectionNumber = Symb.getSectionNumber(); + + if (Symb.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) + return SymbolRef::ST_Function; + if (Symb.isAnyUndefined()) + return SymbolRef::ST_Unknown; + if (Symb.isCommon()) + return SymbolRef::ST_Data; + if (Symb.isFileRecord()) + return SymbolRef::ST_File; + + // TODO: perhaps we need a new symbol type ST_Section. 
+ if (SectionNumber == COFF::IMAGE_SYM_DEBUG || Symb.isSectionDefinition()) + return SymbolRef::ST_Debug; + + if (!COFF::isReservedSectionNumber(SectionNumber)) + return SymbolRef::ST_Data; + + return SymbolRef::ST_Other; +} + +uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + uint32_t Result = SymbolRef::SF_None; + + if (Symb.isExternal() || Symb.isWeakExternal()) + Result |= SymbolRef::SF_Global; + + if (Symb.isWeakExternal()) + Result |= SymbolRef::SF_Weak; + + if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE) + Result |= SymbolRef::SF_Absolute; + + if (Symb.isFileRecord()) + Result |= SymbolRef::SF_FormatSpecific; + + if (Symb.isSectionDefinition()) + Result |= SymbolRef::SF_FormatSpecific; + + if (Symb.isCommon()) + Result |= SymbolRef::SF_Common; + + if (Symb.isAnyUndefined()) + Result |= SymbolRef::SF_Undefined; + + return Result; +} + +uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + return Symb.getValue(); +} + +Expected<section_iterator> +COFFObjectFile::getSymbolSection(DataRefImpl Ref) const { + COFFSymbolRef Symb = getCOFFSymbol(Ref); + if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) + return section_end(); + const coff_section *Sec = nullptr; + if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec)) + return errorCodeToError(EC); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(Sec); + return section_iterator(SectionRef(Ret, this)); +} + +unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const { + COFFSymbolRef Symb = getCOFFSymbol(Sym.getRawDataRefImpl()); + return Symb.getSectionNumber(); +} + +void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const { + const coff_section *Sec = toSec(Ref); + Sec += 1; + Ref.p = reinterpret_cast<uintptr_t>(Sec); +} + +std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref, + StringRef &Result) const { + const coff_section *Sec = 
toSec(Ref); + return getSectionName(Sec, Result); +} + +uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + uint64_t Result = Sec->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. + Result += getImageBase(); + return Result; +} + +uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const { + return toSec(Sec) - SectionTable; +} + +uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { + return getSectionSize(toSec(Ref)); +} + +std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref, + StringRef &Result) const { + const coff_section *Sec = toSec(Ref); + ArrayRef<uint8_t> Res; + std::error_code EC = getSectionContents(Sec, Res); + Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size()); + return EC; +} + +uint64_t COFFObjectFile::getSectionAlignment(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + return Sec->getAlignment(); +} + +bool COFFObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool COFFObjectFile::isSectionText(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + return Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE; +} + +bool COFFObjectFile::isSectionData(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + return Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; +} + +bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const uint32_t BssFlags = COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + return (Sec->Characteristics & BssFlags) == BssFlags; +} + +unsigned COFFObjectFile::getSectionID(SectionRef Sec) const { + uintptr_t Offset = + uintptr_t(Sec.getRawDataRefImpl().p) - uintptr_t(SectionTable); + assert((Offset % sizeof(coff_section)) == 0); + return (Offset / sizeof(coff_section)) + 1; +} 
+ +bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + return Sec->PointerToRawData == 0; +} + +static uint32_t getNumberOfRelocations(const coff_section *Sec, + MemoryBufferRef M, const uint8_t *base) { + // The field for the number of relocations in COFF section table is only + // 16-bit wide. If a section has more than 65535 relocations, 0xFFFF is set to + // NumberOfRelocations field, and the actual relocation count is stored in the + // VirtualAddress field in the first relocation entry. + if (Sec->hasExtendedRelocations()) { + const coff_relocation *FirstReloc; + if (getObject(FirstReloc, M, reinterpret_cast<const coff_relocation*>( + base + Sec->PointerToRelocations))) + return 0; + // -1 to exclude this first relocation entry. + return FirstReloc->VirtualAddress - 1; + } + return Sec->NumberOfRelocations; +} + +static const coff_relocation * +getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) { + uint64_t NumRelocs = getNumberOfRelocations(Sec, M, Base); + if (!NumRelocs) + return nullptr; + auto begin = reinterpret_cast<const coff_relocation *>( + Base + Sec->PointerToRelocations); + if (Sec->hasExtendedRelocations()) { + // Skip the first relocation entry repurposed to store the number of + // relocations. 
+ begin++; + } + if (checkOffset(M, uintptr_t(begin), sizeof(coff_relocation) * NumRelocs)) + return nullptr; + return begin; +} + +relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const coff_relocation *begin = getFirstReloc(Sec, Data, base()); + if (begin && Sec->VirtualAddress != 0) + report_fatal_error("Sections with relocations should have an address of 0"); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(begin); + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const { + const coff_section *Sec = toSec(Ref); + const coff_relocation *I = getFirstReloc(Sec, Data, base()); + if (I) + I += getNumberOfRelocations(Sec, Data, base()); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(I); + return relocation_iterator(RelocationRef(Ret, this)); +} + +// Initialize the pointer to the symbol table. +std::error_code COFFObjectFile::initSymbolTablePtr() { + if (COFFHeader) + if (std::error_code EC = getObject( + SymbolTable16, Data, base() + getPointerToSymbolTable(), + (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize())) + return EC; + + if (COFFBigObjHeader) + if (std::error_code EC = getObject( + SymbolTable32, Data, base() + getPointerToSymbolTable(), + (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize())) + return EC; + + // Find string table. The first four byte of the string table contains the + // total size of the string table, including the size field itself. If the + // string table is empty, the value of the first four byte would be 4. 
+ uint32_t StringTableOffset = getPointerToSymbolTable() + + getNumberOfSymbols() * getSymbolTableEntrySize(); + const uint8_t *StringTableAddr = base() + StringTableOffset; + const ulittle32_t *StringTableSizePtr; + if (std::error_code EC = getObject(StringTableSizePtr, Data, StringTableAddr)) + return EC; + StringTableSize = *StringTableSizePtr; + if (std::error_code EC = + getObject(StringTable, Data, StringTableAddr, StringTableSize)) + return EC; + + // Treat table sizes < 4 as empty because contrary to the PECOFF spec, some + // tools like cvtres write a size of 0 for an empty table instead of 4. + if (StringTableSize < 4) + StringTableSize = 4; + + // Check that the string table is null terminated if has any in it. + if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0) + return object_error::parse_failed; + return std::error_code(); +} + +uint64_t COFFObjectFile::getImageBase() const { + if (PE32Header) + return PE32Header->ImageBase; + else if (PE32PlusHeader) + return PE32PlusHeader->ImageBase; + // This actually comes up in practice. + return 0; +} + +// Returns the file offset for the given VA. +std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { + uint64_t ImageBase = getImageBase(); + uint64_t Rva = Addr - ImageBase; + assert(Rva <= UINT32_MAX); + return getRvaPtr((uint32_t)Rva, Res); +} + +// Returns the file offset for the given RVA. 
+std::error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { + for (const SectionRef &S : sections()) { + const coff_section *Section = getCOFFSection(S); + uint32_t SectionStart = Section->VirtualAddress; + uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize; + if (SectionStart <= Addr && Addr < SectionEnd) { + uint32_t Offset = Addr - SectionStart; + Res = uintptr_t(base()) + Section->PointerToRawData + Offset; + return std::error_code(); + } + } + return object_error::parse_failed; +} + +std::error_code +COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size, + ArrayRef<uint8_t> &Contents) const { + for (const SectionRef &S : sections()) { + const coff_section *Section = getCOFFSection(S); + uint32_t SectionStart = Section->VirtualAddress; + // Check if this RVA is within the section bounds. Be careful about integer + // overflow. + uint32_t OffsetIntoSection = RVA - SectionStart; + if (SectionStart <= RVA && OffsetIntoSection < Section->VirtualSize && + Size <= Section->VirtualSize - OffsetIntoSection) { + uintptr_t Begin = + uintptr_t(base()) + Section->PointerToRawData + OffsetIntoSection; + Contents = + ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Begin), Size); + return std::error_code(); + } + } + return object_error::parse_failed; +} + +// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name +// table entry. 
+std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint, + StringRef &Name) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(Rva, IntPtr)) + return EC; + const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr); + Hint = *reinterpret_cast<const ulittle16_t *>(Ptr); + Name = StringRef(reinterpret_cast<const char *>(Ptr + 2)); + return std::error_code(); +} + +std::error_code +COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, + const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { + ArrayRef<uint8_t> InfoBytes; + if (std::error_code EC = getRvaAndSizeAsBytes( + DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes)) + return EC; + if (InfoBytes.size() < sizeof(*PDBInfo) + 1) + return object_error::parse_failed; + PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data()); + InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo)); + PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()), + InfoBytes.size()); + // Truncate the name at the first null byte. Ignore any padding. + PDBFileName = PDBFileName.split('\0').first; + return std::error_code(); +} + +std::error_code +COFFObjectFile::getDebugPDBInfo(const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { + for (const debug_directory &D : debug_directories()) + if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) + return getDebugPDBInfo(&D, PDBInfo, PDBFileName); + // If we get here, there is no PDB info to return. + PDBInfo = nullptr; + PDBFileName = StringRef(); + return std::error_code(); +} + +// Find the import table. +std::error_code COFFObjectFile::initImportTablePtr() { + // First, we get the RVA of the import table. If the file lacks a pointer to + // the import table, do nothing. + const data_directory *DataEntry; + if (getDataDirectory(COFF::IMPORT_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the pointer to import table is NULL. 
+ if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress; + + // Find the section that contains the RVA. This is needed because the RVA is + // the import table's memory address which is different from its file offset. + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(ImportTableRva, IntPtr)) + return EC; + if (std::error_code EC = checkOffset(Data, IntPtr, DataEntry->Size)) + return EC; + ImportDirectory = reinterpret_cast< + const coff_import_directory_table_entry *>(IntPtr); + return std::error_code(); +} + +// Initializes DelayImportDirectory and NumberOfDelayImportDirectory. +std::error_code COFFObjectFile::initDelayImportTablePtr() { + const data_directory *DataEntry; + if (getDataDirectory(COFF::DELAY_IMPORT_DESCRIPTOR, DataEntry)) + return std::error_code(); + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uint32_t RVA = DataEntry->RelativeVirtualAddress; + NumberOfDelayImportDirectory = DataEntry->Size / + sizeof(delay_import_directory_table_entry) - 1; + + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(RVA, IntPtr)) + return EC; + DelayImportDirectory = reinterpret_cast< + const delay_import_directory_table_entry *>(IntPtr); + return std::error_code(); +} + +// Find the export table. +std::error_code COFFObjectFile::initExportTablePtr() { + // First, we get the RVA of the export table. If the file lacks a pointer to + // the export table, do nothing. + const data_directory *DataEntry; + if (getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the pointer to export table is NULL. 
+ if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress; + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(ExportTableRva, IntPtr)) + return EC; + ExportDirectory = + reinterpret_cast<const export_directory_table_entry *>(IntPtr); + return std::error_code(); +} + +std::error_code COFFObjectFile::initBaseRelocPtr() { + const data_directory *DataEntry; + if (getDataDirectory(COFF::BASE_RELOCATION_TABLE, DataEntry)) + return std::error_code(); + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + BaseRelocHeader = reinterpret_cast<const coff_base_reloc_block_header *>( + IntPtr); + BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>( + IntPtr + DataEntry->Size); + return std::error_code(); +} + +std::error_code COFFObjectFile::initDebugDirectoryPtr() { + // Get the RVA of the debug directory. Do nothing if it does not exist. + const data_directory *DataEntry; + if (getDataDirectory(COFF::DEBUG_DIRECTORY, DataEntry)) + return std::error_code(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + + // Check that the size is a multiple of the entry size. + if (DataEntry->Size % sizeof(debug_directory) != 0) + return object_error::parse_failed; + + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr); + if (std::error_code EC = getRvaPtr( + DataEntry->RelativeVirtualAddress + DataEntry->Size, IntPtr)) + return EC; + DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(IntPtr); + return std::error_code(); +} + +std::error_code COFFObjectFile::initLoadConfigPtr() { + // Get the RVA of the debug directory. 
Do nothing if it does not exist. + const data_directory *DataEntry; + if (getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + + LoadConfig = (const void *)IntPtr; + return std::error_code(); +} + +COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) + : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr), + COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), + DataDirectory(nullptr), SectionTable(nullptr), SymbolTable16(nullptr), + SymbolTable32(nullptr), StringTable(nullptr), StringTableSize(0), + ImportDirectory(nullptr), + DelayImportDirectory(nullptr), NumberOfDelayImportDirectory(0), + ExportDirectory(nullptr), BaseRelocHeader(nullptr), BaseRelocEnd(nullptr), + DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr) { + // Check that we at least have enough room for a header. + if (!checkSize(Data, EC, sizeof(coff_file_header))) + return; + + // The current location in the file where we are looking at. + uint64_t CurPtr = 0; + + // PE header is optional and is present only in executables. If it exists, + // it is placed right after COFF header. + bool HasPEHeader = false; + + // Check if this is a PE/COFF file. + if (checkSize(Data, EC, sizeof(dos_header) + sizeof(COFF::PEMagic))) { + // PE/COFF, seek through MS-DOS compatibility stub and 4-byte + // PE signature to find 'normal' COFF header. + const auto *DH = reinterpret_cast<const dos_header *>(base()); + if (DH->Magic[0] == 'M' && DH->Magic[1] == 'Z') { + CurPtr = DH->AddressOfNewExeHeader; + // Check the PE magic bytes. 
("PE\0\0") + if (memcmp(base() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != 0) { + EC = object_error::parse_failed; + return; + } + CurPtr += sizeof(COFF::PEMagic); // Skip the PE magic bytes. + HasPEHeader = true; + } + } + + if ((EC = getObject(COFFHeader, Data, base() + CurPtr))) + return; + + // It might be a bigobj file, let's check. Note that COFF bigobj and COFF + // import libraries share a common prefix but bigobj is more restrictive. + if (!HasPEHeader && COFFHeader->Machine == COFF::IMAGE_FILE_MACHINE_UNKNOWN && + COFFHeader->NumberOfSections == uint16_t(0xffff) && + checkSize(Data, EC, sizeof(coff_bigobj_file_header))) { + if ((EC = getObject(COFFBigObjHeader, Data, base() + CurPtr))) + return; + + // Verify that we are dealing with bigobj. + if (COFFBigObjHeader->Version >= COFF::BigObjHeader::MinBigObjectVersion && + std::memcmp(COFFBigObjHeader->UUID, COFF::BigObjMagic, + sizeof(COFF::BigObjMagic)) == 0) { + COFFHeader = nullptr; + CurPtr += sizeof(coff_bigobj_file_header); + } else { + // It's not a bigobj. + COFFBigObjHeader = nullptr; + } + } + if (COFFHeader) { + // The prior checkSize call may have failed. This isn't a hard error + // because we were just trying to sniff out bigobj. 
+ EC = std::error_code(); + CurPtr += sizeof(coff_file_header); + + if (COFFHeader->isImportLibrary()) + return; + } + + if (HasPEHeader) { + const pe32_header *Header; + if ((EC = getObject(Header, Data, base() + CurPtr))) + return; + + const uint8_t *DataDirAddr; + uint64_t DataDirSize; + if (Header->Magic == COFF::PE32Header::PE32) { + PE32Header = Header; + DataDirAddr = base() + CurPtr + sizeof(pe32_header); + DataDirSize = sizeof(data_directory) * PE32Header->NumberOfRvaAndSize; + } else if (Header->Magic == COFF::PE32Header::PE32_PLUS) { + PE32PlusHeader = reinterpret_cast<const pe32plus_header *>(Header); + DataDirAddr = base() + CurPtr + sizeof(pe32plus_header); + DataDirSize = sizeof(data_directory) * PE32PlusHeader->NumberOfRvaAndSize; + } else { + // It's neither PE32 nor PE32+. + EC = object_error::parse_failed; + return; + } + if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize))) + return; + } + + if (COFFHeader) + CurPtr += COFFHeader->SizeOfOptionalHeader; + + if ((EC = getObject(SectionTable, Data, base() + CurPtr, + (uint64_t)getNumberOfSections() * sizeof(coff_section)))) + return; + + // Initialize the pointer to the symbol table. + if (getPointerToSymbolTable() != 0) { + if ((EC = initSymbolTablePtr())) { + SymbolTable16 = nullptr; + SymbolTable32 = nullptr; + StringTable = nullptr; + StringTableSize = 0; + } + } else { + // We had better not have any symbols if we don't have a symbol table. + if (getNumberOfSymbols() != 0) { + EC = object_error::parse_failed; + return; + } + } + + // Initialize the pointer to the beginning of the import table. + if ((EC = initImportTablePtr())) + return; + if ((EC = initDelayImportTablePtr())) + return; + + // Initialize the pointer to the export table. + if ((EC = initExportTablePtr())) + return; + + // Initialize the pointer to the base relocation table. + if ((EC = initBaseRelocPtr())) + return; + + // Initialize the pointer to the export table. 
+ if ((EC = initDebugDirectoryPtr())) + return; + + if ((EC = initLoadConfigPtr())) + return; + + EC = std::error_code(); +} + +basic_symbol_iterator COFFObjectFile::symbol_begin() const { + DataRefImpl Ret; + Ret.p = getSymbolTable(); + return basic_symbol_iterator(SymbolRef(Ret, this)); +} + +basic_symbol_iterator COFFObjectFile::symbol_end() const { + // The symbol table ends where the string table begins. + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(StringTable); + return basic_symbol_iterator(SymbolRef(Ret, this)); +} + +import_directory_iterator COFFObjectFile::import_directory_begin() const { + if (!ImportDirectory) + return import_directory_end(); + if (ImportDirectory->isNull()) + return import_directory_end(); + return import_directory_iterator( + ImportDirectoryEntryRef(ImportDirectory, 0, this)); +} + +import_directory_iterator COFFObjectFile::import_directory_end() const { + return import_directory_iterator( + ImportDirectoryEntryRef(nullptr, -1, this)); +} + +delay_import_directory_iterator +COFFObjectFile::delay_import_directory_begin() const { + return delay_import_directory_iterator( + DelayImportDirectoryEntryRef(DelayImportDirectory, 0, this)); +} + +delay_import_directory_iterator +COFFObjectFile::delay_import_directory_end() const { + return delay_import_directory_iterator( + DelayImportDirectoryEntryRef( + DelayImportDirectory, NumberOfDelayImportDirectory, this)); +} + +export_directory_iterator COFFObjectFile::export_directory_begin() const { + return export_directory_iterator( + ExportDirectoryEntryRef(ExportDirectory, 0, this)); +} + +export_directory_iterator COFFObjectFile::export_directory_end() const { + if (!ExportDirectory) + return export_directory_iterator(ExportDirectoryEntryRef(nullptr, 0, this)); + ExportDirectoryEntryRef Ref(ExportDirectory, + ExportDirectory->AddressTableEntries, this); + return export_directory_iterator(Ref); +} + +section_iterator COFFObjectFile::section_begin() const { + DataRefImpl Ret; + Ret.p 
= reinterpret_cast<uintptr_t>(SectionTable); + return section_iterator(SectionRef(Ret, this)); +} + +section_iterator COFFObjectFile::section_end() const { + DataRefImpl Ret; + int NumSections = + COFFHeader && COFFHeader->isImportLibrary() ? 0 : getNumberOfSections(); + Ret.p = reinterpret_cast<uintptr_t>(SectionTable + NumSections); + return section_iterator(SectionRef(Ret, this)); +} + +base_reloc_iterator COFFObjectFile::base_reloc_begin() const { + return base_reloc_iterator(BaseRelocRef(BaseRelocHeader, this)); +} + +base_reloc_iterator COFFObjectFile::base_reloc_end() const { + return base_reloc_iterator(BaseRelocRef(BaseRelocEnd, this)); +} + +uint8_t COFFObjectFile::getBytesInAddress() const { + return getArch() == Triple::x86_64 || getArch() == Triple::aarch64 ? 8 : 4; +} + +StringRef COFFObjectFile::getFileFormatName() const { + switch(getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + return "COFF-i386"; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return "COFF-x86-64"; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return "COFF-ARM"; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return "COFF-ARM64"; + default: + return "COFF-<unknown arch>"; + } +} + +unsigned COFFObjectFile::getArch() const { + switch (getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + return Triple::x86; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return Triple::x86_64; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return Triple::thumb; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return Triple::aarch64; + default: + return Triple::UnknownArch; + } +} + +iterator_range<import_directory_iterator> +COFFObjectFile::import_directories() const { + return make_range(import_directory_begin(), import_directory_end()); +} + +iterator_range<delay_import_directory_iterator> +COFFObjectFile::delay_import_directories() const { + return make_range(delay_import_directory_begin(), + delay_import_directory_end()); +} + +iterator_range<export_directory_iterator> +COFFObjectFile::export_directories() const { + return 
make_range(export_directory_begin(), export_directory_end()); +} + +iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const { + return make_range(base_reloc_begin(), base_reloc_end()); +} + +std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { + Res = PE32Header; + return std::error_code(); +} + +std::error_code +COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { + Res = PE32PlusHeader; + return std::error_code(); +} + +std::error_code +COFFObjectFile::getDataDirectory(uint32_t Index, + const data_directory *&Res) const { + // Error if if there's no data directory or the index is out of range. + if (!DataDirectory) { + Res = nullptr; + return object_error::parse_failed; + } + assert(PE32Header || PE32PlusHeader); + uint32_t NumEnt = PE32Header ? PE32Header->NumberOfRvaAndSize + : PE32PlusHeader->NumberOfRvaAndSize; + if (Index >= NumEnt) { + Res = nullptr; + return object_error::parse_failed; + } + Res = &DataDirectory[Index]; + return std::error_code(); +} + +std::error_code COFFObjectFile::getSection(int32_t Index, + const coff_section *&Result) const { + Result = nullptr; + if (COFF::isReservedSectionNumber(Index)) + return std::error_code(); + if (static_cast<uint32_t>(Index) <= getNumberOfSections()) { + // We already verified the section table data, so no need to check again. + Result = SectionTable + (Index - 1); + return std::error_code(); + } + return object_error::parse_failed; +} + +std::error_code COFFObjectFile::getString(uint32_t Offset, + StringRef &Result) const { + if (StringTableSize <= 4) + // Tried to get a string from an empty string table. 
+ return object_error::parse_failed; + if (Offset >= StringTableSize) + return object_error::unexpected_eof; + Result = StringRef(StringTable + Offset); + return std::error_code(); +} + +std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol, + StringRef &Res) const { + return getSymbolName(Symbol.getGeneric(), Res); +} + +std::error_code COFFObjectFile::getSymbolName(const coff_symbol_generic *Symbol, + StringRef &Res) const { + // Check for string table entry. First 4 bytes are 0. + if (Symbol->Name.Offset.Zeroes == 0) { + if (std::error_code EC = getString(Symbol->Name.Offset.Offset, Res)) + return EC; + return std::error_code(); + } + + if (Symbol->Name.ShortName[COFF::NameSize - 1] == 0) + // Null terminated, let ::strlen figure out the length. + Res = StringRef(Symbol->Name.ShortName); + else + // Not null terminated, use all 8 bytes. + Res = StringRef(Symbol->Name.ShortName, COFF::NameSize); + return std::error_code(); +} + +ArrayRef<uint8_t> +COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { + const uint8_t *Aux = nullptr; + + size_t SymbolSize = getSymbolTableEntrySize(); + if (Symbol.getNumberOfAuxSymbols() > 0) { + // AUX data comes immediately after the symbol in COFF + Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize; +#ifndef NDEBUG + // Verify that the Aux symbol points to a valid entry in the symbol table. 
+ uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base()); + if (Offset < getPointerToSymbolTable() || + Offset >= + getPointerToSymbolTable() + (getNumberOfSymbols() * SymbolSize)) + report_fatal_error("Aux Symbol data was outside of symbol table."); + + assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 && + "Aux Symbol data did not point to the beginning of a symbol"); +#endif + } + return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize); +} + +std::error_code COFFObjectFile::getSectionName(const coff_section *Sec, + StringRef &Res) const { + StringRef Name; + if (Sec->Name[COFF::NameSize - 1] == 0) + // Null terminated, let ::strlen figure out the length. + Name = Sec->Name; + else + // Not null terminated, use all 8 bytes. + Name = StringRef(Sec->Name, COFF::NameSize); + + // Check for string table entry. First byte is '/'. + if (Name.startswith("/")) { + uint32_t Offset; + if (Name.startswith("//")) { + if (decodeBase64StringEntry(Name.substr(2), Offset)) + return object_error::parse_failed; + } else { + if (Name.substr(1).getAsInteger(10, Offset)) + return object_error::parse_failed; + } + if (std::error_code EC = getString(Offset, Name)) + return EC; + } + + Res = Name; + return std::error_code(); +} + +uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const { + // SizeOfRawData and VirtualSize change what they represent depending on + // whether or not we have an executable image. + // + // For object files, SizeOfRawData contains the size of section's data; + // VirtualSize should be zero but isn't due to buggy COFF writers. + // + // For executables, SizeOfRawData *must* be a multiple of FileAlignment; the + // actual section size is in VirtualSize. It is possible for VirtualSize to + // be greater than SizeOfRawData; the contents past that point should be + // considered to be zero. 
+ if (getDOSHeader()) + return std::min(Sec->VirtualSize, Sec->SizeOfRawData); + return Sec->SizeOfRawData; +} + +std::error_code +COFFObjectFile::getSectionContents(const coff_section *Sec, + ArrayRef<uint8_t> &Res) const { + // In COFF, a virtual section won't have any in-file + // content, so the file pointer to the content will be zero. + if (Sec->PointerToRawData == 0) + return std::error_code(); + // The only thing that we need to verify is that the contents is contained + // within the file bounds. We don't need to make sure it doesn't cover other + // data, as there's nothing that says that is not allowed. + uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData; + uint32_t SectionSize = getSectionSize(Sec); + if (checkOffset(Data, ConStart, SectionSize)) + return object_error::parse_failed; + Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize); + return std::error_code(); +} + +const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const { + return reinterpret_cast<const coff_relocation*>(Rel.p); +} + +void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + Rel.p = reinterpret_cast<uintptr_t>( + reinterpret_cast<const coff_relocation*>(Rel.p) + 1); +} + +uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const { + const coff_relocation *R = toRel(Rel); + return R->VirtualAddress; +} + +symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + const coff_relocation *R = toRel(Rel); + DataRefImpl Ref; + if (R->SymbolTableIndex >= getNumberOfSymbols()) + return symbol_end(); + if (SymbolTable16) + Ref.p = reinterpret_cast<uintptr_t>(SymbolTable16 + R->SymbolTableIndex); + else if (SymbolTable32) + Ref.p = reinterpret_cast<uintptr_t>(SymbolTable32 + R->SymbolTableIndex); + else + llvm_unreachable("no symbol table pointer!"); + return symbol_iterator(SymbolRef(Ref, this)); +} + +uint64_t COFFObjectFile::getRelocationType(DataRefImpl Rel) const { + const coff_relocation* R = 
toRel(Rel); + return R->Type; +} + +const coff_section * +COFFObjectFile::getCOFFSection(const SectionRef &Section) const { + return toSec(Section.getRawDataRefImpl()); +} + +COFFSymbolRef COFFObjectFile::getCOFFSymbol(const DataRefImpl &Ref) const { + if (SymbolTable16) + return toSymb<coff_symbol16>(Ref); + if (SymbolTable32) + return toSymb<coff_symbol32>(Ref); + llvm_unreachable("no symbol table pointer!"); +} + +COFFSymbolRef COFFObjectFile::getCOFFSymbol(const SymbolRef &Symbol) const { + return getCOFFSymbol(Symbol.getRawDataRefImpl()); +} + +const coff_relocation * +COFFObjectFile::getCOFFRelocation(const RelocationRef &Reloc) const { + return toRel(Reloc.getRawDataRefImpl()); +} + +iterator_range<const coff_relocation *> +COFFObjectFile::getRelocations(const coff_section *Sec) const { + const coff_relocation *I = getFirstReloc(Sec, Data, base()); + const coff_relocation *E = I; + if (I) + E += getNumberOfRelocations(Sec, Data, base()); + return make_range(I, E); +} + +#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type) \ + case COFF::reloc_type: \ + Res = #reloc_type; \ + break; + +void COFFObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + const coff_relocation *Reloc = toRel(Rel); + StringRef Res; + switch (getMachine()) { + case COFF::IMAGE_FILE_MACHINE_AMD64: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_1); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_2); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_3); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_4); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_REL32_5); + 
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SREL32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32); + default: + Res = "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH11); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T); + default: + Res = "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH26); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEBASE_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12A); + 
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_HIGH12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14); + default: + Res = "Unknown"; + } + break; + case COFF::IMAGE_FILE_MACHINE_I386: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SEG12); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32); + default: + Res = "Unknown"; + } + break; + default: + Res = "Unknown"; + } + Result.append(Res.begin(), Res.end()); +} + +#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME + +bool COFFObjectFile::isRelocatableObject() const { + return !DataDirectory; +} + +bool ImportDirectoryEntryRef:: +operator==(const ImportDirectoryEntryRef &Other) const { + return ImportTable == Other.ImportTable && Index == Other.Index; +} + +void ImportDirectoryEntryRef::moveNext() { + ++Index; + if (ImportTable[Index].isNull()) { + Index = -1; + ImportTable = nullptr; + } +} + +std::error_code ImportDirectoryEntryRef::getImportTableEntry( 
+ const coff_import_directory_table_entry *&Result) const { + return getObject(Result, OwningObject->Data, ImportTable + Index); +} + +static imported_symbol_iterator +makeImportedSymbolIterator(const COFFObjectFile *Object, + uintptr_t Ptr, int Index) { + if (Object->getBytesInAddress() == 4) { + auto *P = reinterpret_cast<const import_lookup_table_entry32 *>(Ptr); + return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object)); + } + auto *P = reinterpret_cast<const import_lookup_table_entry64 *>(Ptr); + return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object)); +} + +static imported_symbol_iterator +importedSymbolBegin(uint32_t RVA, const COFFObjectFile *Object) { + uintptr_t IntPtr = 0; + Object->getRvaPtr(RVA, IntPtr); + return makeImportedSymbolIterator(Object, IntPtr, 0); +} + +static imported_symbol_iterator +importedSymbolEnd(uint32_t RVA, const COFFObjectFile *Object) { + uintptr_t IntPtr = 0; + Object->getRvaPtr(RVA, IntPtr); + // Forward the pointer to the last entry which is null. 
+ int Index = 0; + if (Object->getBytesInAddress() == 4) { + auto *Entry = reinterpret_cast<ulittle32_t *>(IntPtr); + while (*Entry++) + ++Index; + } else { + auto *Entry = reinterpret_cast<ulittle64_t *>(IntPtr); + while (*Entry++) + ++Index; + } + return makeImportedSymbolIterator(Object, IntPtr, Index); +} + +imported_symbol_iterator +ImportDirectoryEntryRef::imported_symbol_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportAddressTableRVA, + OwningObject); +} + +imported_symbol_iterator +ImportDirectoryEntryRef::imported_symbol_end() const { + return importedSymbolEnd(ImportTable[Index].ImportAddressTableRVA, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::imported_symbols() const { + return make_range(imported_symbol_begin(), imported_symbol_end()); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_end() const { + return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::lookup_table_symbols() const { + return make_range(lookup_table_begin(), lookup_table_end()); +} + +std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +std::error_code +ImportDirectoryEntryRef::getImportLookupTableRVA(uint32_t &Result) const { + Result = ImportTable[Index].ImportLookupTableRVA; + return std::error_code(); +} + +std::error_code +ImportDirectoryEntryRef::getImportAddressTableRVA(uint32_t &Result) const { + Result = ImportTable[Index].ImportAddressTableRVA; + return std::error_code(); +} + 
+bool DelayImportDirectoryEntryRef:: +operator==(const DelayImportDirectoryEntryRef &Other) const { + return Table == Other.Table && Index == Other.Index; +} + +void DelayImportDirectoryEntryRef::moveNext() { + ++Index; +} + +imported_symbol_iterator +DelayImportDirectoryEntryRef::imported_symbol_begin() const { + return importedSymbolBegin(Table[Index].DelayImportNameTable, + OwningObject); +} + +imported_symbol_iterator +DelayImportDirectoryEntryRef::imported_symbol_end() const { + return importedSymbolEnd(Table[Index].DelayImportNameTable, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +DelayImportDirectoryEntryRef::imported_symbols() const { + return make_range(imported_symbol_begin(), imported_symbol_end()); +} + +std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(Table[Index].Name, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +std::error_code DelayImportDirectoryEntryRef:: +getDelayImportTable(const delay_import_directory_table_entry *&Result) const { + Result = Table; + return std::error_code(); +} + +std::error_code DelayImportDirectoryEntryRef:: +getImportAddress(int AddrIndex, uint64_t &Result) const { + uint32_t RVA = Table[Index].DelayImportAddressTable + + AddrIndex * (OwningObject->is64() ? 8 : 4); + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + if (OwningObject->is64()) + Result = *reinterpret_cast<const ulittle64_t *>(IntPtr); + else + Result = *reinterpret_cast<const ulittle32_t *>(IntPtr); + return std::error_code(); +} + +bool ExportDirectoryEntryRef:: +operator==(const ExportDirectoryEntryRef &Other) const { + return ExportTable == Other.ExportTable && Index == Other.Index; +} + +void ExportDirectoryEntryRef::moveNext() { + ++Index; +} + +// Returns the name of the current export symbol. 
If the symbol is exported only +// by ordinal, the empty string is set as a result. +std::error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +// Returns the starting ordinal number. +std::error_code +ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const { + Result = ExportTable->OrdinalBase; + return std::error_code(); +} + +// Returns the export ordinal of the current export symbol. +std::error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { + Result = ExportTable->OrdinalBase + Index; + return std::error_code(); +} + +// Returns the address of the current export symbol. +std::error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr)) + return EC; + const export_address_table_entry *entry = + reinterpret_cast<const export_address_table_entry *>(IntPtr); + Result = entry[Index].ExportRVA; + return std::error_code(); +} + +// Returns the name of the current export symbol. If the symbol is exported only +// by ordinal, the empty string is set as a result. 
+std::error_code +ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { + uintptr_t IntPtr = 0; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr)) + return EC; + const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr); + + uint32_t NumEntries = ExportTable->NumberOfNamePointers; + int Offset = 0; + for (const ulittle16_t *I = Start, *E = Start + NumEntries; + I < E; ++I, ++Offset) { + if (*I != Index) + continue; + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr)) + return EC; + const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr); + if (std::error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); + } + Result = ""; + return std::error_code(); +} + +std::error_code ExportDirectoryEntryRef::isForwarder(bool &Result) const { + const data_directory *DataEntry; + if (auto EC = OwningObject->getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) + return EC; + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uint32_t Begin = DataEntry->RelativeVirtualAddress; + uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size; + Result = (Begin <= RVA && RVA < End); + return std::error_code(); +} + +std::error_code ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const { + uint32_t RVA; + if (auto EC = getExportRVA(RVA)) + return EC; + uintptr_t IntPtr = 0; + if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = StringRef(reinterpret_cast<const char *>(IntPtr)); + return std::error_code(); +} + +bool ImportedSymbolRef:: +operator==(const ImportedSymbolRef &Other) const { + return Entry32 == Other.Entry32 && Entry64 == Other.Entry64 + && Index == Other.Index; +} + +void ImportedSymbolRef::moveNext() { + ++Index; +} + +std::error_code +ImportedSymbolRef::getSymbolName(StringRef &Result) const 
{ + uint32_t RVA; + if (Entry32) { + // If a symbol is imported only by ordinal, it has no name. + if (Entry32[Index].isOrdinal()) + return std::error_code(); + RVA = Entry32[Index].getHintNameRVA(); + } else { + if (Entry64[Index].isOrdinal()) + return std::error_code(); + RVA = Entry64[Index].getHintNameRVA(); + } + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + // +2 because the first two bytes is hint. + Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2)); + return std::error_code(); +} + +std::error_code ImportedSymbolRef::isOrdinal(bool &Result) const { + if (Entry32) + Result = Entry32[Index].isOrdinal(); + else + Result = Entry64[Index].isOrdinal(); + return std::error_code(); +} + +std::error_code ImportedSymbolRef::getHintNameRVA(uint32_t &Result) const { + if (Entry32) + Result = Entry32[Index].getHintNameRVA(); + else + Result = Entry64[Index].getHintNameRVA(); + return std::error_code(); +} + +std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const { + uint32_t RVA; + if (Entry32) { + if (Entry32[Index].isOrdinal()) { + Result = Entry32[Index].getOrdinal(); + return std::error_code(); + } + RVA = Entry32[Index].getHintNameRVA(); + } else { + if (Entry64[Index].isOrdinal()) { + Result = Entry64[Index].getOrdinal(); + return std::error_code(); + } + RVA = Entry64[Index].getHintNameRVA(); + } + uintptr_t IntPtr = 0; + if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) + return EC; + Result = *reinterpret_cast<const ulittle16_t *>(IntPtr); + return std::error_code(); +} + +ErrorOr<std::unique_ptr<COFFObjectFile>> +ObjectFile::createCOFFObjectFile(MemoryBufferRef Object) { + std::error_code EC; + std::unique_ptr<COFFObjectFile> Ret(new COFFObjectFile(Object, EC)); + if (EC) + return EC; + return std::move(Ret); +} + +bool BaseRelocRef::operator==(const BaseRelocRef &Other) const { + return Header == Other.Header && Index == Other.Index; +} + +void 
BaseRelocRef::moveNext() { + // Header->BlockSize is the size of the current block, including the + // size of the header itself. + uint32_t Size = sizeof(*Header) + + sizeof(coff_base_reloc_block_entry) * (Index + 1); + if (Size == Header->BlockSize) { + // .reloc contains a list of base relocation blocks. Each block + // consists of the header followed by entries. The header contains + // how many entories will follow. When we reach the end of the + // current block, proceed to the next block. + Header = reinterpret_cast<const coff_base_reloc_block_header *>( + reinterpret_cast<const uint8_t *>(Header) + Size); + Index = 0; + } else { + ++Index; + } +} + +std::error_code BaseRelocRef::getType(uint8_t &Type) const { + auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); + Type = Entry[Index].getType(); + return std::error_code(); +} + +std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { + auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); + Result = Header->PageRVA + Entry[Index].getOffset(); + return std::error_code(); +} + +#define RETURN_IF_ERROR(X) \ + if (auto EC = errorToErrorCode(X)) \ + return EC; + +ErrorOr<ArrayRef<UTF16>> ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { + BinaryStreamReader Reader = BinaryStreamReader(BBS); + Reader.setOffset(Offset); + uint16_t Length; + RETURN_IF_ERROR(Reader.readInteger(Length)); + ArrayRef<UTF16> RawDirString; + RETURN_IF_ERROR(Reader.readArray(RawDirString, Length)); + return RawDirString; +} + +ErrorOr<ArrayRef<UTF16>> +ResourceSectionRef::getEntryNameString(const coff_resource_dir_entry &Entry) { + return getDirStringAtOffset(Entry.Identifier.getNameOffset()); +} + +ErrorOr<const coff_resource_dir_table &> +ResourceSectionRef::getTableAtOffset(uint32_t Offset) { + const coff_resource_dir_table *Table = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Table)); + assert(Table 
!= nullptr); + return *Table; +} + +ErrorOr<const coff_resource_dir_table &> +ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { + return getTableAtOffset(Entry.Offset.value()); +} + +ErrorOr<const coff_resource_dir_table &> ResourceSectionRef::getBaseTable() { + return getTableAtOffset(0); +} diff --git a/contrib/llvm/lib/Object/Decompressor.cpp b/contrib/llvm/lib/Object/Decompressor.cpp new file mode 100644 index 000000000000..53f084d7620e --- /dev/null +++ b/contrib/llvm/lib/Object/Decompressor.cpp @@ -0,0 +1,94 @@ +//===-- Decompressor.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Decompressor.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support::endian; +using namespace object; + +Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, + bool IsLE, bool Is64Bit) { + if (!zlib::isAvailable()) + return createError("zlib is not available"); + + Decompressor D(Data); + Error Err = isGnuStyle(Name) ? D.consumeCompressedGnuHeader() + : D.consumeCompressedZLibHeader(Is64Bit, IsLE); + if (Err) + return std::move(Err); + return D; +} + +Decompressor::Decompressor(StringRef Data) + : SectionData(Data), DecompressedSize(0) {} + +Error Decompressor::consumeCompressedGnuHeader() { + if (!SectionData.startswith("ZLIB")) + return createError("corrupted compressed section header"); + + SectionData = SectionData.substr(4); + + // Consume uncompressed section size (big-endian 8 bytes). 
+ if (SectionData.size() < 8) + return createError("corrupted uncompressed section size"); + DecompressedSize = read64be(SectionData.data()); + SectionData = SectionData.substr(8); + + return Error::success(); +} + +Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, + bool IsLittleEndian) { + using namespace ELF; + uint64_t HdrSize = Is64Bit ? sizeof(Elf64_Chdr) : sizeof(Elf32_Chdr); + if (SectionData.size() < HdrSize) + return createError("corrupted compressed section header"); + + DataExtractor Extractor(SectionData, IsLittleEndian, 0); + uint32_t Offset = 0; + if (Extractor.getUnsigned(&Offset, Is64Bit ? sizeof(Elf64_Word) + : sizeof(Elf32_Word)) != + ELFCOMPRESS_ZLIB) + return createError("unsupported compression type"); + + // Skip Elf64_Chdr::ch_reserved field. + if (Is64Bit) + Offset += sizeof(Elf64_Word); + + DecompressedSize = Extractor.getUnsigned( + &Offset, Is64Bit ? sizeof(Elf64_Xword) : sizeof(Elf32_Word)); + SectionData = SectionData.substr(HdrSize); + return Error::success(); +} + +bool Decompressor::isGnuStyle(StringRef Name) { + return Name.startswith(".zdebug"); +} + +bool Decompressor::isCompressed(const object::SectionRef &Section) { + StringRef Name; + if (Section.getName(Name)) + return false; + return Section.isCompressed() || isGnuStyle(Name); +} + +bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { + return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); +} + +Error Decompressor::decompress(MutableArrayRef<char> Buffer) { + size_t Size = Buffer.size(); + return zlib::uncompress(SectionData, Buffer.data(), Size); +} diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp new file mode 100644 index 000000000000..448fb1bd6b56 --- /dev/null +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -0,0 +1,204 @@ +//===- ELF.cpp - ELF object file implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois 
Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELF.h" +#include "llvm/BinaryFormat/ELF.h" + +using namespace llvm; +using namespace object; + +#define STRINGIFY_ENUM_CASE(ns, name) \ + case ns::name: \ + return #name; + +#define ELF_RELOC(name, value) STRINGIFY_ENUM_CASE(ELF, name) + +StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { + switch (Machine) { + case ELF::EM_X86_64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" + default: + break; + } + break; + case ELF::EM_386: + case ELF::EM_IAMCU: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/i386.def" + default: + break; + } + break; + case ELF::EM_MIPS: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" + default: + break; + } + break; + case ELF::EM_AARCH64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" + default: + break; + } + break; + case ELF::EM_ARM: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" + default: + break; + } + break; + case ELF::EM_AVR: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/AVR.def" + default: + break; + } + break; + case ELF::EM_HEXAGON: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" + default: + break; + } + break; + case ELF::EM_LANAI: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" + default: + break; + } + break; + case ELF::EM_PPC: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" + default: + break; + } + break; + case ELF::EM_PPC64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" + default: + break; + } + break; + case ELF::EM_RISCV: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" + default: + break; + } + break; + case ELF::EM_S390: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" + default: + break; + 
} + break; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + case ELF::EM_SPARCV9: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" + default: + break; + } + break; + case ELF::EM_WEBASSEMBLY: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def" + default: + break; + } + break; + case ELF::EM_AMDGPU: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" + default: + break; + } + case ELF::EM_BPF: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" + default: + break; + } + break; + default: + break; + } + return "Unknown"; +} + +#undef ELF_RELOC + +StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { + switch (Machine) { + case ELF::EM_ARM: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_EXIDX); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION); + } + break; + case ELF::EM_HEXAGON: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_HEX_ORDERED); } + break; + case ELF::EM_X86_64: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_X86_64_UNWIND); } + break; + case ELF::EM_MIPS: + case ELF::EM_MIPS_RS3_LE: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF); + } + break; + default: + break; + } + + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_NULL); + STRINGIFY_ENUM_CASE(ELF, SHT_PROGBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_STRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNAMIC); + STRINGIFY_ENUM_CASE(ELF, SHT_NOTE); + STRINGIFY_ENUM_CASE(ELF, SHT_NOBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_REL); + STRINGIFY_ENUM_CASE(ELF, SHT_SHLIB); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNSYM); + 
STRINGIFY_ENUM_CASE(ELF, SHT_INIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_FINI_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verneed); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_versym); + default: + return "Unknown"; + } +} diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp new file mode 100644 index 000000000000..fa136d782b5a --- /dev/null +++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp @@ -0,0 +1,316 @@ +//===- ELFObjectFile.cpp - ELF object file implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the ELFObjectFile class implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <utility> + +using namespace llvm; +using namespace object; + +ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) + : ObjectFile(Type, Source) {} + +ErrorOr<std::unique_ptr<ObjectFile>> +ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { + std::pair<unsigned char, unsigned char> Ident = + getElfArchType(Obj.getBuffer()); + std::size_t MaxAlignment = + 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart())); + + if (MaxAlignment < 2) + return object_error::parse_failed; + + std::error_code EC; + std::unique_ptr<ObjectFile> R; + if (Ident.first == ELF::ELFCLASS32) { + if (Ident.second == ELF::ELFDATA2LSB) + R.reset(new ELFObjectFile<ELFType<support::little, false>>(Obj, EC)); + else if (Ident.second == ELF::ELFDATA2MSB) + R.reset(new ELFObjectFile<ELFType<support::big, false>>(Obj, EC)); + else + return object_error::parse_failed; + } else if (Ident.first == ELF::ELFCLASS64) { + if (Ident.second == ELF::ELFDATA2LSB) + R.reset(new ELFObjectFile<ELFType<support::little, true>>(Obj, EC)); + else if (Ident.second == ELF::ELFDATA2MSB) + R.reset(new ELFObjectFile<ELFType<support::big, true>>(Obj, EC)); + else + return object_error::parse_failed; + } else { + return object_error::parse_failed; + } + + if (EC) + return EC; + return std::move(R); +} + +SubtargetFeatures 
ELFObjectFileBase::getMIPSFeatures() const { + SubtargetFeatures Features; + unsigned PlatformFlags; + getPlatformFlags(PlatformFlags); + + switch (PlatformFlags & ELF::EF_MIPS_ARCH) { + case ELF::EF_MIPS_ARCH_1: + break; + case ELF::EF_MIPS_ARCH_2: + Features.AddFeature("mips2"); + break; + case ELF::EF_MIPS_ARCH_3: + Features.AddFeature("mips3"); + break; + case ELF::EF_MIPS_ARCH_4: + Features.AddFeature("mips4"); + break; + case ELF::EF_MIPS_ARCH_5: + Features.AddFeature("mips5"); + break; + case ELF::EF_MIPS_ARCH_32: + Features.AddFeature("mips32"); + break; + case ELF::EF_MIPS_ARCH_64: + Features.AddFeature("mips64"); + break; + case ELF::EF_MIPS_ARCH_32R2: + Features.AddFeature("mips32r2"); + break; + case ELF::EF_MIPS_ARCH_64R2: + Features.AddFeature("mips64r2"); + break; + case ELF::EF_MIPS_ARCH_32R6: + Features.AddFeature("mips32r6"); + break; + case ELF::EF_MIPS_ARCH_64R6: + Features.AddFeature("mips64r6"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + switch (PlatformFlags & ELF::EF_MIPS_MACH) { + case ELF::EF_MIPS_MACH_NONE: + // No feature associated with this value. 
+ break; + case ELF::EF_MIPS_MACH_OCTEON: + Features.AddFeature("cnmips"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) + Features.AddFeature("mips16"); + if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) + Features.AddFeature("micromips"); + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { + SubtargetFeatures Features; + ARMAttributeParser Attributes; + std::error_code EC = getBuildAttributes(Attributes); + if (EC) + return SubtargetFeatures(); + + // both ARMv7-M and R have to support thumb hardware div + bool isV7 = false; + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) + isV7 = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch) + == ARMBuildAttrs::v7; + + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch_profile)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile)) { + case ARMBuildAttrs::ApplicationProfile: + Features.AddFeature("aclass"); + break; + case ARMBuildAttrs::RealTimeProfile: + Features.AddFeature("rclass"); + if (isV7) + Features.AddFeature("hwdiv"); + break; + case ARMBuildAttrs::MicroControllerProfile: + Features.AddFeature("mclass"); + if (isV7) + Features.AddFeature("hwdiv"); + break; + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::THUMB_ISA_use)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use)) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("thumb", false); + Features.AddFeature("thumb2", false); + break; + case ARMBuildAttrs::AllowThumb32: + Features.AddFeature("thumb2"); + break; + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::FP_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::FP_arch)) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("vfp2", false); + Features.AddFeature("vfp3", false); + Features.AddFeature("vfp4", false); + break; + case ARMBuildAttrs::AllowFPv2: + 
Features.AddFeature("vfp2"); + break; + case ARMBuildAttrs::AllowFPv3A: + case ARMBuildAttrs::AllowFPv3B: + Features.AddFeature("vfp3"); + break; + case ARMBuildAttrs::AllowFPv4A: + case ARMBuildAttrs::AllowFPv4B: + Features.AddFeature("vfp4"); + break; + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::Advanced_SIMD_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch)) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("neon", false); + Features.AddFeature("fp16", false); + break; + case ARMBuildAttrs::AllowNeon: + Features.AddFeature("neon"); + break; + case ARMBuildAttrs::AllowNeon2: + Features.AddFeature("neon"); + Features.AddFeature("fp16"); + break; + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) { + default: + break; + case ARMBuildAttrs::DisallowDIV: + Features.AddFeature("hwdiv", false); + Features.AddFeature("hwdiv-arm", false); + break; + case ARMBuildAttrs::AllowDIVExt: + Features.AddFeature("hwdiv"); + Features.AddFeature("hwdiv-arm"); + break; + } + } + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getFeatures() const { + switch (getEMachine()) { + case ELF::EM_MIPS: + return getMIPSFeatures(); + case ELF::EM_ARM: + return getARMFeatures(); + default: + return SubtargetFeatures(); + } +} + +// FIXME Encode from a tablegen description or target parser. +void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { + if (TheTriple.getSubArch() != Triple::NoSubArch) + return; + + ARMAttributeParser Attributes; + std::error_code EC = getBuildAttributes(Attributes); + if (EC) + return; + + std::string Triple; + // Default to ARM, but use the triple if it's been set. 
+ if (TheTriple.getArch() == Triple::thumb || + TheTriple.getArch() == Triple::thumbeb) + Triple = "thumb"; + else + Triple = "arm"; + + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch)) { + case ARMBuildAttrs::v4: + Triple += "v4"; + break; + case ARMBuildAttrs::v4T: + Triple += "v4t"; + break; + case ARMBuildAttrs::v5T: + Triple += "v5t"; + break; + case ARMBuildAttrs::v5TE: + Triple += "v5te"; + break; + case ARMBuildAttrs::v5TEJ: + Triple += "v5tej"; + break; + case ARMBuildAttrs::v6: + Triple += "v6"; + break; + case ARMBuildAttrs::v6KZ: + Triple += "v6kz"; + break; + case ARMBuildAttrs::v6T2: + Triple += "v6t2"; + break; + case ARMBuildAttrs::v6K: + Triple += "v6k"; + break; + case ARMBuildAttrs::v7: + Triple += "v7"; + break; + case ARMBuildAttrs::v6_M: + Triple += "v6m"; + break; + case ARMBuildAttrs::v6S_M: + Triple += "v6sm"; + break; + case ARMBuildAttrs::v7E_M: + Triple += "v7em"; + break; + } + } + if (!isLittleEndian()) + Triple += "eb"; + + TheTriple.setArchName(Triple); +} diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp new file mode 100644 index 000000000000..7d43a84f3e0e --- /dev/null +++ b/contrib/llvm/lib/Object/Error.cpp @@ -0,0 +1,95 @@ +//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines a new error_category for the Object library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; +using namespace object; + +namespace { +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. +class _object_error_category : public std::error_category { +public: + const char* name() const noexcept override; + std::string message(int ev) const override; +}; +} + +const char *_object_error_category::name() const noexcept { + return "llvm.object"; +} + +std::string _object_error_category::message(int EV) const { + object_error E = static_cast<object_error>(EV); + switch (E) { + case object_error::arch_not_found: + return "No object file for requested architecture"; + case object_error::invalid_file_type: + return "The file was not recognized as a valid object file"; + case object_error::parse_failed: + return "Invalid data was encountered while parsing the file"; + case object_error::unexpected_eof: + return "The end of the file was unexpectedly encountered"; + case object_error::string_table_non_null_end: + return "String table must end with a null terminator"; + case object_error::invalid_section_index: + return "Invalid section index"; + case object_error::bitcode_section_not_found: + return "Bitcode section not found in object file"; + case object_error::invalid_symbol_index: + return "Invalid symbol index"; + } + llvm_unreachable("An enumerator of object_error does not have a message " + "defined."); +} + +char BinaryError::ID = 0; +char GenericBinaryError::ID = 0; + +GenericBinaryError::GenericBinaryError(Twine Msg) : Msg(Msg.str()) {} + +GenericBinaryError::GenericBinaryError(Twine Msg, object_error ECOverride) + : Msg(Msg.str()) { + 
setErrorCode(make_error_code(ECOverride)); +} + +void GenericBinaryError::log(raw_ostream &OS) const { + OS << Msg; +} + +static ManagedStatic<_object_error_category> error_category; + +const std::error_category &object::object_category() { + return *error_category; +} + +llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) { + if (auto Err2 = + handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error { + // Try to handle 'M'. If successful, return a success value from + // the handler. + if (M->convertToErrorCode() == object_error::invalid_file_type) + return Error::success(); + + // We failed to handle 'M' - return it from the handler. + // This value will be passed back from catchErrors and + // wind up in Err2, where it will be returned from this function. + return Error(std::move(M)); + })) + return Err2; + return Err; +} diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp new file mode 100644 index 000000000000..e7807b038335 --- /dev/null +++ b/contrib/llvm/lib/Object/IRObjectFile.cpp @@ -0,0 +1,163 @@ +//===- IRObjectFile.cpp - IR object file implementation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the IRObjectFile class implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRObjectFile.h" +#include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +IRObjectFile::IRObjectFile(MemoryBufferRef Object, + std::vector<std::unique_ptr<Module>> Mods) + : SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) { + for (auto &M : this->Mods) + SymTab.addModule(M.get()); +} + +IRObjectFile::~IRObjectFile() {} + +static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) { + return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p); +} + +void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + Symb.p += sizeof(ModuleSymbolTable::Symbol); +} + +std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + SymTab.printSymbolName(OS, getSym(Symb)); + return std::error_code(); +} + +uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { + return SymTab.getSymbolFlags(getSym(Symb)); +} + +basic_symbol_iterator IRObjectFile::symbol_begin() const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data()); + return basic_symbol_iterator(BasicSymbolRef(Ret, this)); +} + +basic_symbol_iterator IRObjectFile::symbol_end() 
const { + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data() + + SymTab.symbols().size()); + return basic_symbol_iterator(BasicSymbolRef(Ret, this)); +} + +StringRef IRObjectFile::getTargetTriple() const { + // Each module must have the same target triple, so we arbitrarily access the + // first one. + return Mods[0]->getTargetTriple(); +} + +ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + if (Sec.isBitcode()) { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) + return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + file_magic Type = identify_magic(Object.getBuffer()); + switch (Type) { + case file_magic::bitcode: + return Object; + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return errorToErrorCode(ObjFile.takeError()); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + +Expected<std::unique_ptr<IRObjectFile>> +IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { + ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected<std::vector<BitcodeModule>> BMsOrErr = + getBitcodeModuleList(*BCOrErr); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + std::vector<std::unique_ptr<Module>> Mods; + for (auto BM : *BMsOrErr) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Context, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + 
Mods.push_back(std::move(*MOrErr)); + } + + return std::unique_ptr<IRObjectFile>( + new IRObjectFile(*BCOrErr, std::move(Mods))); +} + +Expected<IRSymtabFile> object::readIRSymtab(MemoryBufferRef MBRef) { + IRSymtabFile F; + ErrorOr<MemoryBufferRef> BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected<BitcodeFileContents> BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + + Expected<irsymtab::FileContents> FCOrErr = irsymtab::readBitcode(*BFCOrErr); + if (!FCOrErr) + return FCOrErr.takeError(); + + F.Mods = std::move(BFCOrErr->Mods); + F.Symtab = std::move(FCOrErr->Symtab); + F.Strtab = std::move(FCOrErr->Strtab); + F.TheReader = std::move(FCOrErr->TheReader); + return std::move(F); +} diff --git a/contrib/llvm/lib/Object/IRSymtab.cpp b/contrib/llvm/lib/Object/IRSymtab.cpp new file mode 100644 index 000000000000..7a6424a76a98 --- /dev/null +++ b/contrib/llvm/lib/Object/IRSymtab.cpp @@ -0,0 +1,348 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#include "llvm/Object/IRSymtab.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using namespace irsymtab;

namespace {

/// Returns the producer string written into the symbol table header:
/// LLVM_VERSION_STRING, followed by " " and LLVM_REVISION when that macro is
/// defined, unless the LLVM_OVERRIDE_PRODUCER environment variable is set.
const char *getExpectedProducerName() {
  static char DefaultName[] = LLVM_VERSION_STRING
#ifdef LLVM_REVISION
      " " LLVM_REVISION
#endif
      ;
  // Allows for testing of the irsymtab writer and upgrade mechanism. This
  // environment variable should not be set by users.
  if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))
    return OverrideName;
  return DefaultName;
}

// Computed once during static initialisation of this translation unit.
const char *kExpectedProducerName = getExpectedProducerName();

/// Stores the temporary state that is required to build an IR symbol table.
struct Builder {
  SmallVector<char, 0> &Symtab;
  StringTableBuilder &StrtabBuilder;
  StringSaver Saver;

  // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
  // The StringTableBuilder does not create a copy of any strings added to it,
  // so this provides somewhere to store any strings that we create.
  Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
          BumpPtrAllocator &Alloc)
      : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}

  // Index assigned to each comdat the first time a symbol refers to it.
  DenseMap<const Comdat *, unsigned> ComdatMap;
  Mangler Mang;
  // Target triple of the first module; set at the start of build().
  Triple TT;

  // Records accumulated while visiting modules; build() serialises them into
  // Symtab in this order after the header.
  std::vector<storage::Comdat> Comdats;
  std::vector<storage::Module> Mods;
  std::vector<storage::Symbol> Syms;
  std::vector<storage::Uncommon> Uncommons;

  // COFF linker options gathered from all modules, written through the stream.
  std::string COFFLinkerOpts;
  raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};

  /// Adds \p Value to the string table and records its offset and size in
  /// \p S. The caller keeps \p Value alive (see the ctor comment).
  void setStr(storage::Str &S, StringRef Value) {
    S.Offset = StrtabBuilder.add(Value);
    S.Size = Value.size();
  }

  /// Appends the raw bytes of \p Objs to Symtab and records in \p R the byte
  /// offset at which they start and the number of elements.
  template <typename T>
  void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
    R.Offset = Symtab.size();
    R.Size = Objs.size();
    Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),
                  reinterpret_cast<const char *>(Objs.data() + Objs.size()));
  }

  Error addModule(Module *M);
  Error addSymbol(const ModuleSymbolTable &Msymtab,
                  const SmallPtrSet<GlobalValue *, 8> &Used,
                  ModuleSymbolTable::Symbol Sym);

  Error build(ArrayRef<Module *> Mods);
};

/// Records one module: its symbol index range, its starting index into the
/// uncommon table, its COFF linker options (COFF targets only) and all of its
/// symbols. Fails if the module has no datalayout string.
Error Builder::addModule(Module *M) {
  if (M->getDataLayoutStr().empty())
    return make_error<StringError>("input module has no datalayout",
                                   inconvertibleErrorCode());

  SmallPtrSet<GlobalValue *, 8> Used;
  collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);

  ModuleSymbolTable Msymtab;
  Msymtab.addModule(M);

  // The range is computed before the symbols are added: addSymbol appends
  // exactly one storage::Symbol per entry of Msymtab.symbols().
  storage::Module Mod;
  Mod.Begin = Syms.size();
  Mod.End = Syms.size() + Msymtab.symbols().size();
  Mod.UncBegin = Uncommons.size();
  Mods.push_back(Mod);

  if (TT.isOSBinFormatCOFF()) {
    if (auto E = M->materializeMetadata())
      return E;
    if (NamedMDNode *LinkerOptions =
            M->getNamedMetadata("llvm.linker.options")) {
      for (MDNode *MDOptions : LinkerOptions->operands())
        for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
          COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();
    }
  }

  for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
    if (Error Err = addSymbol(Msymtab, Used, Msym))
      return Err;

  return Error::success();
}

/// Appends the storage record for \p Msym: name, flag bits, comdat index and,
/// when required, an entry in the uncommon table.
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
                         const SmallPtrSet<GlobalValue *, 8> &Used,
                         ModuleSymbolTable::Symbol Msym) {
  Syms.emplace_back();
  storage::Symbol &Sym = Syms.back();
  Sym = {};

  // Creates this symbol's uncommon record on first use and sets the
  // FB_has_uncommon flag; later calls return the same record.
  storage::Uncommon *Unc = nullptr;
  auto Uncommon = [&]() -> storage::Uncommon & {
    if (Unc)
      return *Unc;
    Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon;
    Uncommons.emplace_back();
    Unc = &Uncommons.back();
    *Unc = {};
    setStr(Unc->COFFWeakExternFallbackName, "");
    return *Unc;
  };

  SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    Msymtab.printSymbolName(OS, Msym);
  }
  // Name is a local buffer, so a saved copy is what goes in the string table.
  setStr(Sym.Name, Saver.save(StringRef(Name)));

  // Translate object-file symbol flags into the storage flag bits.
  auto Flags = Msymtab.getSymbolFlags(Msym);
  if (Flags & object::BasicSymbolRef::SF_Undefined)
    Sym.Flags |= 1 << storage::Symbol::FB_undefined;
  if (Flags & object::BasicSymbolRef::SF_Weak)
    Sym.Flags |= 1 << storage::Symbol::FB_weak;
  if (Flags & object::BasicSymbolRef::SF_Common)
    Sym.Flags |= 1 << storage::Symbol::FB_common;
  if (Flags & object::BasicSymbolRef::SF_Indirect)
    Sym.Flags |= 1 << storage::Symbol::FB_indirect;
  if (Flags & object::BasicSymbolRef::SF_Global)
    Sym.Flags |= 1 << storage::Symbol::FB_global;
  if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
    Sym.Flags |= 1 << storage::Symbol::FB_format_specific;
  if (Flags & object::BasicSymbolRef::SF_Executable)
    Sym.Flags |= 1 << storage::Symbol::FB_executable;

  // -1 marks "no comdat"; overwritten below if the base object has one.
  Sym.ComdatIndex = -1;
  auto *GV = Msym.dyn_cast<GlobalValue *>();
  if (!GV) {
    // Undefined module asm symbols act as GC roots and are implicitly used.
    if (Flags & object::BasicSymbolRef::SF_Undefined)
      Sym.Flags |= 1 << storage::Symbol::FB_used;
    setStr(Sym.IRName, "");
    return Error::success();
  }

  setStr(Sym.IRName, GV->getName());

  if (Used.count(GV))
    Sym.Flags |= 1 << storage::Symbol::FB_used;
  if (GV->isThreadLocal())
    Sym.Flags |= 1 << storage::Symbol::FB_tls;
  if (GV->hasGlobalUnnamedAddr())
    Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;
  if (canBeOmittedFromSymbolTable(GV))
    Sym.Flags |= 1 << storage::Symbol::FB_may_omit;
  Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;

  if (Flags & object::BasicSymbolRef::SF_Common) {
    Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize(
        GV->getType()->getElementType());
    Uncommon().CommonAlign = GV->getAlignment();
  }

  const GlobalObject *Base = GV->getBaseObject();
  if (!Base)
    return make_error<StringError>("Unable to determine comdat of alias!",
                                   inconvertibleErrorCode());
  if (const Comdat *C = Base->getComdat()) {
    // insert() leaves an existing mapping untouched; P.second tells whether
    // this comdat is new and therefore needs a storage::Comdat record.
    auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));
    Sym.ComdatIndex = P.first->second;

    if (P.second) {
      storage::Comdat Comdat;
      setStr(Comdat.Name, C->getName());
      Comdats.push_back(Comdat);
    }
  }

  if (TT.isOSBinFormatCOFF()) {
    emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);

    // For a weak indirect symbol, record the name of the aliasee as the COFF
    // weak-external fallback name.
    if ((Flags & object::BasicSymbolRef::SF_Weak) &&
        (Flags & object::BasicSymbolRef::SF_Indirect)) {
      std::string FallbackName;
      raw_string_ostream OS(FallbackName);
      Msymtab.printSymbolName(
          OS, cast<GlobalValue>(
                  cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts()));
      OS.flush();
      setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));
    }
  }

  return Error::success();
}

/// Builds the complete symbol table for \p IRMods into Symtab. The header's
/// triple and source file name are taken from the first module, so \p IRMods
/// must not be empty.
Error Builder::build(ArrayRef<Module *> IRMods) {
  storage::Header Hdr;

  assert(!IRMods.empty());
  Hdr.Version = storage::Header::kCurrentVersion;
  setStr(Hdr.Producer, kExpectedProducerName);
  setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
  setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
  TT = Triple(IRMods[0]->getTargetTriple());

  for (auto *M : IRMods)
    if (Error Err = addModule(M))
      return Err;

  // Flush so COFFLinkerOpts holds everything written through the stream.
  COFFLinkerOptsOS.flush();
  setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));

  // We are about to fill in the header's range fields, so reserve space for it
  // and copy it in afterwards.
  Symtab.resize(sizeof(storage::Header));
  writeRange(Hdr.Modules, Mods);
  writeRange(Hdr.Comdats, Comdats);
  writeRange(Hdr.Symbols, Syms);
  writeRange(Hdr.Uncommons, Uncommons);

  *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
  return Error::success();
}

} // end anonymous namespace

/// Public entry point: builds an IR symbol table for \p Mods into \p Symtab,
/// adding strings to \p StrtabBuilder and allocating saved strings from
/// \p Alloc.
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
                      StringTableBuilder &StrtabBuilder,
                      BumpPtrAllocator &Alloc) {
  return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
}

// Upgrade a vector of bitcode modules created by an old version of LLVM by
// creating an irsymtab for them in the current format.
+static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) { + FileContents FC; + + LLVMContext Ctx; + std::vector<Module *> Mods; + std::vector<std::unique_ptr<Module>> OwnedMods; + for (auto BM : BMs) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); + BumpPtrAllocator Alloc; + if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc)) + return std::move(E); + + StrtabBuilder.finalizeInOrder(); + FC.Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)FC.Strtab.data()); + + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, + {FC.Strtab.data(), FC.Strtab.size()}}; + return std::move(FC); +} + +Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) { + if (BFC.Mods.empty()) + return make_error<StringError>("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + if (BFC.StrtabForSymtab.empty() || + BFC.Symtab.size() < sizeof(storage::Header)) + return upgrade(BFC.Mods); + + // We cannot use the regular reader to read the version and producer, because + // it will expect the header to be in the current format. The only thing we + // can rely on is that the version and producer will be present as the first + // struct elements. 
+ auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data()); + unsigned Version = Hdr->Version; + StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab); + if (Version != storage::Header::kCurrentVersion || + Producer != kExpectedProducerName) + return upgrade(BFC.Mods); + + FileContents FC; + FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, + {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; + + // Finally, make sure that the number of modules in the symbol table matches + // the number of modules in the bitcode file. If they differ, it may mean that + // the bitcode file was created by binary concatenation, so we need to create + // a new symbol table from scratch. + if (FC.TheReader.getNumModules() != BFC.Mods.size()) + return upgrade(std::move(BFC.Mods)); + + return std::move(FC); +} diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp new file mode 100644 index 000000000000..2e4da9f15aa1 --- /dev/null +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -0,0 +1,4343 @@ +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOObjectFile class, which binds the MachOObject +// class to the generic ObjectFile wrapper. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> +#include <list> +#include <memory> +#include <string> +#include <system_error> + +using namespace llvm; +using namespace object; + +namespace { + + struct section_base { + char sectname[16]; + char segname[16]; + }; + +} // end anonymous namespace + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed object (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +// FIXME: Replace all uses of this function with getStructOrErr. 
+template <typename T> +static T getStruct(const MachOObjectFile &O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) + report_fatal_error("Malformed MachO file."); + + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; +} + +template <typename T> +static Expected<T> getStructOrErr(const MachOObjectFile &O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) + return malformedError("Structure read out-of-range"); + + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; +} + +static const char * +getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L, + unsigned Sec) { + uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr); + + bool Is64 = O.is64Bit(); + unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) : + sizeof(MachO::segment_command); + unsigned SectionSize = Is64 ? sizeof(MachO::section_64) : + sizeof(MachO::section); + + uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize; + return reinterpret_cast<const char*>(SectionAddr); +} + +static const char *getPtr(const MachOObjectFile &O, size_t Offset) { + return O.getData().substr(Offset, 1).data(); +} + +static MachO::nlist_base +getSymbolTableEntryBase(const MachOObjectFile &O, DataRefImpl DRI) { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist_base>(O, P); +} + +static StringRef parseSegmentOrSectionName(const char *P) { + if (P[15] == 0) + // Null terminated. + return P; + // Not null terminated, so this is a 16 char string. 
+ return StringRef(P, 16); +} + +static unsigned getCPUType(const MachOObjectFile &O) { + return O.getHeader().cputype; +} + +static uint32_t +getPlainRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0; +} + +static unsigned +getScatteredRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0 & 0xffffff; +} + +static bool getPlainRelocationPCRel(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return (RE.r_word1 >> 24) & 1; + return (RE.r_word1 >> 7) & 1; +} + +static bool +getScatteredRelocationPCRel(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 30) & 1; +} + +static unsigned getPlainRelocationLength(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return (RE.r_word1 >> 25) & 3; + return (RE.r_word1 >> 5) & 3; +} + +static unsigned +getScatteredRelocationLength(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 28) & 3; +} + +static unsigned getPlainRelocationType(const MachOObjectFile &O, + const MachO::any_relocation_info &RE) { + if (O.isLittleEndian()) + return RE.r_word1 >> 28; + return RE.r_word1 & 0xf; +} + +static uint32_t getSectionFlags(const MachOObjectFile &O, + DataRefImpl Sec) { + if (O.is64Bit()) { + MachO::section_64 Sect = O.getSection64(Sec); + return Sect.flags; + } + MachO::section Sect = O.getSection(Sec); + return Sect.flags; +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr, + uint32_t LoadCommandIndex) { + if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) { + if (CmdOrErr->cmdsize < 8) + return malformedError("load command " + Twine(LoadCommandIndex) + + " with size less than 8 bytes"); + return MachOObjectFile::LoadCommandInfo({Ptr, *CmdOrErr}); + } else + return CmdOrErr.takeError(); +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getFirstLoadCommandInfo(const 
MachOObjectFile &Obj) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds) + return malformedError("load command 0 extends past the end all load " + "commands in the file"); + return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0); +} + +static Expected<MachOObjectFile::LoadCommandInfo> +getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex, + const MachOObjectFile::LoadCommandInfo &L) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) > + Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds) + return malformedError("load command " + Twine(LoadCommandIndex + 1) + + " extends past the end all load commands in the file"); + return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1); +} + +template <typename T> +static void parseHeader(const MachOObjectFile &Obj, T &Header, + Error &Err) { + if (sizeof(T) > Obj.getData().size()) { + Err = malformedError("the mach header extends past the end of the " + "file"); + return; + } + if (auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0))) + Header = *HeaderOrErr; + else + Err = HeaderOrErr.takeError(); +} + +// This is used to check for overlapping of Mach-O elements. 
+struct MachOElement { + uint64_t Offset; + uint64_t Size; + const char *Name; +}; + +static Error checkOverlappingElement(std::list<MachOElement> &Elements, + uint64_t Offset, uint64_t Size, + const char *Name) { + if (Size == 0) + return Error::success(); + + for (auto it=Elements.begin() ; it != Elements.end(); ++it) { + auto E = *it; + if ((Offset >= E.Offset && Offset < E.Offset + E.Size) || + (Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) || + (Offset <= E.Offset && Offset + Size >= E.Offset + E.Size)) + return malformedError(Twine(Name) + " at offset " + Twine(Offset) + + " with a size of " + Twine(Size) + ", overlaps " + + E.Name + " at offset " + Twine(E.Offset) + " with " + "a size of " + Twine(E.Size)); + auto nt = it; + nt++; + if (nt != Elements.end()) { + auto N = *nt; + if (Offset + Size <= N.Offset) { + Elements.insert(nt, {Offset, Size, Name}); + return Error::success(); + } + } + } + Elements.push_back({Offset, Size, Name}); + return Error::success(); +} + +// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all +// sections to \param Sections, and optionally sets +// \param IsPageZeroSegment to true. 
+template <typename Segment, typename Section> +static Error parseSegmentLoadCommand( + const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment, + uint32_t LoadCommandIndex, const char *CmdName, uint64_t SizeOfHeaders, + std::list<MachOElement> &Elements) { + const unsigned SegmentLoadSize = sizeof(Segment); + if (Load.C.cmdsize < SegmentLoadSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " " + CmdName + " cmdsize too small"); + if (auto SegOrErr = getStructOrErr<Segment>(Obj, Load.Ptr)) { + Segment S = SegOrErr.get(); + const unsigned SectionSize = sizeof(Section); + uint64_t FileSize = Obj.getData().size(); + if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize || + S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " inconsistent cmdsize in " + CmdName + + " for the number of sections"); + for (unsigned J = 0; J < S.nsects; ++J) { + const char *Sec = getSectionPtr(Obj, Load, J); + Sections.push_back(Sec); + Section s = getStruct<Section>(Obj, Sec); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.offset > FileSize) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && S.fileoff == 0 && + s.offset < SizeOfHeaders && s.size != 0) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " not past the headers of the file"); + uint64_t BigSize = s.offset; + BigSize 
+= s.size; + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + BigSize > FileSize) + return malformedError("offset field plus size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.size > S.filesize) + return malformedError("size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than the segment"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && s.size != 0 && + s.addr < S.vmaddr) + return malformedError("addr field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " less than the segment's vmaddr"); + BigSize = s.addr; + BigSize += s.size; + uint64_t BigEnd = S.vmaddr; + BigEnd += S.vmsize; + if (S.vmsize != 0 && s.size != 0 && BigSize > BigEnd) + return malformedError("addr field plus size of section " + Twine(J) + + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than than " + "the segment's vmaddr plus vmsize"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL) + if (Error Err = checkOverlappingElement(Elements, s.offset, s.size, + "section contents")) + return Err; + if (s.reloff > FileSize) + return malformedError("reloff field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + BigSize = s.nreloc; + BigSize *= sizeof(struct MachO::relocation_info); + BigSize += 
s.reloff; + if (BigSize > FileSize) + return malformedError("reloff field plus nreloc field times sizeof(" + "struct relocation_info) of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, s.reloff, s.nreloc * + sizeof(struct + MachO::relocation_info), + "section relocation entries")) + return Err; + } + if (S.fileoff > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field in " + CmdName + + " extends past the end of the file"); + uint64_t BigSize = S.fileoff; + BigSize += S.filesize; + if (BigSize > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field plus filesize field in " + + CmdName + " extends past the end of the file"); + if (S.vmsize != 0 && S.filesize > S.vmsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " filesize field in " + CmdName + + " greater than vmsize field"); + IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); + } else + return SegOrErr.takeError(); + + return Error::success(); +} + +static Error checkSymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **SymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::symtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_SYMTAB cmdsize too small"); + if (*SymtabLoadCmd != nullptr) + return malformedError("more than one LC_SYMTAB command"); + MachO::symtab_command Symtab = + getStruct<MachO::symtab_command>(Obj, Load.Ptr); + if (Symtab.cmdsize != sizeof(MachO::symtab_command)) + return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Symtab.symoff > FileSize) + return malformedError("symoff field of LC_SYMTAB 
command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + uint64_t SymtabSize = Symtab.nsyms; + const char *struct_nlist_name; + if (Obj.is64Bit()) { + SymtabSize *= sizeof(MachO::nlist_64); + struct_nlist_name = "struct nlist_64"; + } else { + SymtabSize *= sizeof(MachO::nlist); + struct_nlist_name = "struct nlist"; + } + uint64_t BigSize = SymtabSize; + BigSize += Symtab.symoff; + if (BigSize > FileSize) + return malformedError("symoff field plus nsyms field times sizeof(" + + Twine(struct_nlist_name) + ") of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.symoff, SymtabSize, + "symbol table")) + return Err; + if (Symtab.stroff > FileSize) + return malformedError("stroff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + BigSize = Symtab.stroff; + BigSize += Symtab.strsize; + if (BigSize > FileSize) + return malformedError("stroff field plus strsize field of LC_SYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.stroff, + Symtab.strsize, "string table")) + return Err; + *SymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDysymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **DysymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dysymtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_DYSYMTAB cmdsize too small"); + if (*DysymtabLoadCmd != nullptr) + return malformedError("more than one LC_DYSYMTAB command"); + MachO::dysymtab_command Dysymtab = + getStruct<MachO::dysymtab_command>(Obj, Load.Ptr); + if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command)) + return malformedError("LC_DYSYMTAB command " + 
Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Dysymtab.tocoff > FileSize) + return malformedError("tocoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Dysymtab.ntoc; + BigSize *= sizeof(MachO::dylib_table_of_contents); + BigSize += Dysymtab.tocoff; + if (BigSize > FileSize) + return malformedError("tocoff field plus ntoc field times sizeof(struct " + "dylib_table_of_contents) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff, + Dysymtab.ntoc * sizeof(struct + MachO::dylib_table_of_contents), + "table of contents")) + return Err; + if (Dysymtab.modtaboff > FileSize) + return malformedError("modtaboff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nmodtab; + const char *struct_dylib_module_name; + uint64_t sizeof_modtab; + if (Obj.is64Bit()) { + sizeof_modtab = sizeof(MachO::dylib_module_64); + struct_dylib_module_name = "struct dylib_module_64"; + } else { + sizeof_modtab = sizeof(MachO::dylib_module); + struct_dylib_module_name = "struct dylib_module"; + } + BigSize *= sizeof_modtab; + BigSize += Dysymtab.modtaboff; + if (BigSize > FileSize) + return malformedError("modtaboff field plus nmodtab field times sizeof(" + + Twine(struct_dylib_module_name) + ") of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.modtaboff, + Dysymtab.nmodtab * sizeof_modtab, + "module table")) + return Err; + if (Dysymtab.extrefsymoff > FileSize) + return malformedError("extrefsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrefsyms; + BigSize *= 
sizeof(MachO::dylib_reference); + BigSize += Dysymtab.extrefsymoff; + if (BigSize > FileSize) + return malformedError("extrefsymoff field plus nextrefsyms field times " + "sizeof(struct dylib_reference) of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extrefsymoff, + Dysymtab.nextrefsyms * + sizeof(MachO::dylib_reference), + "reference table")) + return Err; + if (Dysymtab.indirectsymoff > FileSize) + return malformedError("indirectsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nindirectsyms; + BigSize *= sizeof(uint32_t); + BigSize += Dysymtab.indirectsymoff; + if (BigSize > FileSize) + return malformedError("indirectsymoff field plus nindirectsyms field times " + "sizeof(uint32_t) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff, + Dysymtab.nindirectsyms * + sizeof(uint32_t), + "indirect table")) + return Err; + if (Dysymtab.extreloff > FileSize) + return malformedError("extreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.extreloff; + if (BigSize > FileSize) + return malformedError("extreloff field plus nextrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extreloff, + Dysymtab.nextrel * + sizeof(MachO::relocation_info), + "external relocation table")) + return Err; + if (Dysymtab.locreloff > FileSize) + return malformedError("locreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the 
file"); + BigSize = Dysymtab.nlocrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.locreloff; + if (BigSize > FileSize) + return malformedError("locreloff field plus nlocrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.locreloff, + Dysymtab.nlocrel * + sizeof(MachO::relocation_info), + "local relocation table")) + return Err; + *DysymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkeditDataCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements, + const char *ElementName) { + if (Load.C.cmdsize < sizeof(MachO::linkedit_data_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one " + Twine(CmdName) + " command"); + MachO::linkedit_data_command LinkData = + getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr); + if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (LinkData.dataoff > FileSize) + return malformedError("dataoff field of " + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = LinkData.dataoff; + BigSize += LinkData.datasize; + if (BigSize > FileSize) + return malformedError("dataoff field plus datasize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, LinkData.dataoff, + LinkData.datasize, ElementName)) + return Err; + *LoadCmd = 
Load.Ptr; + return Error::success(); +} + +static Error checkDyldInfoCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dyld_info_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY " + "command"); + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(Obj, Load.Ptr); + if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (DyldInfo.rebase_off > FileSize) + return malformedError("rebase_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = DyldInfo.rebase_off; + BigSize += DyldInfo.rebase_size; + if (BigSize > FileSize) + return malformedError("rebase_off field plus rebase_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.rebase_off, + DyldInfo.rebase_size, + "dyld rebase info")) + return Err; + if (DyldInfo.bind_off > FileSize) + return malformedError("bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.bind_off; + BigSize += DyldInfo.bind_size; + if (BigSize > FileSize) + return malformedError("bind_off field plus bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.bind_off, + DyldInfo.bind_size, 
+ "dyld bind info")) + return Err; + if (DyldInfo.weak_bind_off > FileSize) + return malformedError("weak_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.weak_bind_off; + BigSize += DyldInfo.weak_bind_size; + if (BigSize > FileSize) + return malformedError("weak_bind_off field plus weak_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.weak_bind_off, + DyldInfo.weak_bind_size, + "dyld weak bind info")) + return Err; + if (DyldInfo.lazy_bind_off > FileSize) + return malformedError("lazy_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.lazy_bind_off; + BigSize += DyldInfo.lazy_bind_size; + if (BigSize > FileSize) + return malformedError("lazy_bind_off field plus lazy_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.lazy_bind_off, + DyldInfo.lazy_bind_size, + "dyld lazy bind info")) + return Err; + if (DyldInfo.export_off > FileSize) + return malformedError("export_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.export_off; + BigSize += DyldInfo.export_size; + if (BigSize > FileSize) + return malformedError("export_off field plus export_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.export_off, + DyldInfo.export_size, + "dyld export info")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDylibCommand(const MachOObjectFile &Obj, + const 
MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr); + if (D.dylib.name < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylib_command struct"); + if (D.dylib.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.dylib.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkDylibIdCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd) { + if (Error Err = checkDylibCommand(Obj, Load, LoadCommandIndex, + "LC_ID_DYLIB")) + return Err; + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ID_DYLIB command"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB && + Obj.getHeader().filetype != MachO::MH_DYLIB_STUB) + return malformedError("LC_ID_DYLIB load command in non-dynamic library " + "file type"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylinker_command)) + 
return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr); + if (D.name < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylinker_command struct"); + if (D.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " dyld name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkVersCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName) { + if (Load.C.cmdsize != sizeof(MachO::version_min_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_VERSION_MIN_MACOSX, " + "LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS or " + "LC_VERSION_MIN_WATCHOS command"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkNoteCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::note_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_NOTE has incorrect cmdsize"); + MachO::note_command Nt = 
getStruct<MachO::note_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Nt.offset > FileSize) + return malformedError("offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = Nt.offset; + BigSize += Nt.size; + if (BigSize > FileSize) + return malformedError("size field plus offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size, + "LC_NOTE data")) + return Err; + return Error::success(); +} + +static Error +parseBuildVersionCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char*> &BuildTools, + uint32_t LoadCommandIndex) { + MachO::build_version_command BVC = + getStruct<MachO::build_version_command>(Obj, Load.Ptr); + if (Load.C.cmdsize != + sizeof(MachO::build_version_command) + + BVC.ntools * sizeof(MachO::build_tool_version)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_BUILD_VERSION_COMMAND has incorrect cmdsize"); + + auto Start = Load.Ptr + sizeof(MachO::build_version_command); + BuildTools.resize(BVC.ntools); + for (unsigned i = 0; i < BVC.ntools; ++i) + BuildTools[i] = Start + i * sizeof(MachO::build_tool_version); + + return Error::success(); +} + +static Error checkRpathCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH cmdsize too small"); + MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr); + if (R.path < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field too small, not past " + "the end of the rpath_command struct"); + if (R.path >= R.cmdsize) + return 
malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = R.path; i < R.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkEncryptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + uint64_t cryptoff, uint64_t cryptsize, + const char **LoadCmd, const char *CmdName) { + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ENCRYPTION_INFO and or " + "LC_ENCRYPTION_INFO_64 command"); + uint64_t FileSize = Obj.getData().size(); + if (cryptoff > FileSize) + return malformedError("cryptoff field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = cryptoff; + BigSize += cryptsize; + if (BigSize > FileSize) + return malformedError("cryptoff field plus cryptsize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkerOptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::linker_option_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION cmdsize too small"); + MachO::linker_option_command L = + getStruct<MachO::linker_option_command>(Obj, Load.Ptr); + // Make sure the count of strings is correct. 
+ const char *string = (const char *)Load.Ptr + + sizeof(struct MachO::linker_option_command); + uint32_t left = L.cmdsize - sizeof(struct MachO::linker_option_command); + uint32_t i = 0; + while (left > 0) { + while (*string == '\0' && left > 0) { + string++; + left--; + } + if (left > 0) { + i++; + uint32_t NullPos = StringRef(string, left).find('\0'); + uint32_t len = std::min(NullPos, left) + 1; + string += len; + left -= len; + } + } + if (L.count != i) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION string count " + Twine(L.count) + + " does not match number of strings"); + return Error::success(); +} + +static Error checkSubCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName, + size_t SizeOfCmd, const char *CmdStructName, + uint32_t PathOffset, const char *PathFieldName) { + if (PathOffset < SizeOfCmd) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field too " + "small, not past the end of the " + CmdStructName); + if (PathOffset >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field " + "extends past the end of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. 
+ uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = PathOffset; i < Load.C.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + " name extends past " + "the end of the load command"); + return Error::success(); +} + +static Error checkThreadCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::thread_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + CmdName + " cmdsize too small"); + MachO::thread_command T = + getStruct<MachO::thread_command>(Obj, Load.Ptr); + const char *state = Load.Ptr + sizeof(MachO::thread_command); + const char *end = Load.Ptr + T.cmdsize; + uint32_t nflavor = 0; + uint32_t cputype = getCPUType(Obj); + while (state < end) { + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + "flavor in " + CmdName + " extends past end of " + "command"); + uint32_t flavor; + memcpy(&flavor, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(flavor); + state += sizeof(uint32_t); + + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count in " + CmdName + " extends past end of " + "command"); + uint32_t count; + memcpy(&count, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(count); + state += sizeof(uint32_t); + + if (cputype == MachO::CPU_TYPE_I386) { + if (flavor == MachO::x86_THREAD_STATE32) { + if (count != MachO::x86_THREAD_STATE32_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE32_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE32 flavor in " + CmdName + + " 
command"); + if (state + sizeof(MachO::x86_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE32 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_X86_64) { + if (flavor == MachO::x86_THREAD_STATE64) { + if (count != MachO::x86_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM) { + if (flavor == MachO::ARM_THREAD_STATE) { + if (count != MachO::ARM_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) 
+ + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM64) { + if (flavor == MachO::ARM_THREAD_STATE64) { + if (count != MachO::ARM_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_POWERPC) { + if (flavor == MachO::PPC_THREAD_STATE) { + if (count != MachO::PPC_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not PPC_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a PPC_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::ppc_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " PPC_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::ppc_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else { + return malformedError("unknown cputype (" + Twine(cputype) + ") load " + "command " + Twine(LoadCommandIndex) + " for " + + CmdName + " command can't be checked"); + } + nflavor++; + } + return 
Error::success(); +} + +static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo + &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::twolevel_hints_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_TWOLEVEL_HINTS has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_TWOLEVEL_HINTS command"); + MachO::twolevel_hints_command Hints = + getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Hints.offset > FileSize) + return malformedError("offset field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Hints.nhints; + BigSize *= Hints.nhints * sizeof(MachO::twolevel_hint); + BigSize += Hints.offset; + if (BigSize > FileSize) + return malformedError("offset field plus nhints times sizeof(struct " + "twolevel_hint) field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Hints.offset, Hints.nhints * + sizeof(MachO::twolevel_hint), + "two level hints")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +// Returns true if the libObject code does not support the load command and its +// contents. The cmd value it is treated as an unknown load command but with +// an error message that says the cmd value is obsolete. 
+static bool isLoadCommandObsolete(uint32_t cmd) { + if (cmd == MachO::LC_SYMSEG || + cmd == MachO::LC_LOADFVMLIB || + cmd == MachO::LC_IDFVMLIB || + cmd == MachO::LC_IDENT || + cmd == MachO::LC_FVMFILE || + cmd == MachO::LC_PREPAGE || + cmd == MachO::LC_PREBOUND_DYLIB || + cmd == MachO::LC_TWOLEVEL_HINTS || + cmd == MachO::LC_PREBIND_CKSUM) + return true; + return false; +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian, + bool Is64Bits, uint32_t UniversalCputype, + uint32_t UniversalIndex) { + Error Err = Error::success(); + std::unique_ptr<MachOObjectFile> Obj( + new MachOObjectFile(std::move(Object), IsLittleEndian, + Is64Bits, Err, UniversalCputype, + UniversalIndex)); + if (Err) + return std::move(Err); + return std::move(Obj); +} + +MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, + bool Is64bits, Error &Err, + uint32_t UniversalCputype, + uint32_t UniversalIndex) + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { + ErrorAsOutParameter ErrAsOutParam(&Err); + uint64_t SizeOfHeaders; + uint32_t cputype; + if (is64Bit()) { + parseHeader(*this, Header64, Err); + SizeOfHeaders = sizeof(MachO::mach_header_64); + cputype = Header64.cputype; + } else { + parseHeader(*this, Header, Err); + SizeOfHeaders = sizeof(MachO::mach_header); + cputype = Header.cputype; + } + if (Err) + return; + SizeOfHeaders += getHeader().sizeofcmds; + if (getData().data() + SizeOfHeaders > getData().end()) { + Err = malformedError("load commands extend past the end of the file"); + return; + } + if (UniversalCputype != 0 && cputype != UniversalCputype) { + Err = malformedError("universal header architecture: " + + Twine(UniversalIndex) + "'s cputype does not match " + "object file's mach header"); + return; + } + std::list<MachOElement> Elements; + Elements.push_back({0, SizeOfHeaders, "Mach-O headers"}); + + uint32_t LoadCommandCount = getHeader().ncmds; + LoadCommandInfo Load; + 
if (LoadCommandCount != 0) { + if (auto LoadOrErr = getFirstLoadCommandInfo(*this)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } + } + + const char *DyldIdLoadCmd = nullptr; + const char *FuncStartsLoadCmd = nullptr; + const char *SplitInfoLoadCmd = nullptr; + const char *CodeSignDrsLoadCmd = nullptr; + const char *CodeSignLoadCmd = nullptr; + const char *VersLoadCmd = nullptr; + const char *SourceLoadCmd = nullptr; + const char *EntryPointLoadCmd = nullptr; + const char *EncryptLoadCmd = nullptr; + const char *RoutinesLoadCmd = nullptr; + const char *UnixThreadLoadCmd = nullptr; + const char *TwoLevelHintsLoadCmd = nullptr; + for (unsigned I = 0; I < LoadCommandCount; ++I) { + if (is64Bit()) { + if (Load.C.cmdsize % 8 != 0) { + // We have a hack here to allow 64-bit Mach-O core files to have + // LC_THREAD commands that are only a multiple of 4 and not 8 to be + // allowed since the macOS kernel produces them. + if (getHeader().filetype != MachO::MH_CORE || + Load.C.cmd != MachO::LC_THREAD || Load.C.cmdsize % 4) { + Err = malformedError("load command " + Twine(I) + " cmdsize not a " + "multiple of 8"); + return; + } + } + } else { + if (Load.C.cmdsize % 4 != 0) { + Err = malformedError("load command " + Twine(I) + " cmdsize not a " + "multiple of 4"); + return; + } + } + LoadCommands.push_back(Load); + if (Load.C.cmd == MachO::LC_SYMTAB) { + if ((Err = checkSymtabCommand(*this, Load, I, &SymtabLoadCmd, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYSYMTAB) { + if ((Err = checkDysymtabCommand(*this, Load, I, &DysymtabLoadCmd, + Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &DataInCodeLoadCmd, + "LC_DATA_IN_CODE", Elements, + "data in code info"))) + return; + } else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &LinkOptHintsLoadCmd, + "LC_LINKER_OPTIMIZATION_HINT", + Elements, 
"linker optimization " + "hints"))) + return; + } else if (Load.C.cmd == MachO::LC_FUNCTION_STARTS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &FuncStartsLoadCmd, + "LC_FUNCTION_STARTS", Elements, + "function starts data"))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &SplitInfoLoadCmd, + "LC_SEGMENT_SPLIT_INFO", Elements, + "split info data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignDrsLoadCmd, + "LC_DYLIB_CODE_SIGN_DRS", Elements, + "code signing RDs data"))) + return; + } else if (Load.C.cmd == MachO::LC_CODE_SIGNATURE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignLoadCmd, + "LC_CODE_SIGNATURE", Elements, + "code signature data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO_ONLY", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_UUID) { + if (Load.C.cmdsize != sizeof(MachO::uuid_command)) { + Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect " + "cmdsize"); + return; + } + if (UuidLoadCmd) { + Err = malformedError("more than one LC_UUID command"); + return; + } + UuidLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_SEGMENT_64) { + if ((Err = parseSegmentLoadCommand<MachO::segment_command_64, + MachO::section_64>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT_64", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + if ((Err = parseSegmentLoadCommand<MachO::segment_command, + MachO::section>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd 
== MachO::LC_ID_DYLIB) { + if ((Err = checkDylibIdCommand(*this, Load, I, &DyldIdLoadCmd))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_WEAK_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LAZY_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_REEXPORT_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_UPWARD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_ID_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_ID_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_LOAD_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_ENVIRONMENT) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_DYLD_ENVIRONMENT"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_MACOSX) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_MACOSX"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_IPHONEOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_TVOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_TVOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_WATCHOS"))) + return; + } else if (Load.C.cmd 
== MachO::LC_NOTE) { + if ((Err = checkNoteCommand(*this, Load, I, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_BUILD_VERSION) { + if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I))) + return; + } else if (Load.C.cmd == MachO::LC_RPATH) { + if ((Err = checkRpathCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SOURCE_VERSION) { + if (Load.C.cmdsize != sizeof(MachO::source_version_command)) { + Err = malformedError("LC_SOURCE_VERSION command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (SourceLoadCmd) { + Err = malformedError("more than one LC_SOURCE_VERSION command"); + return; + } + SourceLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_MAIN) { + if (Load.C.cmdsize != sizeof(MachO::entry_point_command)) { + Err = malformedError("LC_MAIN command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (EntryPointLoadCmd) { + Err = malformedError("more than one LC_MAIN command"); + return; + } + EntryPointLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command)) { + Err = malformedError("LC_ENCRYPTION_INFO command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command E = + getStruct<MachO::encryption_info_command>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO"))) + return; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO_64) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command_64)) { + Err = malformedError("LC_ENCRYPTION_INFO_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command_64 E = + getStruct<MachO::encryption_info_command_64>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO_64"))) + return; + } else if (Load.C.cmd == 
MachO::LC_LINKER_OPTION) { + if ((Err = checkLinkerOptCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_FRAMEWORK) { + if (Load.C.cmdsize < sizeof(MachO::sub_framework_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_FRAMEWORK cmdsize too small"); + return; + } + MachO::sub_framework_command S = + getStruct<MachO::sub_framework_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_FRAMEWORK", + sizeof(MachO::sub_framework_command), + "sub_framework_command", S.umbrella, + "umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_UMBRELLA) { + if (Load.C.cmdsize < sizeof(MachO::sub_umbrella_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_UMBRELLA cmdsize too small"); + return; + } + MachO::sub_umbrella_command S = + getStruct<MachO::sub_umbrella_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_UMBRELLA", + sizeof(MachO::sub_umbrella_command), + "sub_umbrella_command", S.sub_umbrella, + "sub_umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_LIBRARY) { + if (Load.C.cmdsize < sizeof(MachO::sub_library_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_LIBRARY cmdsize too small"); + return; + } + MachO::sub_library_command S = + getStruct<MachO::sub_library_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_LIBRARY", + sizeof(MachO::sub_library_command), + "sub_library_command", S.sub_library, + "sub_library"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_CLIENT) { + if (Load.C.cmdsize < sizeof(MachO::sub_client_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_CLIENT cmdsize too small"); + return; + } + MachO::sub_client_command S = + getStruct<MachO::sub_client_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_CLIENT", + sizeof(MachO::sub_client_command), + "sub_client_command", S.client, "client"))) 
+ return; + } else if (Load.C.cmd == MachO::LC_ROUTINES) { + if (Load.C.cmdsize != sizeof(MachO::routines_command)) { + Err = malformedError("LC_ROUTINES command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES and or LC_ROUTINES_64 " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ROUTINES_64) { + if (Load.C.cmdsize != sizeof(MachO::routines_command_64)) { + Err = malformedError("LC_ROUTINES_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES_64 and or LC_ROUTINES " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_UNIXTHREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_UNIXTHREAD"))) + return; + if (UnixThreadLoadCmd) { + Err = malformedError("more than one LC_UNIXTHREAD command"); + return; + } + UnixThreadLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_THREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_THREAD"))) + return; + // Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported. + } else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) { + if ((Err = checkTwoLevelHintsCommand(*this, Load, I, + &TwoLevelHintsLoadCmd, Elements))) + return; + } else if (isLoadCommandObsolete(Load.C.cmd)) { + Err = malformedError("load command " + Twine(I) + " for cmd value of: " + + Twine(Load.C.cmd) + " is obsolete and not " + "supported"); + return; + } + // TODO: generate a error for unknown load commands by default. But still + // need work out an approach to allow or not allow unknown values like this + // as an option for some uses like lldb. 
+ if (I < LoadCommandCount - 1) { + if (auto LoadOrErr = getNextLoadCommandInfo(*this, I, Load)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } + } + } + if (!SymtabLoadCmd) { + if (DysymtabLoadCmd) { + Err = malformedError("contains LC_DYSYMTAB load command without a " + "LC_SYMTAB load command"); + return; + } + } else if (DysymtabLoadCmd) { + MachO::symtab_command Symtab = + getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); + MachO::dysymtab_command Dysymtab = + getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); + if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + Err = malformedError("ilocalsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + uint64_t BigSize = Dysymtab.ilocalsym; + BigSize += Dysymtab.nlocalsym; + if (Dysymtab.nlocalsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("ilocalsym plus nlocalsym in LC_DYSYMTAB load " + "command extends past the end of the symbol table"); + return; + } + if (Dysymtab.nextdefsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { + Err = malformedError("nextdefsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + BigSize = Dysymtab.iextdefsym; + BigSize += Dysymtab.nextdefsym; + if (Dysymtab.nextdefsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("iextdefsym plus nextdefsym in LC_DYSYMTAB " + "load command extends past the end of the symbol " + "table"); + return; + } + if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) { + Err = malformedError("nundefsym in LC_DYSYMTAB load command " + "extends past the end of the symbol table"); + return; + } + BigSize = Dysymtab.iundefsym; + BigSize += Dysymtab.nundefsym; + if (Dysymtab.nundefsym != 0 && BigSize > Symtab.nsyms) { + Err = malformedError("iundefsym plus nundefsym in LC_DYSYMTAB load " + " command extends past the end of the symbol table"); + return; + } + } + if ((getHeader().filetype == 
MachO::MH_DYLIB || + getHeader().filetype == MachO::MH_DYLIB_STUB) && + DyldIdLoadCmd == nullptr) { + Err = malformedError("no LC_ID_DYLIB load command in dynamic library " + "filetype"); + return; + } + assert(LoadCommands.size() == LoadCommandCount); + + Err = Error::success(); +} + +Error MachOObjectFile::checkSymbolTable() const { + uint32_t Flags = 0; + if (is64Bit()) { + MachO::mach_header_64 H_64 = MachOObjectFile::getHeader64(); + Flags = H_64.flags; + } else { + MachO::mach_header H = MachOObjectFile::getHeader(); + Flags = H.flags; + } + uint8_t NType = 0; + uint8_t NSect = 0; + uint16_t NDesc = 0; + uint32_t NStrx = 0; + uint64_t NValue = 0; + uint32_t SymbolIndex = 0; + MachO::symtab_command S = getSymtabLoadCommand(); + for (const SymbolRef &Symbol : symbols()) { + DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); + if (is64Bit()) { + MachO::nlist_64 STE_64 = getSymbol64TableEntry(SymDRI); + NType = STE_64.n_type; + NSect = STE_64.n_sect; + NDesc = STE_64.n_desc; + NStrx = STE_64.n_strx; + NValue = STE_64.n_value; + } else { + MachO::nlist STE = getSymbolTableEntry(SymDRI); + NType = STE.n_type; + NType = STE.n_type; + NSect = STE.n_sect; + NDesc = STE.n_desc; + NStrx = STE.n_strx; + NValue = STE.n_value; + } + if ((NType & MachO::N_STAB) == 0 && + (NType & MachO::N_TYPE) == MachO::N_SECT) { + if (NSect == 0 || NSect > Sections.size()) + return malformedError("bad section index: " + Twine((int)NSect) + + " for symbol at index " + Twine(SymbolIndex)); + } + if ((NType & MachO::N_STAB) == 0 && + (NType & MachO::N_TYPE) == MachO::N_INDR) { + if (NValue >= S.strsize) + return malformedError("bad n_value: " + Twine((int)NValue) + " past " + "the end of string table, for N_INDR symbol at " + "index " + Twine(SymbolIndex)); + } + if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL && + (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) || + (NType & MachO::N_TYPE) == MachO::N_PBUD)) { + uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc); + 
if (LibraryOrdinal != 0 && + LibraryOrdinal != MachO::EXECUTABLE_ORDINAL && + LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL && + LibraryOrdinal - 1 >= Libraries.size() ) { + return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) + + " for symbol at index " + Twine(SymbolIndex)); + } + } + if (NStrx >= S.strsize) + return malformedError("bad string table index: " + Twine((int)NStrx) + + " past the end of string table, for symbol at " + "index " + Twine(SymbolIndex)); + SymbolIndex++; + } + return Error::success(); +} + +void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + Symb.p += SymbolTableEntrySize; +} + +Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const { + StringRef StringTable = getStringTableData(); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + const char *Start = &StringTable.data()[Entry.n_strx]; + if (Start < getData().begin() || Start >= getData().end()) { + return malformedError("bad string index: " + Twine(Entry.n_strx) + + " for symbol at index " + Twine(getSymbolIndex(Symb))); + } + return StringRef(Start); +} + +unsigned MachOObjectFile::getSectionType(SectionRef Sec) const { + DataRefImpl DRI = Sec.getRawDataRefImpl(); + uint32_t Flags = getSectionFlags(*this, DRI); + return Flags & MachO::SECTION_TYPE; +} + +uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const { + if (is64Bit()) { + MachO::nlist_64 Entry = getSymbol64TableEntry(Sym); + return Entry.n_value; + } + MachO::nlist Entry = getSymbolTableEntry(Sym); + return Entry.n_value; +} + +// getIndirectName() returns the name of the alias'ed symbol who's string table +// index is in the n_value field. 
+std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, + StringRef &Res) const { + StringRef StringTable = getStringTableData(); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR) + return object_error::parse_failed; + uint64_t NValue = getNValue(Symb); + if (NValue >= StringTable.size()) + return object_error::parse_failed; + const char *Start = &StringTable.data()[NValue]; + Res = StringRef(Start); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const { + return getNValue(Sym); +} + +Expected<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const { + return getSymbolValue(Sym); +} + +uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { + uint32_t flags = getSymbolFlags(DRI); + if (flags & SymbolRef::SF_Common) { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); + return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); + } + return 0; +} + +uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { + return getNValue(DRI); +} + +Expected<SymbolRef::Type> +MachOObjectFile::getSymbolType(DataRefImpl Symb) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + uint8_t n_type = Entry.n_type; + + // If this is a STAB debugging symbol, we can do nothing more. 
+ if (n_type & MachO::N_STAB) + return SymbolRef::ST_Debug; + + switch (n_type & MachO::N_TYPE) { + case MachO::N_UNDF : + return SymbolRef::ST_Unknown; + case MachO::N_SECT : + Expected<section_iterator> SecOrError = getSymbolSection(Symb); + if (!SecOrError) + return SecOrError.takeError(); + section_iterator Sec = *SecOrError; + if (Sec->isData() || Sec->isBSS()) + return SymbolRef::ST_Data; + return SymbolRef::ST_Function; + } + return SymbolRef::ST_Other; +} + +uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); + + uint8_t MachOType = Entry.n_type; + uint16_t MachOFlags = Entry.n_desc; + + uint32_t Result = SymbolRef::SF_None; + + if ((MachOType & MachO::N_TYPE) == MachO::N_INDR) + Result |= SymbolRef::SF_Indirect; + + if (MachOType & MachO::N_STAB) + Result |= SymbolRef::SF_FormatSpecific; + + if (MachOType & MachO::N_EXT) { + Result |= SymbolRef::SF_Global; + if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { + if (getNValue(DRI)) + Result |= SymbolRef::SF_Common; + else + Result |= SymbolRef::SF_Undefined; + } + + if (!(MachOType & MachO::N_PEXT)) + Result |= SymbolRef::SF_Exported; + } + + if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) + Result |= SymbolRef::SF_Weak; + + if (MachOFlags & (MachO::N_ARM_THUMB_DEF)) + Result |= SymbolRef::SF_Thumb; + + if ((MachOType & MachO::N_TYPE) == MachO::N_ABS) + Result |= SymbolRef::SF_Absolute; + + return Result; +} + +Expected<section_iterator> +MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + uint8_t index = Entry.n_sect; + + if (index == 0) + return section_end(); + DataRefImpl DRI; + DRI.d.a = index - 1; + if (DRI.d.a >= Sections.size()){ + return malformedError("bad section index: " + Twine((int)index) + + " for symbol at index " + Twine(getSymbolIndex(Symb))); + } + return section_iterator(SectionRef(DRI, this)); +} + +unsigned 
MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const { + MachO::nlist_base Entry = + getSymbolTableEntryBase(*this, Sym.getRawDataRefImpl()); + return Entry.n_sect - 1; +} + +void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const { + Sec.d.a++; +} + +std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Result) const { + ArrayRef<char> Raw = getSectionRawName(Sec); + Result = parseSegmentOrSectionName(Raw.data()); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { + if (is64Bit()) + return getSection64(Sec).addr; + return getSection(Sec).addr; +} + +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + +uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { + // In the case if a malformed Mach-O file where the section offset is past + // the end of the file or some part of the section size is past the end of + // the file return a size of zero or a size that covers the rest of the file + // but does not extend past the end of the file. 
+ uint32_t SectOffset, SectType; + uint64_t SectSize; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } else { + MachO::section Sect = getSection(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } + if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL) + return SectSize; + uint64_t FileSize = getData().size(); + if (SectOffset > FileSize) + return 0; + if (FileSize - SectOffset < SectSize) + return FileSize - SectOffset; + return SectSize; +} + +std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Res) const { + uint32_t Offset; + uint64_t Size; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.offset; + Size = Sect.size; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.offset; + Size = Sect.size; + } + + Res = this->getData().substr(Offset, Size); + return std::error_code(); +} + +uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { + uint32_t Align; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Align = Sect.align; + } else { + MachO::section Sect = getSection(Sec); + Align = Sect.align; + } + + return uint64_t(1) << Align; +} + +bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool MachOObjectFile::isSectionText(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; +} + +bool MachOObjectFile::isSectionData(DataRefImpl Sec) const { + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && + !(SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL); +} + +bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const { + uint32_t Flags = 
getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && + (SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL); +} + +unsigned MachOObjectFile::getSectionID(SectionRef Sec) const { + return Sec.getRawDataRefImpl().d.a; +} + +bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const { + // FIXME: Unimplemented. + return false; +} + +bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const { + StringRef SegmentName = getSectionFinalSegmentName(Sec); + StringRef SectName; + if (!getSectionName(Sec, SectName)) + return (SegmentName == "__LLVM" && SectName == "__bitcode"); + return false; +} + +relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const { + DataRefImpl Ret; + Ret.d.a = Sec.d.a; + Ret.d.b = 0; + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator +MachOObjectFile::section_rel_end(DataRefImpl Sec) const { + uint32_t Num; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Num = Sect.nreloc; + } else { + MachO::section Sect = getSection(Sec); + Num = Sect.nreloc; + } + + DataRefImpl Ret; + Ret.d.a = Sec.d.a; + Ret.d.b = Num; + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_begin() const { + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = 0; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. 
+ Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + ++Rel.d.b; +} + +uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { + assert((getHeader().filetype == MachO::MH_OBJECT || + getHeader().filetype == MachO::MH_KEXT_BUNDLE) && + "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE"); + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationAddress(RE); +} + +symbol_iterator +MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + if (isRelocationScattered(RE)) + return symbol_end(); + + uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); + bool isExtern = getPlainRelocationExternal(RE); + if (!isExtern) + return symbol_end(); + + MachO::symtab_command S = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize; + DataRefImpl Sym; + Sym.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return symbol_iterator(SymbolRef(Sym, this)); +} + +section_iterator +MachOObjectFile::getRelocationSection(DataRefImpl Rel) const { + return section_iterator(getAnyRelocationSection(getRelocation(Rel))); +} + +uint64_t MachOObjectFile::getRelocationType(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationType(RE); +} + +void MachOObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + StringRef res; + uint64_t RType = getRelocationType(Rel); + + unsigned Arch = this->getArch(); + + switch (Arch) { + case Triple::x86: { + static const char *const Table[] = { + "GENERIC_RELOC_VANILLA", + "GENERIC_RELOC_PAIR", + "GENERIC_RELOC_SECTDIFF", + "GENERIC_RELOC_PB_LA_PTR", + "GENERIC_RELOC_LOCAL_SECTDIFF", + 
"GENERIC_RELOC_TLV" }; + + if (RType > 5) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::x86_64: { + static const char *const Table[] = { + "X86_64_RELOC_UNSIGNED", + "X86_64_RELOC_SIGNED", + "X86_64_RELOC_BRANCH", + "X86_64_RELOC_GOT_LOAD", + "X86_64_RELOC_GOT", + "X86_64_RELOC_SUBTRACTOR", + "X86_64_RELOC_SIGNED_1", + "X86_64_RELOC_SIGNED_2", + "X86_64_RELOC_SIGNED_4", + "X86_64_RELOC_TLV" }; + + if (RType > 9) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::arm: { + static const char *const Table[] = { + "ARM_RELOC_VANILLA", + "ARM_RELOC_PAIR", + "ARM_RELOC_SECTDIFF", + "ARM_RELOC_LOCAL_SECTDIFF", + "ARM_RELOC_PB_LA_PTR", + "ARM_RELOC_BR24", + "ARM_THUMB_RELOC_BR22", + "ARM_THUMB_32BIT_BRANCH", + "ARM_RELOC_HALF", + "ARM_RELOC_HALF_SECTDIFF" }; + + if (RType > 9) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::aarch64: { + static const char *const Table[] = { + "ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR", + "ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21", + "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT", + "ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + "ARM64_RELOC_ADDEND" + }; + + if (RType >= array_lengthof(Table)) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::ppc: { + static const char *const Table[] = { + "PPC_RELOC_VANILLA", + "PPC_RELOC_PAIR", + "PPC_RELOC_BR14", + "PPC_RELOC_BR24", + "PPC_RELOC_HI16", + "PPC_RELOC_LO16", + "PPC_RELOC_HA16", + "PPC_RELOC_LO14", + "PPC_RELOC_SECTDIFF", + "PPC_RELOC_PB_LA_PTR", + "PPC_RELOC_HI16_SECTDIFF", + "PPC_RELOC_LO16_SECTDIFF", + "PPC_RELOC_HA16_SECTDIFF", + "PPC_RELOC_JBSR", + "PPC_RELOC_LO14_SECTDIFF", + "PPC_RELOC_LOCAL_SECTDIFF" }; + + if (RType > 15) + res = "Unknown"; + else + res = Table[RType]; + break; + } + case Triple::UnknownArch: + res = "Unknown"; + break; + } + Result.append(res.begin(), 
res.end()); +} + +uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationLength(RE); +} + +// +// guessLibraryShortName() is passed a name of a dynamic library and returns a +// guess on what the short name is. Then name is returned as a substring of the +// StringRef Name passed in. The name of the dynamic library is recognized as +// a framework if it has one of the two following forms: +// Foo.framework/Versions/A/Foo +// Foo.framework/Foo +// Where A and Foo can be any string. And may contain a trailing suffix +// starting with an underbar. If the Name is recognized as a framework then +// isFramework is set to true else it is set to false. If the Name has a +// suffix then Suffix is set to the substring in Name that contains the suffix +// else it is set to a NULL StringRef. +// +// The Name of the dynamic library is recognized as a library name if it has +// one of the two following forms: +// libFoo.A.dylib +// libFoo.dylib +// The library may have a suffix trailing the name Foo of the form: +// libFoo_profile.A.dylib +// libFoo_profile.dylib +// +// The Name of the dynamic library is also recognized as a library name if it +// has the following form: +// Foo.qtx +// +// If the Name of the dynamic library is none of the forms above then a NULL +// StringRef is returned. 
+// +StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, + bool &isFramework, + StringRef &Suffix) { + StringRef Foo, F, DotFramework, V, Dylib, Lib, Dot, Qtx; + size_t a, b, c, d, Idx; + + isFramework = false; + Suffix = StringRef(); + + // Pull off the last component and make Foo point to it + a = Name.rfind('/'); + if (a == Name.npos || a == 0) + goto guess_library; + Foo = Name.slice(a+1, Name.npos); + + // Look for a suffix starting with a '_' + Idx = Foo.rfind('_'); + if (Idx != Foo.npos && Foo.size() >= 2) { + Suffix = Foo.slice(Idx, Foo.npos); + Foo = Foo.slice(0, Idx); + } + + // First look for the form Foo.framework/Foo + b = Name.rfind('/', a); + if (b == Name.npos) + Idx = 0; + else + Idx = b+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + + // Next look for the form Foo.framework/Versions/A/Foo + if (b == Name.npos) + goto guess_library; + c = Name.rfind('/', b); + if (c == Name.npos || c == 0) + goto guess_library; + V = Name.slice(c+1, Name.npos); + if (!V.startswith("Versions/")) + goto guess_library; + d = Name.rfind('/', c); + if (d == Name.npos) + Idx = 0; + else + Idx = d+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + +guess_library: + // pull off the suffix after the "." and make a point to it + a = Name.rfind('.'); + if (a == Name.npos || a == 0) + return StringRef(); + Dylib = Name.slice(a, Name.npos); + if (Dylib != ".dylib") + goto guess_qtx; + + // First pull off the version letter for the form Foo.A.dylib if any. 
+ if (a >= 3) { + Dot = Name.slice(a-2, a-1); + if (Dot == ".") + a = a - 2; + } + + b = Name.rfind('/', a); + if (b == Name.npos) + b = 0; + else + b = b+1; + // ignore any suffix after an underbar like Foo_profile.A.dylib + Idx = Name.find('_', b); + if (Idx != Name.npos && Idx != b) { + Lib = Name.slice(b, Idx); + Suffix = Name.slice(Idx, a); + } + else + Lib = Name.slice(b, a); + // There are incorrect library names of the form: + // libATS.A_profile.dylib so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; + +guess_qtx: + Qtx = Name.slice(a, Name.npos); + if (Qtx != ".qtx") + return StringRef(); + b = Name.rfind('/', a); + if (b == Name.npos) + Lib = Name.slice(0, a); + else + Lib = Name.slice(b+1, a); + // There are library names of the form: QT.A.qtx so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; +} + +// getLibraryShortNameByIndex() is used to get the short name of the library +// for an undefined symbol in a linked Mach-O binary that was linked with the +// normal two-level namespace default (that is MH_TWOLEVEL in the header). +// It is passed the index (0 - based) of the library as translated from +// GET_LIBRARY_ORDINAL (1 - based). +std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, + StringRef &Res) const { + if (Index >= Libraries.size()) + return object_error::parse_failed; + + // If the cache of LibrariesShortNames is not built up do that first for + // all the Libraries. 
+ if (LibrariesShortNames.size() == 0) { + for (unsigned i = 0; i < Libraries.size(); i++) { + MachO::dylib_command D = + getStruct<MachO::dylib_command>(*this, Libraries[i]); + if (D.dylib.name >= D.cmdsize) + return object_error::parse_failed; + const char *P = (const char *)(Libraries[i]) + D.dylib.name; + StringRef Name = StringRef(P); + if (D.dylib.name+Name.size() >= D.cmdsize) + return object_error::parse_failed; + StringRef Suffix; + bool isFramework; + StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix); + if (shortName.empty()) + LibrariesShortNames.push_back(Name); + else + LibrariesShortNames.push_back(shortName); + } + } + + Res = LibrariesShortNames[Index]; + return std::error_code(); +} + +uint32_t MachOObjectFile::getLibraryCount() const { + return Libraries.size(); +} + +section_iterator +MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { + DataRefImpl Sec; + Sec.d.a = Rel->getRawDataRefImpl().d.a; + return section_iterator(SectionRef(Sec, this)); +} + +basic_symbol_iterator MachOObjectFile::symbol_begin() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + + return getSymbolByIndex(0); +} + +basic_symbol_iterator MachOObjectFile::symbol_end() const { + DataRefImpl DRI; + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Symtab.nsyms == 0) + return basic_symbol_iterator(SymbolRef(DRI, this)); + + unsigned SymbolTableEntrySize = is64Bit() ? 
+ sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + unsigned Offset = Symtab.symoff + + Symtab.nsyms * SymbolTableEntrySize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); + return basic_symbol_iterator(SymbolRef(DRI, this)); +} + +basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const { + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd || Index >= Symtab.nsyms) + report_fatal_error("Requested symbol index is out of range."); + unsigned SymbolTableEntrySize = + is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRI; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); + DRI.p += Index * SymbolTableEntrySize; + return basic_symbol_iterator(SymbolRef(DRI, this)); +} + +uint64_t MachOObjectFile::getSymbolIndex(DataRefImpl Symb) const { + MachO::symtab_command Symtab = getSymtabLoadCommand(); + if (!SymtabLoadCmd) + report_fatal_error("getSymbolIndex() called with no symbol table symbol"); + unsigned SymbolTableEntrySize = + is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + DataRefImpl DRIstart; + DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); + uint64_t Index = (Symb.p - DRIstart.p) / SymbolTableEntrySize; + return Index; +} + +section_iterator MachOObjectFile::section_begin() const { + DataRefImpl DRI; + return section_iterator(SectionRef(DRI, this)); +} + +section_iterator MachOObjectFile::section_end() const { + DataRefImpl DRI; + DRI.d.a = Sections.size(); + return section_iterator(SectionRef(DRI, this)); +} + +uint8_t MachOObjectFile::getBytesInAddress() const { + return is64Bit() ? 
8 : 4; +} + +StringRef MachOObjectFile::getFileFormatName() const { + unsigned CPUType = getCPUType(*this); + if (!is64Bit()) { + switch (CPUType) { + case MachO::CPU_TYPE_I386: + return "Mach-O 32-bit i386"; + case MachO::CPU_TYPE_ARM: + return "Mach-O arm"; + case MachO::CPU_TYPE_POWERPC: + return "Mach-O 32-bit ppc"; + default: + return "Mach-O 32-bit unknown"; + } + } + + switch (CPUType) { + case MachO::CPU_TYPE_X86_64: + return "Mach-O 64-bit x86-64"; + case MachO::CPU_TYPE_ARM64: + return "Mach-O arm64"; + case MachO::CPU_TYPE_POWERPC64: + return "Mach-O 64-bit ppc64"; + default: + return "Mach-O 64-bit unknown"; + } +} + +Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { + switch (CPUType) { + case MachO::CPU_TYPE_I386: + return Triple::x86; + case MachO::CPU_TYPE_X86_64: + return Triple::x86_64; + case MachO::CPU_TYPE_ARM: + return Triple::arm; + case MachO::CPU_TYPE_ARM64: + return Triple::aarch64; + case MachO::CPU_TYPE_POWERPC: + return Triple::ppc; + case MachO::CPU_TYPE_POWERPC64: + return Triple::ppc64; + default: + return Triple::UnknownArch; + } +} + +Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, + const char **McpuDefault, + const char **ArchFlag) { + if (McpuDefault) + *McpuDefault = nullptr; + if (ArchFlag) + *ArchFlag = nullptr; + + switch (CPUType) { + case MachO::CPU_TYPE_I386: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_I386_ALL: + if (ArchFlag) + *ArchFlag = "i386"; + return Triple("i386-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_X86_64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_X86_64_ALL: + if (ArchFlag) + *ArchFlag = "x86_64"; + return Triple("x86_64-apple-darwin"); + case MachO::CPU_SUBTYPE_X86_64_H: + if (ArchFlag) + *ArchFlag = "x86_64h"; + return Triple("x86_64h-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case 
MachO::CPU_SUBTYPE_ARM_V4T: + if (ArchFlag) + *ArchFlag = "armv4t"; + return Triple("armv4t-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V5TEJ: + if (ArchFlag) + *ArchFlag = "armv5e"; + return Triple("armv5e-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_XSCALE: + if (ArchFlag) + *ArchFlag = "xscale"; + return Triple("xscale-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6: + if (ArchFlag) + *ArchFlag = "armv6"; + return Triple("armv6-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6M: + if (McpuDefault) + *McpuDefault = "cortex-m0"; + if (ArchFlag) + *ArchFlag = "armv6m"; + return Triple("armv6m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7: + if (ArchFlag) + *ArchFlag = "armv7"; + return Triple("armv7-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7EM: + if (McpuDefault) + *McpuDefault = "cortex-m4"; + if (ArchFlag) + *ArchFlag = "armv7em"; + return Triple("thumbv7em-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7K: + if (McpuDefault) + *McpuDefault = "cortex-a7"; + if (ArchFlag) + *ArchFlag = "armv7k"; + return Triple("armv7k-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7M: + if (McpuDefault) + *McpuDefault = "cortex-m3"; + if (ArchFlag) + *ArchFlag = "armv7m"; + return Triple("thumbv7m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7S: + if (McpuDefault) + *McpuDefault = "cortex-a7"; + if (ArchFlag) + *ArchFlag = "armv7s"; + return Triple("armv7s-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM64_ALL: + if (McpuDefault) + *McpuDefault = "cyclone"; + if (ArchFlag) + *ArchFlag = "arm64"; + return Triple("arm64-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc"; + return Triple("ppc-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC64: + switch (CPUSubType & 
~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc64"; + return Triple("ppc64-apple-darwin"); + default: + return Triple(); + } + default: + return Triple(); + } +} + +Triple MachOObjectFile::getHostArch() { + return Triple(sys::getDefaultTargetTriple()); +} + +bool MachOObjectFile::isValidArch(StringRef ArchFlag) { + return StringSwitch<bool>(ArchFlag) + .Case("i386", true) + .Case("x86_64", true) + .Case("x86_64h", true) + .Case("armv4t", true) + .Case("arm", true) + .Case("armv5e", true) + .Case("armv6", true) + .Case("armv6m", true) + .Case("armv7", true) + .Case("armv7em", true) + .Case("armv7k", true) + .Case("armv7m", true) + .Case("armv7s", true) + .Case("arm64", true) + .Case("ppc", true) + .Case("ppc64", true) + .Default(false); +} + +unsigned MachOObjectFile::getArch() const { + return getArch(getCPUType(*this)); +} + +Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const { + return getArchTriple(Header.cputype, Header.cpusubtype, McpuDefault); +} + +relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return section_rel_begin(DRI); +} + +relocation_iterator MachOObjectFile::section_rel_end(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return section_rel_end(DRI); +} + +dice_iterator MachOObjectFile::begin_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, DicLC.dataoff)); + return dice_iterator(DiceRef(DRI, this)); +} + +dice_iterator MachOObjectFile::end_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + unsigned Offset = DicLC.dataoff + DicLC.datasize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, 
Offset)); + return dice_iterator(DiceRef(DRI, this)); +} + +ExportEntry::ExportEntry(ArrayRef<uint8_t> T) : Trie(T) {} + +void ExportEntry::moveToFirst() { + pushNode(0); + pushDownUntilBottom(); +} + +void ExportEntry::moveToEnd() { + Stack.clear(); + Done = true; +} + +bool ExportEntry::operator==(const ExportEntry &Other) const { + // Common case, one at end, other iterating from begin. + if (Done || Other.Done) + return (Done == Other.Done); + // Not equal if different stack sizes. + if (Stack.size() != Other.Stack.size()) + return false; + // Not equal if different cumulative strings. + if (!CumulativeString.equals(Other.CumulativeString)) + return false; + // Equal if all nodes in both stacks match. + for (unsigned i=0; i < Stack.size(); ++i) { + if (Stack[i].Start != Other.Stack[i].Start) + return false; + } + return true; +} + +uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + if (Ptr > Trie.end()) { + Ptr = Trie.end(); + Malformed = true; + } + return Result; +} + +StringRef ExportEntry::name() const { + return CumulativeString; +} + +uint64_t ExportEntry::flags() const { + return Stack.back().Flags; +} + +uint64_t ExportEntry::address() const { + return Stack.back().Address; +} + +uint64_t ExportEntry::other() const { + return Stack.back().Other; +} + +StringRef ExportEntry::otherName() const { + const char* ImportName = Stack.back().ImportName; + if (ImportName) + return StringRef(ImportName); + return StringRef(); +} + +uint32_t ExportEntry::nodeOffset() const { + return Stack.back().Start - Trie.begin(); +} + +ExportEntry::NodeState::NodeState(const uint8_t *Ptr) + : Start(Ptr), Current(Ptr) {} + +void ExportEntry::pushNode(uint64_t offset) { + const uint8_t *Ptr = Trie.begin() + offset; + NodeState State(Ptr); + uint64_t ExportInfoSize = readULEB128(State.Current); + State.IsExportNode = (ExportInfoSize != 0); + const uint8_t* Children = State.Current + 
ExportInfoSize; + if (State.IsExportNode) { + State.Flags = readULEB128(State.Current); + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + State.Address = 0; + State.Other = readULEB128(State.Current); // dylib ordinal + State.ImportName = reinterpret_cast<const char*>(State.Current); + } else { + State.Address = readULEB128(State.Current); + if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + State.Other = readULEB128(State.Current); + } + } + State.ChildCount = *Children; + State.Current = Children + 1; + State.NextChildIndex = 0; + State.ParentStringLength = CumulativeString.size(); + Stack.push_back(State); +} + +void ExportEntry::pushDownUntilBottom() { + while (Stack.back().NextChildIndex < Stack.back().ChildCount) { + NodeState &Top = Stack.back(); + CumulativeString.resize(Top.ParentStringLength); + for (;*Top.Current != 0; Top.Current++) { + char C = *Top.Current; + CumulativeString.push_back(C); + } + Top.Current += 1; + uint64_t childNodeIndex = readULEB128(Top.Current); + Top.NextChildIndex += 1; + pushNode(childNodeIndex); + } + if (!Stack.back().IsExportNode) { + Malformed = true; + moveToEnd(); + } +} + +// We have a trie data structure and need a way to walk it that is compatible +// with the C++ iterator model. The solution is a non-recursive depth first +// traversal where the iterator contains a stack of parent nodes along with a +// string that is the accumulation of all edge strings along the parent chain +// to this point. +// +// There is one "export" node for each exported symbol. But because some +// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export +// node may have child nodes too. +// +// The algorithm for moveNext() is to keep moving down the leftmost unvisited +// child until hitting a node with no children (which is an export node or +// else the trie is malformed). On the way down, each node is pushed on the +// stack ivar. 
If there is no more ways down, it pops up one and tries to go +// down a sibling path until a childless node is reached. +void ExportEntry::moveNext() { + if (Stack.empty() || !Stack.back().IsExportNode) { + Malformed = true; + moveToEnd(); + return; + } + + Stack.pop_back(); + while (!Stack.empty()) { + NodeState &Top = Stack.back(); + if (Top.NextChildIndex < Top.ChildCount) { + pushDownUntilBottom(); + // Now at the next export node. + return; + } else { + if (Top.IsExportNode) { + // This node has no children but is itself an export node. + CumulativeString.resize(Top.ParentStringLength); + return; + } + Stack.pop_back(); + } + } + Done = true; +} + +iterator_range<export_iterator> +MachOObjectFile::exports(ArrayRef<uint8_t> Trie) { + ExportEntry Start(Trie); + if (Trie.empty()) + Start.moveToEnd(); + else + Start.moveToFirst(); + + ExportEntry Finish(Trie); + Finish.moveToEnd(); + + return make_range(export_iterator(Start), export_iterator(Finish)); +} + +iterator_range<export_iterator> MachOObjectFile::exports() const { + return exports(getDyldInfoExportsTrie()); +} + +MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4) {} + +void MachORebaseEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachORebaseEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachORebaseEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); + // If in the middle of some loop, move to next rebasing in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + // REBASE_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen REBASE_OPCODE_DONE. 
+ if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More) { + // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + uint32_t Count, Skip; + const char *error = nullptr; + switch (Opcode) { + case MachO::REBASE_OPCODE_DONE: + More = false; + Done = true; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); + break; + case MachO::REBASE_OPCODE_SET_TYPE_IMM: + RebaseType = ImmValue; + if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) { + *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)RebaseType) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); + break; + case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " 
for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-rebase", + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + SegmentOffset += ImmValue * PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + false); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-rebase", + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = PointerSize; + Skip = 0; + Count = ImmValue; + if (ImmValue != 0) + RemainingLoopCount = ImmValue - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for 
REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = PointerSize; + Skip = 0; + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + if 
(error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + Count = 1; + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + 
Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-rebase", + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + *E = malformedError("bad rebase info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + } +} + +uint64_t MachORebaseEntry::readULEB128(const char **error) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); + Ptr += Count; + if (Ptr > Opcodes.end()) + Ptr = Opcodes.end(); + return Result; +} + +int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachORebaseEntry::typeName() const { + switch (RebaseType) { + case MachO::REBASE_TYPE_POINTER: + return "pointer"; + case MachO::REBASE_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case MachO::REBASE_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +// For use with the SegIndex of a checked Mach-O Rebase entry +// to get the segment name. +StringRef MachORebaseEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the section name. +StringRef MachORebaseEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the address. 
+uint64_t MachORebaseEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + +bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { +#ifdef EXPENSIVE_CHECKS + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +iterator_range<rebase_iterator> +MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachORebaseEntry Start(&Err, O, Opcodes, is64); + Start.moveToFirst(); + + MachORebaseEntry Finish(&Err, O, Opcodes, is64); + Finish.moveToEnd(); + + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); +} + +iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) { + return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit()); +} + +MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4), TableKind(BK) {} + +void MachOBindEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachOBindEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachOBindEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); + // If in the middle of some loop, move to next binding in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + // BIND_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen BIND_OPCODE_DONE. 
+ if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More) { + // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK; + int8_t SignExtended; + const uint8_t *SymStart; + uint32_t Count, Skip; + const char *error = nullptr; + switch (Opcode) { + case MachO::BIND_OPCODE_DONE: + if (TableKind == Kind::Lazy) { + // Lazying bindings have a DONE opcode between entries. Need to ignore + // it to advance to next entry. But need not if this is last entry. + bool NotLastEntry = false; + for (const uint8_t *P = Ptr; P < Opcodes.end(); ++P) { + if (*P) { + NotLastEntry = true; + } + } + if (NotLastEntry) + break; + } + More = false; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = ImmValue; + LibraryOrdinalSet = true; + if (ImmValue > O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)ImmValue) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = readULEB128(&error); + LibraryOrdinalSet = true; + 
if (error) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Ordinal > (int)O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)Ordinal) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (ImmValue) { + SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; + Ordinal = SignExtended; + if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " + "special ordinal: " + Twine((int)Ordinal) + " for opcode at: " + "0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + } else + Ordinal = 0; + LibraryOrdinalSet = true; + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); + break; + case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: + Flags = ImmValue; + SymStart = Ptr; + while (*Ptr && (Ptr < Opcodes.end())) { + ++Ptr; + } + if (Ptr == Opcodes.end()) { + *E = malformedError("for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM " + "symbol name extends past opcodes for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + SymbolName = StringRef(reinterpret_cast<const char*>(SymStart), + Ptr-SymStart); + ++Ptr; + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << 
"BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); + if (TableKind == Kind::Weak) { + if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) + return; + } + break; + case MachO::BIND_OPCODE_SET_TYPE_IMM: + BindType = ImmValue; + if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) { + *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)ImmValue) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); + break; + case MachO::BIND_OPCODE_SET_ADDEND_SLEB: + Addend = readSLEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); + break; + case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::BIND_OPCODE_ADD_ADDR_ULEB: + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + 
Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-bind", + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::BIND_OPCODE_DO_BIND: + AdvanceAmount = PointerSize; + RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) + + " for opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-bind", + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in " + "lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - 
Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = readULEB128(&error) + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + // Note, this is not really an error until the next bind but make no sense + // for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another + // bind operation. + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding " + "ULEB) " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + RemainingLoopCount = 0; + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for 
BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = ImmValue * PointerSize + PointerSize; + RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE("mach-o-bind", + dbgs() + << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); + return; + case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (count value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + AdvanceAmount = Skip + PointerSize; + if 
(error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (skip value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + DEBUG_WITH_TYPE( + "mach-o-bind", + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + *E = malformedError("bad bind info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + } +} + +uint64_t MachOBindEntry::readULEB128(const char **error) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); + Ptr += Count; + if (Ptr > 
Opcodes.end()) + Ptr = Opcodes.end(); + return Result; +} + +int64_t MachOBindEntry::readSLEB128(const char **error) { + unsigned Count; + int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error); + Ptr += Count; + if (Ptr > Opcodes.end()) + Ptr = Opcodes.end(); + return Result; +} + +int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachOBindEntry::typeName() const { + switch (BindType) { + case MachO::BIND_TYPE_POINTER: + return "pointer"; + case MachO::BIND_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case MachO::BIND_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +StringRef MachOBindEntry::symbolName() const { return SymbolName; } + +int64_t MachOBindEntry::addend() const { return Addend; } + +uint32_t MachOBindEntry::flags() const { return Flags; } + +int MachOBindEntry::ordinal() const { return Ordinal; } + +// For use with the SegIndex of a checked Mach-O Bind entry +// to get the segment name. +StringRef MachOBindEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the section name. +StringRef MachOBindEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the address. 
+uint64_t MachOBindEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + +bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { +#ifdef EXPENSIVE_CHECKS + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +// Build table of sections so SegIndex/SegOffset pairs can be translated. +BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { + uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0; + StringRef CurSegName; + uint64_t CurSegAddress; + for (const SectionRef &Section : Obj->sections()) { + SectionInfo Info; + Section.getName(Info.SectionName); + Info.Address = Section.getAddress(); + Info.Size = Section.getSize(); + Info.SegmentName = + Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); + if (!Info.SegmentName.equals(CurSegName)) { + ++CurSegIndex; + CurSegName = Info.SegmentName; + CurSegAddress = Info.Address; + } + Info.SegmentIndex = CurSegIndex - 1; + Info.OffsetInSegment = Info.Address - CurSegAddress; + Info.SegmentStartAddress = CurSegAddress; + Sections.push_back(Info); + } + MaxSegIndex = CurSegIndex; +} + +// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to +// validate a MachOBindEntry or MachORebaseEntry. 
+const char * BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex, + uint64_t SegOffset, + bool endInvalid) { + if (SegIndex == -1) + return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB"; + if (SegIndex >= MaxSegIndex) + return "bad segIndex (too large)"; + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset > (SI.OffsetInSegment + SI.Size)) + continue; + if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return nullptr; + } + return "bad segOffset, too large"; +} + +// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for +// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in +// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for +// REBASE_OPCODE_DO_*_TIMES* opcodes. The SegIndex and SegOffset must have +// been already checked. +const char * BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip, + uint8_t PointerSize, + int32_t SegIndex, + uint64_t SegOffset) { + const SectionInfo &SI = findSection(SegIndex, SegOffset); + uint64_t addr = SI.SegmentStartAddress + SegOffset; + if (addr >= SI.Address + SI.Size) + return "bad segOffset, too large"; + uint64_t i = 0; + if (Count > 1) + i = (Skip + PointerSize) * (Count - 1); + else if (Count == 1) + i = Skip + PointerSize; + if (addr + i >= SI.Address + SI.Size) { + // For rebase opcodes they can step from one section to another. + uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress; + const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false); + if (error) + return "bad count and skip, too large"; + } + return nullptr; +} + +// For use with the SegIndex of a checked Mach-O Bind or Rebase entry +// to get the segment name. 
+StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex == SegIndex) + return SI.SegmentName; + } + llvm_unreachable("invalid SegIndex"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// to get the SectionInfo. +const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection( + int32_t SegIndex, uint64_t SegOffset) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return SI; + } + llvm_unreachable("SegIndex and SegOffset not in any section"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the section name. +StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex, + uint64_t SegOffset) { + return findSection(SegIndex, SegOffset).SectionName; +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the address. 
+uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { + const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); + return SI.SegmentStartAddress + OffsetInSeg; +} + +iterator_range<bind_iterator> +MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64, + MachOBindEntry::Kind BKind) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); + Start.moveToFirst(); + + MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind); + Finish.moveToEnd(); + + return make_range(bind_iterator(Start), bind_iterator(Finish)); +} + +iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Regular); +} + +iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Lazy); +} + +iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(), + MachOBindEntry::Kind::Weak); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::begin_load_commands() const { + return LoadCommands.begin(); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::end_load_commands() const { + return LoadCommands.end(); +} + +iterator_range<MachOObjectFile::load_command_iterator> +MachOObjectFile::load_commands() const { + return make_range(begin_load_commands(), end_load_commands()); +} + +StringRef +MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { + ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec); + return parseSegmentOrSectionName(Raw.data()); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawName(DataRefImpl Sec) const { + assert(Sec.d.a < Sections.size() && "Should have detected this 
earlier"); + const section_base *Base = + reinterpret_cast<const section_base *>(Sections[Sec.d.a]); + return makeArrayRef(Base->sectname); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { + assert(Sec.d.a < Sections.size() && "Should have detected this earlier"); + const section_base *Base = + reinterpret_cast<const section_base *>(Sections[Sec.d.a]); + return makeArrayRef(Base->segname); +} + +bool +MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE) + const { + if (getCPUType(*this) == MachO::CPU_TYPE_X86_64) + return false; + return getPlainRelocationAddress(RE) & MachO::R_SCATTERED; +} + +unsigned MachOObjectFile::getPlainRelocationSymbolNum( + const MachO::any_relocation_info &RE) const { + if (isLittleEndian()) + return RE.r_word1 & 0xffffff; + return RE.r_word1 >> 8; +} + +bool MachOObjectFile::getPlainRelocationExternal( + const MachO::any_relocation_info &RE) const { + if (isLittleEndian()) + return (RE.r_word1 >> 27) & 1; + return (RE.r_word1 >> 4) & 1; +} + +bool MachOObjectFile::getScatteredRelocationScattered( + const MachO::any_relocation_info &RE) const { + return RE.r_word0 >> 31; +} + +uint32_t MachOObjectFile::getScatteredRelocationValue( + const MachO::any_relocation_info &RE) const { + return RE.r_word1; +} + +uint32_t MachOObjectFile::getScatteredRelocationType( + const MachO::any_relocation_info &RE) const { + return (RE.r_word0 >> 24) & 0xf; +} + +unsigned MachOObjectFile::getAnyRelocationAddress( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationAddress(RE); + return getPlainRelocationAddress(RE); +} + +unsigned MachOObjectFile::getAnyRelocationPCRel( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationPCRel(RE); + return getPlainRelocationPCRel(*this, RE); +} + +unsigned MachOObjectFile::getAnyRelocationLength( + const MachO::any_relocation_info &RE) 
const { + if (isRelocationScattered(RE)) + return getScatteredRelocationLength(RE); + return getPlainRelocationLength(*this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationType( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationType(RE); + return getPlainRelocationType(*this, RE); +} + +SectionRef +MachOObjectFile::getAnyRelocationSection( + const MachO::any_relocation_info &RE) const { + if (isRelocationScattered(RE) || getPlainRelocationExternal(RE)) + return *section_end(); + unsigned SecNum = getPlainRelocationSymbolNum(RE); + if (SecNum == MachO::R_ABS || SecNum > Sections.size()) + return *section_end(); + DataRefImpl DRI; + DRI.d.a = SecNum - 1; + return SectionRef(DRI, this); +} + +MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const { + assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); + return getStruct<MachO::section>(*this, Sections[DRI.d.a]); +} + +MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const { + assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); + return getStruct<MachO::section_64>(*this, Sections[DRI.d.a]); +} + +MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section>(*this, Sec); +} + +MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section_64>(*this, Sec); +} + +MachO::nlist +MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist>(*this, P); +} + +MachO::nlist_64 +MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<MachO::nlist_64>(*this, P); +} + 
+MachO::linkedit_data_command +MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::linkedit_data_command>(*this, L.Ptr); +} + +MachO::segment_command +MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::segment_command>(*this, L.Ptr); +} + +MachO::segment_command_64 +MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::segment_command_64>(*this, L.Ptr); +} + +MachO::linker_option_command +MachOObjectFile::getLinkerOptionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::linker_option_command>(*this, L.Ptr); +} + +MachO::version_min_command +MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::version_min_command>(*this, L.Ptr); +} + +MachO::note_command +MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::note_command>(*this, L.Ptr); +} + +MachO::build_version_command +MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::build_version_command>(*this, L.Ptr); +} + +MachO::build_tool_version +MachOObjectFile::getBuildToolVersion(unsigned index) const { + return getStruct<MachO::build_tool_version>(*this, BuildTools[index]); +} + +MachO::dylib_command +MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dylib_command>(*this, L.Ptr); +} + +MachO::dyld_info_command +MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dyld_info_command>(*this, L.Ptr); +} + +MachO::dylinker_command +MachOObjectFile::getDylinkerCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dylinker_command>(*this, L.Ptr); +} + +MachO::uuid_command +MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::uuid_command>(*this, L.Ptr); +} + 
+MachO::rpath_command +MachOObjectFile::getRpathCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::rpath_command>(*this, L.Ptr); +} + +MachO::source_version_command +MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::source_version_command>(*this, L.Ptr); +} + +MachO::entry_point_command +MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::entry_point_command>(*this, L.Ptr); +} + +MachO::encryption_info_command +MachOObjectFile::getEncryptionInfoCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::encryption_info_command>(*this, L.Ptr); +} + +MachO::encryption_info_command_64 +MachOObjectFile::getEncryptionInfoCommand64(const LoadCommandInfo &L) const { + return getStruct<MachO::encryption_info_command_64>(*this, L.Ptr); +} + +MachO::sub_framework_command +MachOObjectFile::getSubFrameworkCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_framework_command>(*this, L.Ptr); +} + +MachO::sub_umbrella_command +MachOObjectFile::getSubUmbrellaCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_umbrella_command>(*this, L.Ptr); +} + +MachO::sub_library_command +MachOObjectFile::getSubLibraryCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_library_command>(*this, L.Ptr); +} + +MachO::sub_client_command +MachOObjectFile::getSubClientCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::sub_client_command>(*this, L.Ptr); +} + +MachO::routines_command +MachOObjectFile::getRoutinesCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::routines_command>(*this, L.Ptr); +} + +MachO::routines_command_64 +MachOObjectFile::getRoutinesCommand64(const LoadCommandInfo &L) const { + return getStruct<MachO::routines_command_64>(*this, L.Ptr); +} + +MachO::thread_command +MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { + return 
getStruct<MachO::thread_command>(*this, L.Ptr); +} + +MachO::any_relocation_info +MachOObjectFile::getRelocation(DataRefImpl Rel) const { + uint32_t Offset; + if (getHeader().filetype == MachO::MH_OBJECT) { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } + } else { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations + } + + auto P = reinterpret_cast<const MachO::any_relocation_info *>( + getPtr(*this, Offset)) + Rel.d.b; + return getStruct<MachO::any_relocation_info>( + *this, reinterpret_cast<const char *>(P)); +} + +MachO::data_in_code_entry +MachOObjectFile::getDice(DataRefImpl Rel) const { + const char *P = reinterpret_cast<const char *>(Rel.p); + return getStruct<MachO::data_in_code_entry>(*this, P); +} + +const MachO::mach_header &MachOObjectFile::getHeader() const { + return Header; +} + +const MachO::mach_header_64 &MachOObjectFile::getHeader64() const { + assert(is64Bit()); + return Header64; +} + +uint32_t MachOObjectFile::getIndirectSymbolTableEntry( + const MachO::dysymtab_command &DLC, + unsigned Index) const { + uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t); + return getStruct<uint32_t>(*this, getPtr(*this, Offset)); +} + +MachO::data_in_code_entry +MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, + unsigned Index) const { + uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry); + return getStruct<MachO::data_in_code_entry>(*this, getPtr(*this, Offset)); +} + +MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const { + if (SymtabLoadCmd) + return getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); + + // If there is no SymtabLoadCmd return a load command with zero'ed fields. 
+ MachO::symtab_command Cmd; + Cmd.cmd = MachO::LC_SYMTAB; + Cmd.cmdsize = sizeof(MachO::symtab_command); + Cmd.symoff = 0; + Cmd.nsyms = 0; + Cmd.stroff = 0; + Cmd.strsize = 0; + return Cmd; +} + +MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const { + if (DysymtabLoadCmd) + return getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); + + // If there is no DysymtabLoadCmd return a load command with zero'ed fields. + MachO::dysymtab_command Cmd; + Cmd.cmd = MachO::LC_DYSYMTAB; + Cmd.cmdsize = sizeof(MachO::dysymtab_command); + Cmd.ilocalsym = 0; + Cmd.nlocalsym = 0; + Cmd.iextdefsym = 0; + Cmd.nextdefsym = 0; + Cmd.iundefsym = 0; + Cmd.nundefsym = 0; + Cmd.tocoff = 0; + Cmd.ntoc = 0; + Cmd.modtaboff = 0; + Cmd.nmodtab = 0; + Cmd.extrefsymoff = 0; + Cmd.nextrefsyms = 0; + Cmd.indirectsymoff = 0; + Cmd.nindirectsyms = 0; + Cmd.extreloff = 0; + Cmd.nextrel = 0; + Cmd.locreloff = 0; + Cmd.nlocrel = 0; + return Cmd; +} + +MachO::linkedit_data_command +MachOObjectFile::getDataInCodeLoadCommand() const { + if (DataInCodeLoadCmd) + return getStruct<MachO::linkedit_data_command>(*this, DataInCodeLoadCmd); + + // If there is no DataInCodeLoadCmd return a load command with zero'ed fields. + MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_DATA_IN_CODE; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; +} + +MachO::linkedit_data_command +MachOObjectFile::getLinkOptHintsLoadCommand() const { + if (LinkOptHintsLoadCmd) + return getStruct<MachO::linkedit_data_command>(*this, LinkOptHintsLoadCmd); + + // If there is no LinkOptHintsLoadCmd return a load command with zero'ed + // fields. 
+ MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_LINKER_OPTIMIZATION_HINT; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off)); + return makeArrayRef(Ptr, DyldInfo.rebase_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off)); + return makeArrayRef(Ptr, DyldInfo.bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off)); + return makeArrayRef(Ptr, DyldInfo.weak_bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off)); + return makeArrayRef(Ptr, DyldInfo.lazy_bind_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const { + if (!DyldInfoLoadCmd) + return None; + + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off)); + return makeArrayRef(Ptr, 
DyldInfo.export_size); +} + +ArrayRef<uint8_t> MachOObjectFile::getUuid() const { + if (!UuidLoadCmd) + return None; + // Returning a pointer is fine as uuid doesn't need endian swapping. + const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid); + return makeArrayRef(reinterpret_cast<const uint8_t *>(Ptr), 16); +} + +StringRef MachOObjectFile::getStringTableData() const { + MachO::symtab_command S = getSymtabLoadCommand(); + return getData().substr(S.stroff, S.strsize); +} + +bool MachOObjectFile::is64Bit() const { + return getType() == getMachOType(false, true) || + getType() == getMachOType(true, true); +} + +void MachOObjectFile::ReadULEB128s(uint64_t Index, + SmallVectorImpl<uint64_t> &Out) const { + DataExtractor extractor(ObjectFile::getData(), true, 0); + + uint32_t offset = Index; + uint64_t data = 0; + while (uint64_t delta = extractor.getULEB128(&offset)) { + data += delta; + Out.push_back(data); + } +} + +bool MachOObjectFile::isRelocatableObject() const { + return getHeader().filetype == MachO::MH_OBJECT; +} + +Expected<std::unique_ptr<MachOObjectFile>> +ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, + uint32_t UniversalCputype, + uint32_t UniversalIndex) { + StringRef Magic = Buffer.getBuffer().slice(0, 4); + if (Magic == "\xFE\xED\xFA\xCE") + return MachOObjectFile::create(Buffer, false, false, + UniversalCputype, UniversalIndex); + if (Magic == "\xCE\xFA\xED\xFE") + return MachOObjectFile::create(Buffer, true, false, + UniversalCputype, UniversalIndex); + if (Magic == "\xFE\xED\xFA\xCF") + return MachOObjectFile::create(Buffer, false, true, + UniversalCputype, UniversalIndex); + if (Magic == "\xCF\xFA\xED\xFE") + return MachOObjectFile::create(Buffer, true, true, + UniversalCputype, UniversalIndex); + return make_error<GenericBinaryError>("Unrecognized MachO magic number", + object_error::invalid_file_type); +} + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch<StringRef>(Name) + 
.Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} diff --git a/contrib/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm/lib/Object/MachOUniversal.cpp new file mode 100644 index 000000000000..309708e9b37c --- /dev/null +++ b/contrib/llvm/lib/Object/MachOUniversal.cpp @@ -0,0 +1,228 @@ +//===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOUniversalBinary class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace object; + +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed fat file (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +template<typename T> +static T getUniversalBinaryStruct(const char *Ptr) { + T Res; + memcpy(&Res, Ptr, sizeof(T)); + // Universal binary headers have big-endian byte order. + if (sys::IsLittleEndianHost) + swapStruct(Res); + return Res; +} + +MachOUniversalBinary::ObjectForArch::ObjectForArch( + const MachOUniversalBinary *Parent, uint32_t Index) + : Parent(Parent), Index(Index) { + // The iterators use Parent as a nullptr and an Index+1 == NumberOfObjects. + if (!Parent || Index >= Parent->getNumberOfObjects()) { + clear(); + } else { + // Parse object header. 
+ StringRef ParentData = Parent->getData(); + if (Parent->getMagic() == MachO::FAT_MAGIC) { + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch); + Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos); + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch_64); + Header64 = getUniversalBinaryStruct<MachO::fat_arch_64>(HeaderPos); + } + } +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsObjectFile() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + uint32_t cputype; + if (Parent->getMagic() == MachO::FAT_MAGIC) { + ObjectData = ParentData.substr(Header.offset, Header.size); + cputype = Header.cputype; + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + cputype = Header64.cputype; + } + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return ObjectFile::createMachOObjectFile(ObjBuffer, cputype, Index); +} + +Expected<std::unique_ptr<Archive>> +MachOUniversalBinary::ObjectForArch::getAsArchive() const { + if (!Parent) + report_fatal_error("MachOUniversalBinary::ObjectForArch::getAsArchive() " + "called when Parent is a nullptr"); + + StringRef ParentData = Parent->getData(); + StringRef ObjectData; + if (Parent->getMagic() == MachO::FAT_MAGIC) + ObjectData = ParentData.substr(Header.offset, Header.size); + else // Parent->getMagic() == MachO::FAT_MAGIC_64 + ObjectData = ParentData.substr(Header64.offset, Header64.size); + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return Archive::create(ObjBuffer); +} + +void 
MachOUniversalBinary::anchor() { } + +Expected<std::unique_ptr<MachOUniversalBinary>> +MachOUniversalBinary::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr<MachOUniversalBinary> Ret( + new MachOUniversalBinary(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} + +MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) + : Binary(Binary::ID_MachOUniversalBinary, Source), Magic(0), + NumberOfObjects(0) { + ErrorAsOutParameter ErrAsOutParam(&Err); + if (Data.getBufferSize() < sizeof(MachO::fat_header)) { + Err = make_error<GenericBinaryError>("File too small to be a Mach-O " + "universal file", + object_error::invalid_file_type); + return; + } + // Check for magic value and sufficient header size. + StringRef Buf = getData(); + MachO::fat_header H = + getUniversalBinaryStruct<MachO::fat_header>(Buf.begin()); + Magic = H.magic; + NumberOfObjects = H.nfat_arch; + if (NumberOfObjects == 0) { + Err = malformedError("contains zero architecture types"); + return; + } + uint32_t MinSize = sizeof(MachO::fat_header); + if (Magic == MachO::FAT_MAGIC) + MinSize += sizeof(MachO::fat_arch) * NumberOfObjects; + else if (Magic == MachO::FAT_MAGIC_64) + MinSize += sizeof(MachO::fat_arch_64) * NumberOfObjects; + else { + Err = malformedError("bad magic number"); + return; + } + if (Buf.size() < MinSize) { + Err = malformedError("fat_arch" + + Twine(Magic == MachO::FAT_MAGIC ? 
"" : "_64") + + " structs would extend past the end of the file"); + return; + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + uint64_t bigSize = A.getOffset(); + bigSize += A.getSize(); + if (bigSize > Buf.size()) { + Err = malformedError("offset plus size of cputype (" + + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") extends past the end of the file"); + return; + } +#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */ + if (A.getAlign() > MAXSECTALIGN) { + Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large " + "for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") (maximum 2^" + Twine(MAXSECTALIGN) + ")"); + return; + } + if(A.getOffset() % (1 << A.getAlign()) != 0){ + Err = malformedError("offset: " + Twine(A.getOffset()) + + " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") not aligned on it's alignment (2^" + Twine(A.getAlign()) + ")"); + return; + } + if (A.getOffset() < MinSize) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") offset " + Twine(A.getOffset()) + " overlaps universal headers"); + return; + } + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + for (uint32_t j = i + 1; j < NumberOfObjects; j++) { + ObjectForArch B(this, j); + if (A.getCPUType() == B.getCPUType() && + (A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) == + (B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK)) { + Err = malformedError("contains two of the same architecture (cputype " + "(" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + "))"); + return; + } + if ((A.getOffset() >= B.getOffset() && + A.getOffset() < B.getOffset() + B.getSize()) || + (A.getOffset() + 
A.getSize() > B.getOffset() && + A.getOffset() + A.getSize() < B.getOffset() + B.getSize()) || + (A.getOffset() <= B.getOffset() && + A.getOffset() + A.getSize() >= B.getOffset() + B.getSize())) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(A.getOffset()) + " with a size of " + + Twine(A.getSize()) + ", overlaps cputype (" + Twine(B.getCPUType()) + + ") cpusubtype (" + Twine(B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(B.getOffset()) + " with a size of " + + Twine(B.getSize())); + return; + } + } + } + Err = Error::success(); +} + +Expected<std::unique_ptr<MachOObjectFile>> +MachOUniversalBinary::getObjectForArch(StringRef ArchName) const { + if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch) + return make_error<GenericBinaryError>("Unknown architecture " + "named: " + + ArchName, + object_error::arch_not_found); + + for (auto &Obj : objects()) + if (Obj.getArchFlagName() == ArchName) + return Obj.getAsObjectFile(); + return make_error<GenericBinaryError>("fat file does not " + "contain " + + ArchName, + object_error::arch_not_found); +} diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp new file mode 100644 index 000000000000..f2e7a218c13a --- /dev/null +++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp @@ -0,0 +1,280 @@ +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a symbol table built from in-memory IR. It provides +// access to GlobalValues and should only be used if such access is required +// (e.g. in the LTO implementation). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ModuleSymbolTable.h" +#include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <string> + +using namespace llvm; +using namespace object; + +void ModuleSymbolTable::addModule(Module *M) { + if (FirstMod) + assert(FirstMod->getTargetTriple() == M->getTargetTriple()); + else + FirstMod = M; + + for (GlobalValue &GV : M->global_values()) + SymTab.push_back(&GV); + + CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) AsmSymbol(Name, Flags)); + }); +} + +// Ensure ELF .symver aliases get the same binding as the defined symbol +// they alias with. 
+static void handleSymverAliases(const Module &M, RecordStreamer &Streamer) { + if (Streamer.symverAliases().empty()) + return; + + // The name in the assembler will be mangled, but the name in the IR + // might not, so we first compute a mapping from mangled name to GV. + Mangler Mang; + SmallString<64> MangledName; + StringMap<const GlobalValue *> MangledNameMap; + auto GetMangledName = [&](const GlobalValue &GV) { + if (!GV.hasName()) + return; + + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); + MangledNameMap[MangledName] = &GV; + }; + for (const Function &F : M) + GetMangledName(F); + for (const GlobalVariable &GV : M.globals()) + GetMangledName(GV); + for (const GlobalAlias &GA : M.aliases()) + GetMangledName(GA); + + // Walk all the recorded .symver aliases, and set up the binding + // for each alias. + for (auto &Symver : Streamer.symverAliases()) { + const MCSymbol *Aliasee = Symver.first; + MCSymbolAttr Attr = MCSA_Invalid; + + // First check if the aliasee binding was recorded in the asm. + RecordStreamer::State state = Streamer.getSymbolState(Aliasee); + switch (state) { + case RecordStreamer::Global: + case RecordStreamer::DefinedGlobal: + Attr = MCSA_Global; + break; + case RecordStreamer::UndefinedWeak: + case RecordStreamer::DefinedWeak: + Attr = MCSA_Weak; + break; + default: + break; + } + + // If we don't have a symbol attribute from assembly, then check if + // the aliasee was defined in the IR. 
+ if (Attr == MCSA_Invalid) { + const auto *GV = M.getNamedValue(Aliasee->getName()); + if (!GV) { + auto MI = MangledNameMap.find(Aliasee->getName()); + if (MI != MangledNameMap.end()) + GV = MI->second; + else + continue; + } + if (GV->hasExternalLinkage()) + Attr = MCSA_Global; + else if (GV->hasLocalLinkage()) + Attr = MCSA_Local; + else if (GV->isWeakForLinker()) + Attr = MCSA_Weak; + } + if (Attr == MCSA_Invalid) + continue; + + // Set the detected binding on each alias with this aliasee. + for (auto &Alias : Symver.second) + Streamer.EmitSymbolAttribute(Alias, Attr); + } +} + +void ModuleSymbolTable::CollectAsmSymbols( + const Module &M, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + StringRef InlineAsm = M.getModuleInlineAsm(); + if (InlineAsm.empty()) + return; + + std::string Err; + const Triple TT(M.getTargetTriple()); + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + assert(T && T->hasMCAsmParser()); + + std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); + if (!MRI) + return; + + std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str())); + if (!MAI) + return; + + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(TT.str(), "", "")); + if (!STI) + return; + + std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); + if (!MCII) + return; + + MCObjectFileInfo MOFI; + MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); + MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx); + RecordStreamer Streamer(MCCtx); + T->createNullTargetStreamer(Streamer); + + std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI)); + + MCTargetOptions MCOptions; + std::unique_ptr<MCTargetAsmParser> TAP( + T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + return; + + 
Parser->setTargetParser(*TAP); + if (Parser->Run(false)) + return; + + handleSymverAliases(M, Streamer); + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. + uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + } +} + +void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { + if (S.is<AsmSymbol *>()) { + OS << S.get<AsmSymbol *>()->first; + return; + } + + auto *GV = S.get<GlobalValue *>(); + if (GV->hasDLLImportStorageClass()) + OS << "__imp_"; + + Mang.getNameWithPrefix(OS, GV, false); +} + +uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const { + if (S.is<AsmSymbol *>()) + return S.get<AsmSymbol *>()->second; + + auto *GV = S.get<GlobalValue *>(); + + uint32_t Res = BasicSymbolRef::SF_None; + if (GV->isDeclarationForLinker()) + Res |= BasicSymbolRef::SF_Undefined; + else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Hidden; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (GVar->isConstant()) + Res |= BasicSymbolRef::SF_Const; + } + if (dyn_cast_or_null<Function>(GV->getBaseObject())) + Res |= BasicSymbolRef::SF_Executable; + if (isa<GlobalAlias>(GV)) + Res |= BasicSymbolRef::SF_Indirect; + if 
(GV->hasPrivateLinkage()) + Res |= BasicSymbolRef::SF_FormatSpecific; + if (!GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Global; + if (GV->hasCommonLinkage()) + Res |= BasicSymbolRef::SF_Common; + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasExternalWeakLinkage()) + Res |= BasicSymbolRef::SF_Weak; + + if (GV->getName().startswith("llvm.")) + Res |= BasicSymbolRef::SF_FormatSpecific; + else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { + if (Var->getSection() == "llvm.metadata") + Res |= BasicSymbolRef::SF_FormatSpecific; + } + + return Res; +} diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp new file mode 100644 index 000000000000..1d2859cfbe9d --- /dev/null +++ b/contrib/llvm/lib/Object/Object.cpp @@ -0,0 +1,240 @@ +//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings to the file-format-independent object +// library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Object.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Object/ObjectFile.h" + +using namespace llvm; +using namespace object; + +inline OwningBinary<ObjectFile> *unwrap(LLVMObjectFileRef OF) { + return reinterpret_cast<OwningBinary<ObjectFile> *>(OF); +} + +inline LLVMObjectFileRef wrap(const OwningBinary<ObjectFile> *OF) { + return reinterpret_cast<LLVMObjectFileRef>( + const_cast<OwningBinary<ObjectFile> *>(OF)); +} + +inline section_iterator *unwrap(LLVMSectionIteratorRef SI) { + return reinterpret_cast<section_iterator*>(SI); +} + +inline LLVMSectionIteratorRef +wrap(const section_iterator *SI) { + return reinterpret_cast<LLVMSectionIteratorRef> + (const_cast<section_iterator*>(SI)); +} + +inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) { + return reinterpret_cast<symbol_iterator*>(SI); +} + +inline LLVMSymbolIteratorRef +wrap(const symbol_iterator *SI) { + return reinterpret_cast<LLVMSymbolIteratorRef> + (const_cast<symbol_iterator*>(SI)); +} + +inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) { + return reinterpret_cast<relocation_iterator*>(SI); +} + +inline LLVMRelocationIteratorRef +wrap(const relocation_iterator *SI) { + return reinterpret_cast<LLVMRelocationIteratorRef> + (const_cast<relocation_iterator*>(SI)); +} + +// ObjectFile creation +LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { + std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf)); + Expected<std::unique_ptr<ObjectFile>> ObjOrErr( + ObjectFile::createObjectFile(Buf->getMemBufferRef())); + std::unique_ptr<ObjectFile> Obj; + if (!ObjOrErr) { + // TODO: Actually report errors helpfully. 
+ consumeError(ObjOrErr.takeError()); + return nullptr; + } + + auto *Ret = new OwningBinary<ObjectFile>(std::move(ObjOrErr.get()), std::move(Buf)); + return wrap(Ret); +} + +void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { + delete unwrap(ObjectFile); +} + +// ObjectFile Section iterators +LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef OF) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + section_iterator SI = OB->getBinary()->section_begin(); + return wrap(new section_iterator(SI)); +} + +void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef OF, + LLVMSectionIteratorRef SI) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + return (*unwrap(SI) == OB->getBinary()->section_end()) ? 1 : 0; +} + +void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { + ++(*unwrap(SI)); +} + +void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, + LLVMSymbolIteratorRef Sym) { + Expected<section_iterator> SecOrErr = (*unwrap(Sym))->getSection(); + if (!SecOrErr) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(SecOrErr.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } + *unwrap(Sect) = *SecOrErr; +} + +// ObjectFile Symbol iterators +LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef OF) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + symbol_iterator SI = OB->getBinary()->symbol_begin(); + return wrap(new symbol_iterator(SI)); +} + +void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef OF, + LLVMSymbolIteratorRef SI) { + OwningBinary<ObjectFile> *OB = unwrap(OF); + return (*unwrap(SI) == OB->getBinary()->symbol_end()) ? 
1 : 0; +} + +void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { + ++(*unwrap(SI)); +} + +// SectionRef accessors +const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { + StringRef ret; + if (std::error_code ec = (*unwrap(SI))->getName(ret)) + report_fatal_error(ec.message()); + return ret.data(); +} + +uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getSize(); +} + +const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { + StringRef ret; + if (std::error_code ec = (*unwrap(SI))->getContents(ret)) + report_fatal_error(ec.message()); + return ret.data(); +} + +uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getAddress(); +} + +LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, + LLVMSymbolIteratorRef Sym) { + return (*unwrap(SI))->containsSymbol(**unwrap(Sym)); +} + +// Section Relocation iterators +LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) { + relocation_iterator SI = (*unwrap(Section))->relocation_begin(); + return wrap(new relocation_iterator(SI)); +} + +void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section, + LLVMRelocationIteratorRef SI) { + return (*unwrap(SI) == (*unwrap(Section))->relocation_end()) ? 
1 : 0; +} + +void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) { + ++(*unwrap(SI)); +} + + +// SymbolRef accessors +const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { + Expected<StringRef> Ret = (*unwrap(SI))->getName(); + if (!Ret) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(Ret.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } + return Ret->data(); +} + +uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { + Expected<uint64_t> Ret = (*unwrap(SI))->getAddress(); + if (!Ret) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(Ret.takeError(), OS, ""); + OS.flush(); + report_fatal_error(Buf); + } + return *Ret; +} + +uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { + return (*unwrap(SI))->getCommonSize(); +} + +// RelocationRef accessors +uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { + return (*unwrap(RI))->getOffset(); +} + +LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { + symbol_iterator ret = (*unwrap(RI))->getSymbol(); + return wrap(new symbol_iterator(ret)); +} + +uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { + return (*unwrap(RI))->getType(); +} + +// NOTE: Caller takes ownership of returned string. +const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { + SmallVector<char, 0> ret; + (*unwrap(RI))->getTypeName(ret); + char *str = static_cast<char*>(malloc(ret.size())); + std::copy(ret.begin(), ret.end(), str); + return str; +} + +// NOTE: Caller takes ownership of returned string. 
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) { + return strdup(""); +} + diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp new file mode 100644 index 000000000000..8377dd0d73fa --- /dev/null +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -0,0 +1,139 @@ +//===- ObjectFile.cpp - File format independent object file ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent ObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <system_error> + +using namespace llvm; +using namespace object; + +void ObjectFile::anchor() {} + +ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) + : SymbolicFile(Type, Source) {} + +bool SectionRef::containsSymbol(SymbolRef S) const { + Expected<section_iterator> SymSec = S.getSection(); + if (!SymSec) { + // TODO: Actually report errors helpfully. 
+ consumeError(SymSec.takeError()); + return false; + } + return *this == **SymSec; +} + +uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const { + uint32_t Flags = getSymbolFlags(Ref); + if (Flags & SymbolRef::SF_Undefined) + return 0; + if (Flags & SymbolRef::SF_Common) + return getCommonSymbolSize(Ref); + return getSymbolValueImpl(Ref); +} + +std::error_code ObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + Expected<StringRef> Name = getSymbolName(Symb); + if (!Name) + return errorToErrorCode(Name.takeError()); + OS << *Name; + return std::error_code(); +} + +uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; } + +bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const { + StringRef SectName; + if (!getSectionName(Sec, SectName)) + return SectName == ".llvmbc"; + return false; +} + +section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { + return section_iterator(SectionRef(Sec, this)); +} + +Expected<std::unique_ptr<ObjectFile>> +ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { + StringRef Data = Object.getBuffer(); + if (Type == file_magic::unknown) + Type = identify_magic(Data); + + switch (Type) { + case file_magic::unknown: + case file_magic::bitcode: + case file_magic::coff_cl_gl_object: + case file_magic::archive: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: + return errorCodeToError(object_error::invalid_file_type); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + return errorOrToExpected(createELFObjectFile(Object)); + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: 
+ case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + return createMachOObjectFile(Object); + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + return errorOrToExpected(createCOFFObjectFile(Object)); + case file_magic::wasm_object: + return createWasmObjectFile(Object); + } + llvm_unreachable("Unexpected Object File Type"); +} + +Expected<OwningBinary<ObjectFile>> +ObjectFile::createObjectFile(StringRef ObjectPath) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFile(ObjectPath); + if (std::error_code EC = FileOrErr.getError()) + return errorCodeToError(EC); + std::unique_ptr<MemoryBuffer> Buffer = std::move(FileOrErr.get()); + + Expected<std::unique_ptr<ObjectFile>> ObjOrErr = + createObjectFile(Buffer->getMemBufferRef()); + if (Error Err = ObjOrErr.takeError()) + return std::move(Err); + std::unique_ptr<ObjectFile> Obj = std::move(ObjOrErr.get()); + + return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer)); +} diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp new file mode 100644 index 000000000000..e94e9cfed394 --- /dev/null +++ b/contrib/llvm/lib/Object/RecordStreamer.cpp @@ -0,0 +1,118 @@ +//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "RecordStreamer.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +void RecordStreamer::markDefined(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Global: + S = DefinedGlobal; + break; + case NeverSeen: + case Defined: + case Used: + S = Defined; + break; + case DefinedWeak: + break; + case UndefinedWeak: + S = DefinedWeak; + } +} + +void RecordStreamer::markGlobal(const MCSymbol &Symbol, + MCSymbolAttr Attribute) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + S = (Attribute == MCSA_Weak) ? DefinedWeak : DefinedGlobal; + break; + + case NeverSeen: + case Global: + case Used: + S = (Attribute == MCSA_Weak) ? UndefinedWeak : Global; + break; + case UndefinedWeak: + case DefinedWeak: + break; + } +} + +void RecordStreamer::markUsed(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + case Global: + case DefinedWeak: + case UndefinedWeak: + break; + + case NeverSeen: + case Used: + S = Used; + break; + } +} + +void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } + +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + +RecordStreamer::const_iterator RecordStreamer::begin() { + return Symbols.begin(); +} + +RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } + +void RecordStreamer::EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI, bool) { + MCStreamer::EmitInstruction(Inst, STI); +} + +void RecordStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { + MCStreamer::EmitLabel(Symbol); + markDefined(*Symbol); +} + +void RecordStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + markDefined(*Symbol); + MCStreamer::EmitAssignment(Symbol, Value); +} + +bool 
RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + if (Attribute == MCSA_Global || Attribute == MCSA_Weak) + markGlobal(*Symbol, Attribute); + if (Attribute == MCSA_LazyReference) + markUsed(*Symbol); + return true; +} + +void RecordStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + markDefined(*Symbol); +} + +void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + markDefined(*Symbol); +} + +void RecordStreamer::emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) { + SymverAliasMap[Aliasee].push_back(Alias); +} diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h new file mode 100644 index 000000000000..4d119091a3d2 --- /dev/null +++ b/contrib/llvm/lib/Object/RecordStreamer.h @@ -0,0 +1,75 @@ +//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H +#define LLVM_LIB_OBJECT_RECORDSTREAMER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" +#include <vector> + +namespace llvm { + +class RecordStreamer : public MCStreamer { +public: + enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, + UndefinedWeak}; + +private: + StringMap<State> Symbols; + // Map of aliases created by .symver directives, saved so we can update + // their symbol binding after parsing complete. This maps from each + // aliasee to its list of aliases. 
+ DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap; + + void markDefined(const MCSymbol &Symbol); + void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); + void markUsed(const MCSymbol &Symbol); + void visitUsedSymbol(const MCSymbol &Sym) override; + +public: + RecordStreamer(MCContext &Context); + + using const_iterator = StringMap<State>::const_iterator; + + const_iterator begin(); + const_iterator end(); + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override; + void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; + void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; + bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; + void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; + void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; + /// Record .symver aliases for later processing. + void emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) override; + /// Return the map of .symver aliasee to associated aliases. + DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() { + return SymverAliasMap; + } + + /// Get the state recorded for the given symbol. + State getSymbolState(const MCSymbol *Sym) { + auto SI = Symbols.find(Sym->getName()); + if (SI == Symbols.end()) + return NeverSeen; + return SI->second; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H diff --git a/contrib/llvm/lib/Object/SymbolSize.cpp b/contrib/llvm/lib/Object/SymbolSize.cpp new file mode 100644 index 000000000000..dd49d5f116b3 --- /dev/null +++ b/contrib/llvm/lib/Object/SymbolSize.cpp @@ -0,0 +1,94 @@ +//===- SymbolSize.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/SymbolSize.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" + +using namespace llvm; +using namespace object; + +// Orders increasingly by (SectionID, Address). +int llvm::object::compareAddress(const SymEntry *A, const SymEntry *B) { + if (A->SectionID != B->SectionID) + return A->SectionID < B->SectionID ? -1 : 1; + if (A->Address != B->Address) + return A->Address < B->Address ? -1 : 1; + return 0; +} + +static unsigned getSectionID(const ObjectFile &O, SectionRef Sec) { + if (auto *M = dyn_cast<MachOObjectFile>(&O)) + return M->getSectionID(Sec); + return cast<COFFObjectFile>(O).getSectionID(Sec); +} + +static unsigned getSymbolSectionID(const ObjectFile &O, SymbolRef Sym) { + if (auto *M = dyn_cast<MachOObjectFile>(&O)) + return M->getSymbolSectionID(Sym); + return cast<COFFObjectFile>(O).getSymbolSectionID(Sym); +} + +std::vector<std::pair<SymbolRef, uint64_t>> +llvm::object::computeSymbolSizes(const ObjectFile &O) { + std::vector<std::pair<SymbolRef, uint64_t>> Ret; + + if (const auto *E = dyn_cast<ELFObjectFileBase>(&O)) { + auto Syms = E->symbols(); + if (Syms.begin() == Syms.end()) + Syms = E->getDynamicSymbolIterators(); + for (ELFSymbolRef Sym : Syms) + Ret.push_back({Sym, Sym.getSize()}); + return Ret; + } + + // Collect sorted symbol addresses. Include dummy addresses for the end + // of each section. 
+ std::vector<SymEntry> Addresses; + unsigned SymNum = 0; + for (symbol_iterator I = O.symbol_begin(), E = O.symbol_end(); I != E; ++I) { + SymbolRef Sym = *I; + uint64_t Value = Sym.getValue(); + Addresses.push_back({I, Value, SymNum, getSymbolSectionID(O, Sym)}); + ++SymNum; + } + for (SectionRef Sec : O.sections()) { + uint64_t Address = Sec.getAddress(); + uint64_t Size = Sec.getSize(); + Addresses.push_back( + {O.symbol_end(), Address + Size, 0, getSectionID(O, Sec)}); + } + array_pod_sort(Addresses.begin(), Addresses.end(), compareAddress); + + // Compute the size as the gap to the next symbol + for (unsigned I = 0, N = Addresses.size() - 1; I < N; ++I) { + auto &P = Addresses[I]; + if (P.I == O.symbol_end()) + continue; + + // If multiple symbol have the same address, give both the same size. + unsigned NextI = I + 1; + while (NextI < N && Addresses[NextI].Address == P.Address) + ++NextI; + + uint64_t Size = Addresses[NextI].Address - P.Address; + P.Address = Size; + } + + // Assign the sorted symbols in the original order. + Ret.resize(SymNum); + for (SymEntry &P : Addresses) { + if (P.I == O.symbol_end()) + continue; + Ret[P.Number] = {*P.I, P.Address}; + } + return Ret; +} diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp new file mode 100644 index 000000000000..1042d29d2350 --- /dev/null +++ b/contrib/llvm/lib/Object/SymbolicFile.cpp @@ -0,0 +1,94 @@ +//===- SymbolicFile.cpp - Interface that only provides symbols ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent SymbolicFile class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/SymbolicFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> + +using namespace llvm; +using namespace object; + +SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) + : Binary(Type, Source) {} + +SymbolicFile::~SymbolicFile() = default; + +Expected<std::unique_ptr<SymbolicFile>> +SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, + LLVMContext *Context) { + StringRef Data = Object.getBuffer(); + if (Type == file_magic::unknown) + Type = identify_magic(Data); + + switch (Type) { + case file_magic::bitcode: + if (Context) + return IRObjectFile::create(Object, *Context); + LLVM_FALLTHROUGH; + case file_magic::unknown: + case file_magic::archive: + case file_magic::coff_cl_gl_object: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: + return errorCodeToError(object_error::invalid_file_type); + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case 
file_magic::pecoff_executable: + case file_magic::wasm_object: + return ObjectFile::createObjectFile(Object, Type); + case file_magic::coff_import_library: + return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object)); + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { + Expected<std::unique_ptr<ObjectFile>> Obj = + ObjectFile::createObjectFile(Object, Type); + if (!Obj || !Context) + return std::move(Obj); + + ErrorOr<MemoryBufferRef> BCData = + IRObjectFile::findBitcodeInObject(*Obj->get()); + if (!BCData) + return std::move(Obj); + + return IRObjectFile::create( + MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), + *Context); + } + } + llvm_unreachable("Unexpected Binary File Type"); +} diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp new file mode 100644 index 000000000000..7f80bf0b83a0 --- /dev/null +++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp @@ -0,0 +1,991 @@ +//===- WasmObjectFile.cpp - Wasm object file implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <system_error> + +#define DEBUG_TYPE "wasm-object" + +using namespace llvm; +using namespace object; + +Expected<std::unique_ptr<WasmObjectFile>> +ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { + Error Err = Error::success(); + auto ObjectFile = llvm::make_unique<WasmObjectFile>(Buffer, Err); + if (Err) + return std::move(Err); + + return std::move(ObjectFile); +} + +#define VARINT7_MAX ((1<<7)-1) +#define VARINT7_MIN (-(1<<7)) +#define VARUINT7_MAX (1<<7) +#define VARUINT1_MAX (1) + +static uint8_t readUint8(const uint8_t *&Ptr) { return *Ptr++; } + +static uint32_t readUint32(const uint8_t *&Ptr) { + uint32_t Result = support::endian::read32le(Ptr); + Ptr += sizeof(Result); + return Result; +} + +static int32_t readFloat32(const uint8_t *&Ptr) { + int32_t Result = 0; + memcpy(&Result, Ptr, sizeof(Result)); + Ptr += sizeof(Result); + return Result; +} + +static int64_t readFloat64(const uint8_t *&Ptr) { + int64_t Result = 0; + memcpy(&Result, Ptr, sizeof(Result)); + Ptr += sizeof(Result); + return Result; +} + +static uint64_t readULEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + return Result; +} + +static StringRef readString(const uint8_t *&Ptr) { + uint32_t StringLen = readULEB128(Ptr); + StringRef 
Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen); + Ptr += StringLen; + return Return; +} + +static int64_t readLEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeSLEB128(Ptr, &Count); + Ptr += Count; + return Result; +} + +static uint8_t readVaruint1(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= VARUINT1_MAX && result >= 0); + return result; +} + +static int8_t readVarint7(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= VARINT7_MAX && result >= VARINT7_MIN); + return result; +} + +static uint8_t readVaruint7(const uint8_t *&Ptr) { + uint64_t result = readULEB128(Ptr); + assert(result <= VARUINT7_MAX); + return result; +} + +static int32_t readVarint32(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= INT32_MAX && result >= INT32_MIN); + return result; +} + +static uint32_t readVaruint32(const uint8_t *&Ptr) { + uint64_t result = readULEB128(Ptr); + assert(result <= UINT32_MAX); + return result; +} + +static int64_t readVarint64(const uint8_t *&Ptr) { + return readLEB128(Ptr); +} + +static uint8_t readOpcode(const uint8_t *&Ptr) { + return readUint8(Ptr); +} + +static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { + Expr.Opcode = readOpcode(Ptr); + + switch (Expr.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + Expr.Value.Int32 = readVarint32(Ptr); + break; + case wasm::WASM_OPCODE_I64_CONST: + Expr.Value.Int64 = readVarint64(Ptr); + break; + case wasm::WASM_OPCODE_F32_CONST: + Expr.Value.Float32 = readFloat32(Ptr); + break; + case wasm::WASM_OPCODE_F64_CONST: + Expr.Value.Float64 = readFloat64(Ptr); + break; + case wasm::WASM_OPCODE_GET_GLOBAL: + Expr.Value.Global = readULEB128(Ptr); + break; + default: + return make_error<GenericBinaryError>("Invalid opcode in init_expr", + object_error::parse_failed); + } + + uint8_t EndOpcode = readOpcode(Ptr); + if (EndOpcode != wasm::WASM_OPCODE_END) { + return 
make_error<GenericBinaryError>("Invalid init_expr", + object_error::parse_failed); + } + return Error::success(); +} + +static wasm::WasmLimits readLimits(const uint8_t *&Ptr) { + wasm::WasmLimits Result; + Result.Flags = readVaruint1(Ptr); + Result.Initial = readVaruint32(Ptr); + if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) + Result.Maximum = readVaruint32(Ptr); + return Result; +} + +static wasm::WasmTable readTable(const uint8_t *&Ptr) { + wasm::WasmTable Table; + Table.ElemType = readVarint7(Ptr); + Table.Limits = readLimits(Ptr); + return Table; +} + +static Error readSection(WasmSection &Section, const uint8_t *&Ptr, + const uint8_t *Start) { + // TODO(sbc): Avoid reading past EOF in the case of malformed files. + Section.Offset = Ptr - Start; + Section.Type = readVaruint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + if (Size == 0) + return make_error<StringError>("Zero length section", + object_error::parse_failed); + Section.Content = ArrayRef<uint8_t>(Ptr, Size); + Ptr += Size; + return Error::success(); +} + +WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) + : ObjectFile(Binary::ID_Wasm, Buffer) { + LinkingData.DataAlignment = 0; + LinkingData.DataSize = 0; + + ErrorAsOutParameter ErrAsOutParam(&Err); + Header.Magic = getData().substr(0, 4); + if (Header.Magic != StringRef("\0asm", 4)) { + Err = make_error<StringError>("Bad magic number", + object_error::parse_failed); + return; + } + const uint8_t *Ptr = getPtr(4); + Header.Version = readUint32(Ptr); + if (Header.Version != wasm::WasmVersion) { + Err = make_error<StringError>("Bad version number", + object_error::parse_failed); + return; + } + + const uint8_t *Eof = getPtr(getData().size()); + WasmSection Sec; + while (Ptr < Eof) { + if ((Err = readSection(Sec, Ptr, getPtr(0)))) + return; + if ((Err = parseSection(Sec))) + return; + + Sections.push_back(Sec); + } +} + +Error WasmObjectFile::parseSection(WasmSection &Sec) { + const uint8_t* Start = Sec.Content.data(); + const 
uint8_t* End = Start + Sec.Content.size(); + switch (Sec.Type) { + case wasm::WASM_SEC_CUSTOM: + return parseCustomSection(Sec, Start, End); + case wasm::WASM_SEC_TYPE: + return parseTypeSection(Start, End); + case wasm::WASM_SEC_IMPORT: + return parseImportSection(Start, End); + case wasm::WASM_SEC_FUNCTION: + return parseFunctionSection(Start, End); + case wasm::WASM_SEC_TABLE: + return parseTableSection(Start, End); + case wasm::WASM_SEC_MEMORY: + return parseMemorySection(Start, End); + case wasm::WASM_SEC_GLOBAL: + return parseGlobalSection(Start, End); + case wasm::WASM_SEC_EXPORT: + return parseExportSection(Start, End); + case wasm::WASM_SEC_START: + return parseStartSection(Start, End); + case wasm::WASM_SEC_ELEM: + return parseElemSection(Start, End); + case wasm::WASM_SEC_CODE: + return parseCodeSection(Start, End); + case wasm::WASM_SEC_DATA: + return parseDataSection(Start, End); + default: + return make_error<GenericBinaryError>("Bad section type", + object_error::parse_failed); + } +} + +Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; + switch (Type) { + case wasm::WASM_NAMES_FUNCTION: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + uint32_t Index = readVaruint32(Ptr); + StringRef Name = readString(Ptr); + if (!Name.empty()) + Symbols.emplace_back(Name, + WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME, + Sections.size(), Index); + } + break; + } + // Ignore local names for now + case wasm::WASM_NAMES_LOCAL: + default: + Ptr += Size; + break; + } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>("Name sub-section ended prematurely", + object_error::parse_failed); + } + + if (Ptr != End) + return make_error<GenericBinaryError>("Name section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error 
WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, + const uint8_t *End) { + HasLinkingSection = true; + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; + switch (Type) { + case wasm::WASM_SYMBOL_INFO: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + StringRef Symbol = readString(Ptr); + DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n"); + uint32_t Flags = readVaruint32(Ptr); + auto iter = SymbolMap.find(Symbol); + if (iter == SymbolMap.end()) { + return make_error<GenericBinaryError>( + "Invalid symbol name in linking section: " + Symbol, + object_error::parse_failed); + } + uint32_t SymIndex = iter->second; + assert(SymIndex < Symbols.size()); + Symbols[SymIndex].Flags = Flags; + DEBUG(dbgs() << "Set symbol flags index:" + << SymIndex << " name:" + << Symbols[SymIndex].Name << " exptected:" + << Symbol << " flags: " << Flags << "\n"); + } + break; + } + case wasm::WASM_DATA_SIZE: + LinkingData.DataSize = readVaruint32(Ptr); + break; + case wasm::WASM_DATA_ALIGNMENT: + LinkingData.DataAlignment = readVaruint32(Ptr); + break; + case wasm::WASM_STACK_POINTER: + default: + Ptr += Size; + break; + } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>( + "Linking sub-section ended prematurely", object_error::parse_failed); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Linking section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { + for (WasmSection& Section : Sections) { + if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) + return &Section; + } + return nullptr; +} + +WasmSection* WasmObjectFile::findSectionByType(uint32_t Type) { + assert(Type != wasm::WASM_SEC_CUSTOM); + for (WasmSection& Section : Sections) { + if (Section.Type == Type) + return &Section; + } + return nullptr; +} + +Error 
WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, + const uint8_t *End) { + uint8_t SectionCode = readVarint7(Ptr); + WasmSection* Section = nullptr; + if (SectionCode == wasm::WASM_SEC_CUSTOM) { + StringRef Name = readString(Ptr); + Section = findCustomSectionByName(Name); + } else { + Section = findSectionByType(SectionCode); + } + if (!Section) + return make_error<GenericBinaryError>("Invalid section code", + object_error::parse_failed); + uint32_t RelocCount = readVaruint32(Ptr); + while (RelocCount--) { + wasm::WasmRelocation Reloc; + memset(&Reloc, 0, sizeof(Reloc)); + Reloc.Type = readVaruint32(Ptr); + Reloc.Offset = readVaruint32(Ptr); + Reloc.Index = readVaruint32(Ptr); + switch (Reloc.Type) { + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + break; + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + Reloc.Addend = readVarint32(Ptr); + break; + default: + return make_error<GenericBinaryError>("Bad relocation type: " + + Twine(Reloc.Type), + object_error::parse_failed); + } + Section->Relocations.push_back(Reloc); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Reloc section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCustomSection(WasmSection &Sec, + const uint8_t *Ptr, const uint8_t *End) { + Sec.Name = readString(Ptr); + if (Sec.Name == "name") { + if (Error Err = parseNameSection(Ptr, End)) + return Err; + } else if (Sec.Name == "linking") { + if (Error Err = parseLinkingSection(Ptr, End)) + return Err; + } else if (Sec.Name.startswith("reloc.")) { + if (Error Err = parseRelocSection(Sec.Name, Ptr, End)) + return Err; + } + return Error::success(); +} + +Error WasmObjectFile::parseTypeSection(const 
uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Signatures.reserve(Count); + while (Count--) { + wasm::WasmSignature Sig; + Sig.ReturnType = wasm::WASM_TYPE_NORESULT; + int8_t Form = readVarint7(Ptr); + if (Form != wasm::WASM_TYPE_FUNC) { + return make_error<GenericBinaryError>("Invalid signature type", + object_error::parse_failed); + } + uint32_t ParamCount = readVaruint32(Ptr); + Sig.ParamTypes.reserve(ParamCount); + while (ParamCount--) { + uint32_t ParamType = readVarint7(Ptr); + Sig.ParamTypes.push_back(ParamType); + } + uint32_t ReturnCount = readVaruint32(Ptr); + if (ReturnCount) { + if (ReturnCount != 1) { + return make_error<GenericBinaryError>( + "Multiple return types not supported", object_error::parse_failed); + } + Sig.ReturnType = readVarint7(Ptr); + } + Signatures.push_back(Sig); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Type section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Imports.reserve(Count); + for (uint32_t i = 0; i < Count; i++) { + wasm::WasmImport Im; + Im.Module = readString(Ptr); + Im.Field = readString(Ptr); + Im.Kind = readUint8(Ptr); + switch (Im.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + Im.SigIndex = readVaruint32(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT, + Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + break; + case wasm::WASM_EXTERNAL_GLOBAL: + Im.Global.Type = readVarint7(Ptr); + Im.Global.Mutable = readVaruint1(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, + Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() 
<< "\n"); + break; + case wasm::WASM_EXTERNAL_MEMORY: + Im.Memory = readLimits(Ptr); + break; + case wasm::WASM_EXTERNAL_TABLE: + Im.Table = readTable(Ptr); + if (Im.Table.ElemType != wasm::WASM_TYPE_ANYFUNC) { + return make_error<GenericBinaryError>("Invalid table element type", + object_error::parse_failed); + } + break; + default: + return make_error<GenericBinaryError>( + "Unexpected import kind", object_error::parse_failed); + } + Imports.push_back(Im); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Import section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseFunctionSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + FunctionTypes.reserve(Count); + while (Count--) { + FunctionTypes.push_back(readVaruint32(Ptr)); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Function section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseTableSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Tables.reserve(Count); + while (Count--) { + Tables.push_back(readTable(Ptr)); + if (Tables.back().ElemType != wasm::WASM_TYPE_ANYFUNC) { + return make_error<GenericBinaryError>("Invalid table element type", + object_error::parse_failed); + } + } + if (Ptr != End) + return make_error<GenericBinaryError>("Table section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseMemorySection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Memories.reserve(Count); + while (Count--) { + Memories.push_back(readLimits(Ptr)); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Memory section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) { + 
uint32_t Count = readVaruint32(Ptr); + Globals.reserve(Count); + while (Count--) { + wasm::WasmGlobal Global; + Global.Type = readVarint7(Ptr); + Global.Mutable = readVaruint1(Ptr); + if (Error Err = readInitExpr(Global.InitExpr, Ptr)) + return Err; + Globals.push_back(Global); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Global section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Exports.reserve(Count); + for (uint32_t i = 0; i < Count; i++) { + wasm::WasmExport Ex; + Ex.Name = readString(Ptr); + Ex.Kind = readUint8(Ptr); + Ex.Index = readVaruint32(Ptr); + WasmSymbol::SymbolType ExportType; + bool MakeSymbol = false; + switch (Ex.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + ExportType = WasmSymbol::SymbolType::FUNCTION_EXPORT; + MakeSymbol = true; + break; + case wasm::WASM_EXTERNAL_GLOBAL: + ExportType = WasmSymbol::SymbolType::GLOBAL_EXPORT; + MakeSymbol = true; + break; + case wasm::WASM_EXTERNAL_MEMORY: + case wasm::WASM_EXTERNAL_TABLE: + break; + default: + return make_error<GenericBinaryError>( + "Unexpected export kind", object_error::parse_failed); + } + if (MakeSymbol) { + auto Pair = SymbolMap.try_emplace(Ex.Name, Symbols.size()); + if (Pair.second) { + Symbols.emplace_back(Ex.Name, ExportType, + Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + } else { + uint32_t SymIndex = Pair.first->second; + Symbols[SymIndex] = WasmSymbol(Ex.Name, ExportType, Sections.size(), i); + DEBUG(dbgs() << "Replacing existing symbol: " << Symbols[SymIndex] + << " sym index:" << SymIndex << "\n"); + } + } + Exports.push_back(Ex); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Export section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error 
WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) { + StartFunction = readVaruint32(Ptr); + if (StartFunction >= FunctionTypes.size()) + return make_error<GenericBinaryError>("Invalid start function", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t FunctionCount = readVaruint32(Ptr); + if (FunctionCount != FunctionTypes.size()) { + return make_error<GenericBinaryError>("Invalid function count", + object_error::parse_failed); + } + + CodeSection = ArrayRef<uint8_t>(Ptr, End - Ptr); + + while (FunctionCount--) { + wasm::WasmFunction Function; + uint32_t FunctionSize = readVaruint32(Ptr); + const uint8_t *FunctionEnd = Ptr + FunctionSize; + + uint32_t NumLocalDecls = readVaruint32(Ptr); + Function.Locals.reserve(NumLocalDecls); + while (NumLocalDecls--) { + wasm::WasmLocalDecl Decl; + Decl.Count = readVaruint32(Ptr); + Decl.Type = readVarint7(Ptr); + Function.Locals.push_back(Decl); + } + + uint32_t BodySize = FunctionEnd - Ptr; + Function.Body = ArrayRef<uint8_t>(Ptr, BodySize); + Ptr += BodySize; + assert(Ptr == FunctionEnd); + Functions.push_back(Function); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Code section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + ElemSegments.reserve(Count); + while (Count--) { + wasm::WasmElemSegment Segment; + Segment.TableIndex = readVaruint32(Ptr); + if (Segment.TableIndex != 0) { + return make_error<GenericBinaryError>("Invalid TableIndex", + object_error::parse_failed); + } + if (Error Err = readInitExpr(Segment.Offset, Ptr)) + return Err; + uint32_t NumElems = readVaruint32(Ptr); + while (NumElems--) { + Segment.Functions.push_back(readVaruint32(Ptr)); + } + ElemSegments.push_back(Segment); + } + if (Ptr != End) + return 
make_error<GenericBinaryError>("Elem section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { + const uint8_t *Start = Ptr; + uint32_t Count = readVaruint32(Ptr); + DataSegments.reserve(Count); + while (Count--) { + WasmSegment Segment; + Segment.Data.MemoryIndex = readVaruint32(Ptr); + if (Error Err = readInitExpr(Segment.Data.Offset, Ptr)) + return Err; + uint32_t Size = readVaruint32(Ptr); + Segment.Data.Content = ArrayRef<uint8_t>(Ptr, Size); + Segment.SectionOffset = Ptr - Start; + Ptr += Size; + DataSegments.push_back(Segment); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Data section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +const uint8_t *WasmObjectFile::getPtr(size_t Offset) const { + return reinterpret_cast<const uint8_t *>(getData().substr(Offset, 1).data()); +} + +const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { + return Header; +} + +void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; } + +uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { + uint32_t Result = SymbolRef::SF_None; + const WasmSymbol &Sym = getWasmSymbol(Symb); + + DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); + if (Sym.Flags & wasm::WASM_SYMBOL_FLAG_WEAK) + Result |= SymbolRef::SF_Weak; + + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable; + break; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + Result |= SymbolRef::SF_Global | SymbolRef::SF_Executable; + break; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + Result |= SymbolRef::SF_Executable; + Result |= SymbolRef::SF_FormatSpecific; + break; + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + Result |= SymbolRef::SF_Undefined; + break; + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + Result |= 
SymbolRef::SF_Global; + break; + } + + return Result; +} + +basic_symbol_iterator WasmObjectFile::symbol_begin() const { + DataRefImpl Ref; + Ref.d.a = 0; + return BasicSymbolRef(Ref, this); +} + +basic_symbol_iterator WasmObjectFile::symbol_end() const { + DataRefImpl Ref; + Ref.d.a = Symbols.size(); + return BasicSymbolRef(Ref, this); +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(const DataRefImpl &Symb) const { + return Symbols[Symb.d.a]; +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { + return getWasmSymbol(Symb.getRawDataRefImpl()); +} + +Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { + return getWasmSymbol(Symb).Name; +} + +Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { + return getSymbolValue(Symb); +} + +uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { + const WasmSymbol& Sym = getWasmSymbol(Symb); + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + return 0; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return Exports[Sym.ElementIndex].Index; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return Sym.ElementIndex; + } + llvm_unreachable("invalid symbol type"); +} + +uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +Expected<SymbolRef::Type> +WasmObjectFile::getSymbolType(DataRefImpl Symb) const { + const WasmSymbol &Sym = getWasmSymbol(Symb); + + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return SymbolRef::ST_Function; + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + case 
WasmSymbol::SymbolType::GLOBAL_EXPORT: + return SymbolRef::ST_Data; + } + + llvm_unreachable("Unknown WasmSymbol::SymbolType"); + return SymbolRef::ST_Other; +} + +Expected<section_iterator> +WasmObjectFile::getSymbolSection(DataRefImpl Symb) const { + DataRefImpl Ref; + Ref.d.a = getWasmSymbol(Symb).Section; + return section_iterator(SectionRef(Ref, this)); +} + +void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } + +std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Res) const { + const WasmSection &S = Sections[Sec.d.a]; +#define ECase(X) \ + case wasm::WASM_SEC_##X: \ + Res = #X; \ + break + switch (S.Type) { + ECase(TYPE); + ECase(IMPORT); + ECase(FUNCTION); + ECase(TABLE); + ECase(MEMORY); + ECase(GLOBAL); + ECase(EXPORT); + ECase(START); + ECase(ELEM); + ECase(CODE); + ECase(DATA); + case wasm::WASM_SEC_CUSTOM: + Res = S.Name; + break; + default: + return object_error::invalid_section_index; + } +#undef ECase + return std::error_code(); +} + +uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } + +uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + +uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { + const WasmSection &S = Sections[Sec.d.a]; + return S.Content.size(); +} + +std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Res) const { + const WasmSection &S = Sections[Sec.d.a]; + // This will never fail since wasm sections can never be empty (user-sections + // must have a name and non-user sections each have a defined structure). 
+ Res = StringRef(reinterpret_cast<const char *>(S.Content.data()), + S.Content.size()); + return std::error_code(); +} + +uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const { + return 1; +} + +bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool WasmObjectFile::isSectionText(DataRefImpl Sec) const { + return getWasmSection(Sec).Type == wasm::WASM_SEC_CODE; +} + +bool WasmObjectFile::isSectionData(DataRefImpl Sec) const { + return getWasmSection(Sec).Type == wasm::WASM_SEC_DATA; +} + +bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; } + +bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; } + +bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; } + +relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Ref) const { + DataRefImpl RelocRef; + RelocRef.d.a = Ref.d.a; + RelocRef.d.b = 0; + return relocation_iterator(RelocationRef(RelocRef, this)); +} + +relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const { + const WasmSection &Sec = getWasmSection(Ref); + DataRefImpl RelocRef; + RelocRef.d.a = Ref.d.a; + RelocRef.d.b = Sec.Relocations.size(); + return relocation_iterator(RelocationRef(RelocRef, this)); +} + +void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + Rel.d.b++; +} + +uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + return Rel.Offset; +} + +symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + SymbolRef Ref; + return symbol_iterator(Ref); +} + +uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + return Rel.Type; +} + +void WasmObjectFile::getRelocationTypeName( + DataRefImpl Ref, SmallVectorImpl<char> &Result) const { + const wasm::WasmRelocation& Rel = 
getWasmRelocation(Ref); + StringRef Res = "Unknown"; + +#define WASM_RELOC(name, value) \ + case wasm::name: \ + Res = #name; \ + break; + + switch (Rel.Type) { +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" + } + +#undef WASM_RELOC + + Result.append(Res.begin(), Res.end()); +} + +section_iterator WasmObjectFile::section_begin() const { + DataRefImpl Ref; + Ref.d.a = 0; + return section_iterator(SectionRef(Ref, this)); +} + +section_iterator WasmObjectFile::section_end() const { + DataRefImpl Ref; + Ref.d.a = Sections.size(); + return section_iterator(SectionRef(Ref, this)); +} + +uint8_t WasmObjectFile::getBytesInAddress() const { return 4; } + +StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; } + +unsigned WasmObjectFile::getArch() const { return Triple::wasm32; } + +SubtargetFeatures WasmObjectFile::getFeatures() const { + return SubtargetFeatures(); +} + +bool WasmObjectFile::isRelocatableObject() const { + return HasLinkingSection; +} + +const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const { + assert(Ref.d.a < Sections.size()); + return Sections[Ref.d.a]; +} + +const WasmSection & +WasmObjectFile::getWasmSection(const SectionRef &Section) const { + return getWasmSection(Section.getRawDataRefImpl()); +} + +const wasm::WasmRelocation & +WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const { + return getWasmRelocation(Ref.getRawDataRefImpl()); +} + +const wasm::WasmRelocation & +WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const { + assert(Ref.d.a < Sections.size()); + const WasmSection& Sec = Sections[Ref.d.a]; + assert(Ref.d.b < Sec.Relocations.size()); + return Sec.Relocations[Ref.d.b]; +} diff --git a/contrib/llvm/lib/Object/WindowsResource.cpp b/contrib/llvm/lib/Object/WindowsResource.cpp new file mode 100644 index 000000000000..246eee5ddb31 --- /dev/null +++ b/contrib/llvm/lib/Object/WindowsResource.cpp @@ -0,0 +1,720 @@ +//===-- WindowsResource.cpp -------------------------------------*- 
C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the .res file class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/WindowsResource.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MathExtras.h" +#include <ctime> +#include <queue> +#include <sstream> +#include <system_error> + +using namespace llvm; +using namespace object; + +namespace llvm { +namespace object { + +#define RETURN_IF_ERROR(X) \ + if (auto EC = X) \ + return EC; + +const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); + +// COFF files seem to be inconsistent with alignment between sections, just use +// 8-byte because it makes everyone happy. +const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); + +uint32_t WindowsResourceParser::TreeNode::StringCount = 0; +uint32_t WindowsResourceParser::TreeNode::DataCount = 0; + +WindowsResource::WindowsResource(MemoryBufferRef Source) + : Binary(Binary::ID_WinRes, Source) { + size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; + BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize), + support::little); +} + +Expected<std::unique_ptr<WindowsResource>> +WindowsResource::createWindowsResource(MemoryBufferRef Source) { + if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE) + return make_error<GenericBinaryError>( + "File too small to be a resource file", + object_error::invalid_file_type); + std::unique_ptr<WindowsResource> Ret(new WindowsResource(Source)); + return std::move(Ret); +} + +Expected<ResourceEntryRef> WindowsResource::getHeadEntry() { + Error Err = Error::success(); + auto Ref = ResourceEntryRef(BinaryStreamRef(BBS), this, Err); + 
if (Err) + return std::move(Err); + return Ref; +} + +ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref, + const WindowsResource *Owner, Error &Err) + : Reader(Ref), OwningRes(Owner) { + if (loadNext()) + Err = make_error<GenericBinaryError>("Could not read first entry.\n", + object_error::unexpected_eof); +} + +Error ResourceEntryRef::moveNext(bool &End) { + // Reached end of all the entries. + if (Reader.bytesRemaining() == 0) { + End = true; + return Error::success(); + } + RETURN_IF_ERROR(loadNext()); + + return Error::success(); +} + +static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID, + ArrayRef<UTF16> &Str, bool &IsString) { + uint16_t IDFlag; + RETURN_IF_ERROR(Reader.readInteger(IDFlag)); + IsString = IDFlag != 0xffff; + + if (IsString) { + Reader.setOffset( + Reader.getOffset() - + sizeof(uint16_t)); // Re-read the bytes which we used to check the flag. + RETURN_IF_ERROR(Reader.readWideString(Str)); + } else + RETURN_IF_ERROR(Reader.readInteger(ID)); + + return Error::success(); +} + +Error ResourceEntryRef::loadNext() { + const WinResHeaderPrefix *Prefix; + RETURN_IF_ERROR(Reader.readObject(Prefix)); + + if (Prefix->HeaderSize < MIN_HEADER_SIZE) + return make_error<GenericBinaryError>("Header size is too small.", + object_error::parse_failed); + + RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType)); + + RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName)); + + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_HEADER_ALIGNMENT)); + + RETURN_IF_ERROR(Reader.readObject(Suffix)); + + RETURN_IF_ERROR(Reader.readArray(Data, Prefix->DataSize)); + + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_DATA_ALIGNMENT)); + + return Error::success(); +} + +WindowsResourceParser::WindowsResourceParser() : Root(false) {} + +Error WindowsResourceParser::parse(WindowsResource *WR) { + auto EntryOrErr = WR->getHeadEntry(); + if (!EntryOrErr) + return EntryOrErr.takeError(); + + ResourceEntryRef Entry = EntryOrErr.get(); + bool 
End = false; + while (!End) { + Data.push_back(Entry.getData()); + + bool IsNewTypeString = false; + bool IsNewNameString = false; + + Root.addEntry(Entry, IsNewTypeString, IsNewNameString); + + if (IsNewTypeString) + StringTable.push_back(Entry.getTypeString()); + + if (IsNewNameString) + StringTable.push_back(Entry.getNameString()); + + RETURN_IF_ERROR(Entry.moveNext(End)); + } + + return Error::success(); +} + +void WindowsResourceParser::printTree(raw_ostream &OS) const { + ScopedPrinter Writer(OS); + Root.print(Writer, "Resource Tree"); +} + +void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry, + bool &IsNewTypeString, + bool &IsNewNameString) { + TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString); + TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString); + NameNode.addLanguageNode(Entry); +} + +WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { + if (IsStringNode) + StringIndex = StringCount++; +} + +WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) + : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), + Characteristics(Characteristics) { + DataIndex = DataCount++; +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createStringNode() { + return std::unique_ptr<TreeNode>(new TreeNode(true)); +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createIDNode() { + return std::unique_ptr<TreeNode>(new TreeNode(false)); +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) { + return std::unique_ptr<TreeNode>( + new TreeNode(MajorVersion, MinorVersion, Characteristics)); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry, + bool &IsNewTypeString) { + if 
(Entry.checkTypeString()) + return addChild(Entry.getTypeString(), IsNewTypeString); + else + return addChild(Entry.getTypeID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, + bool &IsNewNameString) { + if (Entry.checkNameString()) + return addChild(Entry.getNameString(), IsNewNameString); + else + return addChild(Entry.getNameID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addLanguageNode( + const ResourceEntryRef &Entry) { + return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics()); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild( + uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics) { + auto Child = IDChildren.find(ID); + if (Child == IDChildren.end()) { + auto NewChild = + IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics) + : createIDNode(); + WindowsResourceParser::TreeNode &Node = *NewChild; + IDChildren.emplace(ID, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef, + bool &IsNewString) { + std::string NameString; + ArrayRef<UTF16> CorrectedName; + std::vector<UTF16> EndianCorrectedName; + if (sys::IsBigEndianHost) { + EndianCorrectedName.resize(NameRef.size() + 1); + std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1); + EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED; + CorrectedName = makeArrayRef(EndianCorrectedName); + } else + CorrectedName = NameRef; + convertUTF16ToUTF8String(CorrectedName, NameString); + + auto Child = StringChildren.find(NameString); + if (Child == StringChildren.end()) { + auto NewChild = createStringNode(); + IsNewString = true; + WindowsResourceParser::TreeNode &Node = *NewChild; + 
StringChildren.emplace(NameString, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, + StringRef Name) const { + ListScope NodeScope(Writer, Name); + for (auto const &Child : StringChildren) { + Child.second->print(Writer, Child.first); + } + for (auto const &Child : IDChildren) { + Child.second->print(Writer, to_string(Child.first)); + } +} + +// This function returns the size of the entire resource tree, including +// directory tables, directory entries, and data entries. It does not include +// the directory strings or the relocations of the .rsrc section. +uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { + uint32_t Size = (IDChildren.size() + StringChildren.size()) * + sizeof(coff_resource_dir_entry); + + // Reached a node pointing to a data entry. + if (IsDataNode) { + Size += sizeof(coff_resource_data_entry); + return Size; + } + + // If the node does not point to data, it must have a directory table pointing + // to other nodes. 
+ Size += sizeof(coff_resource_dir_table); + + for (auto const &Child : StringChildren) { + Size += Child.second->getTreeSize(); + } + for (auto const &Child : IDChildren) { + Size += Child.second->getTreeSize(); + } + return Size; +} + +class WindowsResourceCOFFWriter { +public: + WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, Error &E); + std::unique_ptr<MemoryBuffer> write(); + +private: + void performFileLayout(); + void performSectionOneLayout(); + void performSectionTwoLayout(); + void writeCOFFHeader(); + void writeFirstSectionHeader(); + void writeSecondSectionHeader(); + void writeFirstSection(); + void writeSecondSection(); + void writeSymbolTable(); + void writeStringTable(); + void writeDirectoryTree(); + void writeDirectoryStringTable(); + void writeFirstSectionRelocations(); + std::unique_ptr<MemoryBuffer> OutputBuffer; + char *BufferStart; + uint64_t CurrentOffset = 0; + COFF::MachineTypes MachineType; + const WindowsResourceParser::TreeNode &Resources; + const ArrayRef<std::vector<uint8_t>> Data; + uint64_t FileSize; + uint32_t SymbolTableOffset; + uint32_t SectionOneSize; + uint32_t SectionOneOffset; + uint32_t SectionOneRelocations; + uint32_t SectionTwoSize; + uint32_t SectionTwoOffset; + const ArrayRef<std::vector<UTF16>> StringTable; + std::vector<uint32_t> StringTableOffsets; + std::vector<uint32_t> DataOffsets; + std::vector<uint32_t> RelocationAddresses; +}; + +WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( + COFF::MachineTypes MachineType, const WindowsResourceParser &Parser, + Error &E) + : MachineType(MachineType), Resources(Parser.getTree()), + Data(Parser.getData()), StringTable(Parser.getStringTable()) { + performFileLayout(); + + OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); +} + +void WindowsResourceCOFFWriter::performFileLayout() { + // Add size of COFF header. 
+ FileSize = COFF::Header16Size; + + // one .rsrc section header for directory tree, another for resource data. + FileSize += 2 * COFF::SectionSize; + + performSectionOneLayout(); + performSectionTwoLayout(); + + // We have reached the address of the symbol table. + SymbolTableOffset = FileSize; + + FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource. + FileSize += 4; // four null bytes for the string table. +} + +void WindowsResourceCOFFWriter::performSectionOneLayout() { + SectionOneOffset = FileSize; + + SectionOneSize = Resources.getTreeSize(); + uint32_t CurrentStringOffset = SectionOneSize; + uint32_t TotalStringTableSize = 0; + for (auto const &String : StringTable) { + StringTableOffsets.push_back(CurrentStringOffset); + uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t); + CurrentStringOffset += StringSize; + TotalStringTableSize += StringSize; + } + SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t)); + + // account for the relocations of section one. + SectionOneRelocations = FileSize + SectionOneSize; + FileSize += SectionOneSize; + FileSize += + Data.size() * COFF::RelocationSize; // one relocation for each resource. + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::performSectionTwoLayout() { + // add size of .rsrc$2 section, which contains all resource data on 8-byte + // alignment. 
+ SectionTwoOffset = FileSize; + SectionTwoSize = 0; + for (auto const &Entry : Data) { + DataOffsets.push_back(SectionTwoSize); + SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t)); + } + FileSize += SectionTwoSize; + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +static std::time_t getTime() { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + return UINT32_MAX; + return Now; +} + +std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() { + BufferStart = const_cast<char *>(OutputBuffer->getBufferStart()); + + writeCOFFHeader(); + writeFirstSectionHeader(); + writeSecondSectionHeader(); + writeFirstSection(); + writeSecondSection(); + writeSymbolTable(); + writeStringTable(); + + return std::move(OutputBuffer); +} + +void WindowsResourceCOFFWriter::writeCOFFHeader() { + // Write the COFF header. + auto *Header = reinterpret_cast<coff_file_header *>(BufferStart); + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Header->Machine = COFF::IMAGE_FILE_MACHINE_ARMNT; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Header->Machine = COFF::IMAGE_FILE_MACHINE_AMD64; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Header->Machine = COFF::IMAGE_FILE_MACHINE_I386; + break; + default: + Header->Machine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; + } + Header->NumberOfSections = 2; + Header->TimeDateStamp = getTime(); + Header->PointerToSymbolTable = SymbolTableOffset; + // One symbol for every resource plus 2 for each section and @feat.00 + Header->NumberOfSymbols = Data.size() + 5; + Header->SizeOfOptionalHeader = 0; + Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE; +} + +void WindowsResourceCOFFWriter::writeFirstSectionHeader() { + // Write the first section header. 
+ CurrentOffset += sizeof(coff_file_header); + auto *SectionOneHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); + SectionOneHeader->VirtualSize = 0; + SectionOneHeader->VirtualAddress = 0; + SectionOneHeader->SizeOfRawData = SectionOneSize; + SectionOneHeader->PointerToRawData = SectionOneOffset; + SectionOneHeader->PointerToRelocations = SectionOneRelocations; + SectionOneHeader->PointerToLinenumbers = 0; + SectionOneHeader->NumberOfRelocations = Data.size(); + SectionOneHeader->NumberOfLinenumbers = 0; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeSecondSectionHeader() { + // Write the second section header. + CurrentOffset += sizeof(coff_section); + auto *SectionTwoHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); + SectionTwoHeader->VirtualSize = 0; + SectionTwoHeader->VirtualAddress = 0; + SectionTwoHeader->SizeOfRawData = SectionTwoSize; + SectionTwoHeader->PointerToRawData = SectionTwoOffset; + SectionTwoHeader->PointerToRelocations = 0; + SectionTwoHeader->PointerToLinenumbers = 0; + SectionTwoHeader->NumberOfRelocations = 0; + SectionTwoHeader->NumberOfLinenumbers = 0; + SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeFirstSection() { + // Write section one. + CurrentOffset += sizeof(coff_section); + + writeDirectoryTree(); + writeDirectoryStringTable(); + writeFirstSectionRelocations(); + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSecondSection() { + // Now write the .rsrc$02 section. 
+ for (auto const &RawDataEntry : Data) { + std::copy(RawDataEntry.begin(), RawDataEntry.end(), + BufferStart + CurrentOffset); + CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + } + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSymbolTable() { + // Now write the symbol table. + // First, the feat symbol. + auto *Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); + Symbol->Value = 0x11; + Symbol->SectionNumber = 0xffff; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + + // Now write the .rsrc1 symbol + aux. + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 1; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + auto *Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); + Aux->Length = SectionOneSize; + Aux->NumberOfRelocations = Data.size(); + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write the .rsrc2 symbol + aux. 
+ Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); + Aux->Length = SectionTwoSize; + Aux->NumberOfRelocations = 0; + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write a symbol for each relocation. + for (unsigned i = 0; i < Data.size(); i++) { + char RelocationName[9]; + sprintf(RelocationName, "$R%06X", DataOffsets[i]); + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize); + Symbol->Value = DataOffsets[i]; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + } +} + +void WindowsResourceCOFFWriter::writeStringTable() { + // Just 4 null bytes for the string table. + auto COFFStringTable = reinterpret_cast<void *>(BufferStart + CurrentOffset); + memset(COFFStringTable, 0, 4); +} + +void WindowsResourceCOFFWriter::writeDirectoryTree() { + // Traverse parsed resource tree breadth-first and write the corresponding + // COFF objects. 
+ std::queue<const WindowsResourceParser::TreeNode *> Queue; + Queue.push(&Resources); + uint32_t NextLevelOffset = + sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + std::vector<const WindowsResourceParser::TreeNode *> DataEntriesTreeOrder; + uint32_t CurrentRelativeOffset = 0; + + while (!Queue.empty()) { + auto CurrentNode = Queue.front(); + Queue.pop(); + auto *Table = reinterpret_cast<coff_resource_dir_table *>(BufferStart + + CurrentOffset); + Table->Characteristics = CurrentNode->getCharacteristics(); + Table->TimeDateStamp = 0; + Table->MajorVersion = CurrentNode->getMajorVersion(); + Table->MinorVersion = CurrentNode->getMinorVersion(); + auto &IDChildren = CurrentNode->getIDChildren(); + auto &StringChildren = CurrentNode->getStringChildren(); + Table->NumberOfNameEntries = StringChildren.size(); + Table->NumberOfIDEntries = IDChildren.size(); + CurrentOffset += sizeof(coff_resource_dir_table); + CurrentRelativeOffset += sizeof(coff_resource_dir_table); + + // Write the directory entries immediately following each directory table. 
+ for (auto const &Child : StringChildren) { + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); + Entry->Identifier.setNameOffset( + StringTableOffsets[Child.second->getStringIndex()]); + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + for (auto const &Child : IDChildren) { + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); + Entry->Identifier.ID = Child.first; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + } + + RelocationAddresses.resize(Data.size()); + // Now write all the resource data entries. + for (auto DataNodes : DataEntriesTreeOrder) { + auto *Entry = reinterpret_cast<coff_resource_data_entry *>(BufferStart + + CurrentOffset); + RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; + Entry->DataRVA = 0; // Set to zero because it is a relocation. 
+ Entry->DataSize = Data[DataNodes->getDataIndex()].size(); + Entry->Codepage = 0; + Entry->Reserved = 0; + CurrentOffset += sizeof(coff_resource_data_entry); + CurrentRelativeOffset += sizeof(coff_resource_data_entry); + } +} + +void WindowsResourceCOFFWriter::writeDirectoryStringTable() { + // Now write the directory string table for .rsrc$01 + uint32_t TotalStringTableSize = 0; + for (auto &String : StringTable) { + uint16_t Length = String.size(); + support::endian::write16le(BufferStart + CurrentOffset, Length); + CurrentOffset += sizeof(uint16_t); + auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset); + std::copy(String.begin(), String.end(), Start); + CurrentOffset += Length * sizeof(UTF16); + TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); + } + CurrentOffset += + alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; +} + +void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { + + // Now write the relocations for .rsrc$01 + // Five symbols already in table before we start, @feat.00 and 2 for each + // .rsrc section. 
+ uint32_t NextSymbolIndex = 5; + for (unsigned i = 0; i < Data.size(); i++) { + auto *Reloc = + reinterpret_cast<coff_relocation *>(BufferStart + CurrentOffset); + Reloc->VirtualAddress = RelocationAddresses[i]; + Reloc->SymbolTableIndex = NextSymbolIndex++; + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; + break; + default: + Reloc->Type = 0; + } + CurrentOffset += sizeof(coff_relocation); + } +} + +Expected<std::unique_ptr<MemoryBuffer>> +writeWindowsResourceCOFF(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser) { + Error E = Error::success(); + WindowsResourceCOFFWriter Writer(MachineType, Parser, E); + if (E) + return std::move(E); + return Writer.write(); +} + +} // namespace object +} // namespace llvm |
