diff options
Diffstat (limited to 'lib/Object')
-rw-r--r-- | lib/Object/ArchiveWriter.cpp | 73 | ||||
-rw-r--r-- | lib/Object/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Object/Decompressor.cpp | 5 | ||||
-rw-r--r-- | lib/Object/ELFObjectFile.cpp | 262 | ||||
-rw-r--r-- | lib/Object/IRSymtab.cpp | 231 | ||||
-rw-r--r-- | lib/Object/MachOObjectFile.cpp | 856 | ||||
-rw-r--r-- | lib/Object/ModuleSummaryIndexObjectFile.cpp | 9 | ||||
-rw-r--r-- | lib/Object/ModuleSymbolTable.cpp | 97 | ||||
-rw-r--r-- | lib/Object/RecordStreamer.cpp | 11 | ||||
-rw-r--r-- | lib/Object/RecordStreamer.h | 23 | ||||
-rw-r--r-- | lib/Object/WasmObjectFile.cpp | 664 |
11 files changed, 1997 insertions, 235 deletions
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index f8e3c5a0a03f7..5b233aab2018a 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -122,12 +122,27 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, } } +static bool isBSDLike(object::Archive::Kind Kind) { + switch (Kind) { + case object::Archive::K_GNU: + return false; + case object::Archive::K_BSD: + case object::Archive::K_DARWIN: + return true; + case object::Archive::K_MIPS64: + case object::Archive::K_DARWIN64: + case object::Archive::K_COFF: + break; + } + llvm_unreachable("not supported for writting"); +} + static void print32(raw_ostream &Out, object::Archive::Kind Kind, uint32_t Val) { - if (Kind == object::Archive::K_GNU) - support::endian::Writer<support::big>(Out).write(Val); - else + if (isBSDLike(Kind)) support::endian::Writer<support::little>(Out).write(Val); + else + support::endian::Writer<support::big>(Out).write(Val); } static void printRestOfMemberHeader( @@ -178,7 +193,7 @@ printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, std::vector<unsigned>::iterator &StringMapIndexIter, const sys::TimePoint<std::chrono::seconds> &ModTime, unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); if (!useStringTable(Thin, Name)) return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); @@ -285,10 +300,10 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, if (!HeaderStartOffset) { HeaderStartOffset = Out.tell(); - if (Kind == object::Archive::K_GNU) - printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); - else + if (isBSDLike(Kind)) printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); + else + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); BodyStartOffset = Out.tell(); print32(Out, Kind, 0); // number of entries or bytes } @@ -307,7 +322,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, return EC; NameOS << '\0'; MemberOffsetRefs.push_back(MemberNum); - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) print32(Out, Kind, NameOffset); print32(Out, Kind, 0); // member offset } @@ -316,10 +331,21 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, if (HeaderStartOffset == 0) return 0; + // ld64 prefers the cctools type archive which pads its string table to a + // boundary of sizeof(int32_t). + if (isBSDLike(Kind)) + for (unsigned P = OffsetToAlignment(NameOS.tell(), sizeof(int32_t)); P--;) + NameOS << '\0'; + StringRef StringTable = NameOS.str(); - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) print32(Out, Kind, StringTable.size()); // byte count of the string table Out << StringTable; + // If there are no symbols, emit an empty symbol table, to satisfy Solaris + // tools, older versions of which expect a symbol table in a non-empty + // archive, regardless of whether there are any symbols in it. + if (StringTable.size() == 0) + print32(Out, Kind, 0); // ld64 requires the next member header to start at an offset that is // 4 bytes aligned. @@ -336,10 +362,10 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, // Patch up the number of symbols. Out.seek(BodyStartOffset); unsigned NumSyms = MemberOffsetRefs.size(); - if (Kind == object::Archive::K_GNU) - print32(Out, Kind, NumSyms); - else + if (isBSDLike(Kind)) print32(Out, Kind, NumSyms * 8); + else + print32(Out, Kind, NumSyms); Out.seek(Pos); return BodyStartOffset + 4; @@ -351,8 +377,7 @@ llvm::writeArchive(StringRef ArcName, bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin, std::unique_ptr<MemoryBuffer> OldArchiveBuf) { - assert((!Thin || Kind == object::Archive::K_GNU) && - "Only the gnu format has a thin mode"); + assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -368,10 +393,6 @@ llvm::writeArchive(StringRef ArcName, std::vector<unsigned> MemberOffsetRefs; - std::vector<std::unique_ptr<MemoryBuffer>> Buffers; - std::vector<MemoryBufferRef> Members; - std::vector<sys::fs::file_status> NewMemberStatus; - unsigned MemberReferenceOffset = 0; if (WriteSymtab) { ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( @@ -382,25 +403,35 @@ llvm::writeArchive(StringRef ArcName, } std::vector<unsigned> StringMapIndexes; - if (Kind != object::Archive::K_BSD) + if (!isBSDLike(Kind)) writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); std::vector<unsigned> MemberOffset; for (const NewArchiveMember &M : NewMembers) { MemoryBufferRef File = M.Buf->getMemBufferRef(); + unsigned Padding = 0; unsigned Pos = Out.tell(); MemberOffset.push_back(Pos); + // ld64 expects the members to be 8-byte aligned for 64-bit content and at + // least 4-byte aligned for 32-bit content. Opt for the larger encoding + // uniformly. This matches the behaviour with cctools and ensures that ld64 + // is happy with archives that we generate. + if (Kind == object::Archive::K_DARWIN) + Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); + printMemberHeader(Out, Kind, Thin, sys::path::filename(M.Buf->getBufferIdentifier()), StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms, - M.Buf->getBufferSize()); + M.Buf->getBufferSize() + Padding); if (!Thin) Out << File.getBuffer(); + while (Padding--) + Out << '\n'; if (Out.tell() % 2) Out << '\n'; } @@ -408,7 +439,7 @@ llvm::writeArchive(StringRef ArcName, if (MemberReferenceOffset) { Out.seek(MemberReferenceOffset); for (unsigned MemberNum : MemberOffsetRefs) { - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) Out.seek(Out.tell() + 4); // skip over the string offset print32(Out, Kind, MemberOffset[MemberNum]); } diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index b895c3fcc0507..2007f560c166d 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMObject ELFObjectFile.cpp Error.cpp IRObjectFile.cpp + IRSymtab.cpp MachOObjectFile.cpp MachOUniversal.cpp ModuleSummaryIndexObjectFile.cpp diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp index bca41fd9f4875..0be602b1fc1ab 100644 --- a/lib/Object/Decompressor.cpp +++ b/lib/Object/Decompressor.cpp @@ -95,8 +95,5 @@ Error Decompressor::decompress(SmallString<32> &Out) { Error Decompressor::decompress(MutableArrayRef<char> Buffer) { size_t Size = Buffer.size(); - zlib::Status Status = zlib::uncompress(SectionData, Buffer.data(), Size); - if (Status != zlib::StatusOK) - return createError("decompression failed"); - return Error::success(); + return zlib::uncompress(SectionData, Buffer.data(), Size); } diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 4bd69e34e3c3b..3f8c81c8e9110 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/MathExtras.h" namespace llvm { @@ -55,71 +57,247 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { return std::move(R); } -SubtargetFeatures ELFObjectFileBase::getFeatures() const { - switch (getEMachine()) { - case ELF::EM_MIPS: { - SubtargetFeatures Features; - unsigned PlatformFlags; - getPlatformFlags(PlatformFlags); +SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const { + SubtargetFeatures Features; + unsigned PlatformFlags; + getPlatformFlags(PlatformFlags); + + switch (PlatformFlags & ELF::EF_MIPS_ARCH) { + case ELF::EF_MIPS_ARCH_1: + break; + case ELF::EF_MIPS_ARCH_2: + Features.AddFeature("mips2"); + break; + case ELF::EF_MIPS_ARCH_3: + Features.AddFeature("mips3"); + break; + case ELF::EF_MIPS_ARCH_4: + Features.AddFeature("mips4"); + break; + case ELF::EF_MIPS_ARCH_5: + Features.AddFeature("mips5"); + break; + case ELF::EF_MIPS_ARCH_32: + Features.AddFeature("mips32"); + break; + case ELF::EF_MIPS_ARCH_64: + Features.AddFeature("mips64"); + break; + case ELF::EF_MIPS_ARCH_32R2: + Features.AddFeature("mips32r2"); + break; + case ELF::EF_MIPS_ARCH_64R2: + Features.AddFeature("mips64r2"); + break; + case ELF::EF_MIPS_ARCH_32R6: + Features.AddFeature("mips32r6"); + break; + case ELF::EF_MIPS_ARCH_64R6: + Features.AddFeature("mips64r6"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + switch (PlatformFlags & ELF::EF_MIPS_MACH) { + case ELF::EF_MIPS_MACH_NONE: + // No feature associated with this value. + break; + case ELF::EF_MIPS_MACH_OCTEON: + Features.AddFeature("cnmips"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } - switch (PlatformFlags & ELF::EF_MIPS_ARCH) { - case ELF::EF_MIPS_ARCH_1: + if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) + Features.AddFeature("mips16"); + if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) + Features.AddFeature("micromips"); + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { + SubtargetFeatures Features; + ARMAttributeParser Attributes; + std::error_code EC = getBuildAttributes(Attributes); + if (EC) + return SubtargetFeatures(); + + // both ARMv7-M and R have to support thumb hardware div + bool isV7 = false; + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) + isV7 = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch) + == ARMBuildAttrs::v7; + + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch_profile)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile)) { + case ARMBuildAttrs::ApplicationProfile: + Features.AddFeature("aclass"); break; - case ELF::EF_MIPS_ARCH_2: - Features.AddFeature("mips2"); + case ARMBuildAttrs::RealTimeProfile: + Features.AddFeature("rclass"); + if (isV7) + Features.AddFeature("hwdiv"); break; - case ELF::EF_MIPS_ARCH_3: - Features.AddFeature("mips3"); + case ARMBuildAttrs::MicroControllerProfile: + Features.AddFeature("mclass"); + if (isV7) + Features.AddFeature("hwdiv"); break; - case ELF::EF_MIPS_ARCH_4: - Features.AddFeature("mips4"); + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::THUMB_ISA_use)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use)) { + default: break; - case ELF::EF_MIPS_ARCH_5: - Features.AddFeature("mips5"); + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("thumb", false); + Features.AddFeature("thumb2", false); break; - case ELF::EF_MIPS_ARCH_32: - Features.AddFeature("mips32"); + case ARMBuildAttrs::AllowThumb32: + Features.AddFeature("thumb2"); break; - case ELF::EF_MIPS_ARCH_64: - Features.AddFeature("mips64"); + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::FP_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::FP_arch)) { + default: break; - case ELF::EF_MIPS_ARCH_32R2: - Features.AddFeature("mips32r2"); + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("vfp2", false); + Features.AddFeature("vfp3", false); + Features.AddFeature("vfp4", false); break; - case ELF::EF_MIPS_ARCH_64R2: - Features.AddFeature("mips64r2"); + case ARMBuildAttrs::AllowFPv2: + Features.AddFeature("vfp2"); break; - case ELF::EF_MIPS_ARCH_32R6: - Features.AddFeature("mips32r6"); + case ARMBuildAttrs::AllowFPv3A: + case ARMBuildAttrs::AllowFPv3B: + Features.AddFeature("vfp3"); break; - case ELF::EF_MIPS_ARCH_64R6: - Features.AddFeature("mips64r6"); + case ARMBuildAttrs::AllowFPv4A: + case ARMBuildAttrs::AllowFPv4B: + Features.AddFeature("vfp4"); break; - default: - llvm_unreachable("Unknown EF_MIPS_ARCH value"); } + } - switch (PlatformFlags & ELF::EF_MIPS_MACH) { - case ELF::EF_MIPS_MACH_NONE: - // No feature associated with this value. + if (Attributes.hasAttribute(ARMBuildAttrs::Advanced_SIMD_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch)) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("neon", false); + Features.AddFeature("fp16", false); + break; + case ARMBuildAttrs::AllowNeon: + Features.AddFeature("neon"); break; - case ELF::EF_MIPS_MACH_OCTEON: - Features.AddFeature("cnmips"); + case ARMBuildAttrs::AllowNeon2: + Features.AddFeature("neon"); + Features.AddFeature("fp16"); break; + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) { default: - llvm_unreachable("Unknown EF_MIPS_ARCH value"); + break; + case ARMBuildAttrs::DisallowDIV: + Features.AddFeature("hwdiv", false); + Features.AddFeature("hwdiv-arm", false); + break; + case ARMBuildAttrs::AllowDIVExt: + Features.AddFeature("hwdiv"); + Features.AddFeature("hwdiv-arm"); + break; } + } - if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) - Features.AddFeature("mips16"); - if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) - Features.AddFeature("micromips"); + return Features; +} - return Features; - } +SubtargetFeatures ELFObjectFileBase::getFeatures() const { + switch (getEMachine()) { + case ELF::EM_MIPS: + return getMIPSFeatures(); + case ELF::EM_ARM: + return getARMFeatures(); default: return SubtargetFeatures(); } } +// FIXME Encode from a tablegen description or target parser. +void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { + if (TheTriple.getSubArch() != Triple::NoSubArch) + return; + + ARMAttributeParser Attributes; + std::error_code EC = getBuildAttributes(Attributes); + if (EC) + return; + + std::string Triple; + // Default to ARM, but use the triple if it's been set. + if (TheTriple.getArch() == Triple::thumb || + TheTriple.getArch() == Triple::thumbeb) + Triple = "thumb"; + else + Triple = "arm"; + + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch)) { + case ARMBuildAttrs::v4: + Triple += "v4"; + break; + case ARMBuildAttrs::v4T: + Triple += "v4t"; + break; + case ARMBuildAttrs::v5T: + Triple += "v5t"; + break; + case ARMBuildAttrs::v5TE: + Triple += "v5te"; + break; + case ARMBuildAttrs::v5TEJ: + Triple += "v5tej"; + break; + case ARMBuildAttrs::v6: + Triple += "v6"; + break; + case ARMBuildAttrs::v6KZ: + Triple += "v6kz"; + break; + case ARMBuildAttrs::v6T2: + Triple += "v6t2"; + break; + case ARMBuildAttrs::v6K: + Triple += "v6k"; + break; + case ARMBuildAttrs::v7: + Triple += "v7"; + break; + case ARMBuildAttrs::v6_M: + Triple += "v6m"; + break; + case ARMBuildAttrs::v6S_M: + Triple += "v6sm"; + break; + case ARMBuildAttrs::v7E_M: + Triple += "v7em"; + break; + } + } + if (!isLittleEndian()) + Triple += "eb"; + + TheTriple.setArchName(Triple); +} + } // end namespace llvm diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp new file mode 100644 index 0000000000000..da1ef9946b505 --- /dev/null +++ b/lib/Object/IRSymtab.cpp @@ -0,0 +1,231 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRSymtab.h" +#include "llvm/Analysis/ObjectUtils.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/StringSaver.h" + +using namespace llvm; +using namespace irsymtab; + +namespace { + +/// Stores the temporary state that is required to build an IR symbol table. +struct Builder { + SmallVector<char, 0> &Symtab; + SmallVector<char, 0> &Strtab; + Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab) + : Symtab(Symtab), Strtab(Strtab) {} + + StringTableBuilder StrtabBuilder{StringTableBuilder::ELF}; + + BumpPtrAllocator Alloc; + StringSaver Saver{Alloc}; + + DenseMap<const Comdat *, unsigned> ComdatMap; + ModuleSymbolTable Msymtab; + SmallPtrSet<GlobalValue *, 8> Used; + Mangler Mang; + Triple TT; + + std::vector<storage::Comdat> Comdats; + std::vector<storage::Module> Mods; + std::vector<storage::Symbol> Syms; + std::vector<storage::Uncommon> Uncommons; + + std::string COFFLinkerOpts; + raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; + + void setStr(storage::Str &S, StringRef Value) { + S.Offset = StrtabBuilder.add(Value); + } + template <typename T> + void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { + R.Offset = Symtab.size(); + R.Size = Objs.size(); + Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()), + reinterpret_cast<const char *>(Objs.data() + Objs.size())); + } + + Error addModule(Module *M); + Error addSymbol(ModuleSymbolTable::Symbol Sym); + + Error build(ArrayRef<Module *> Mods); +}; + +Error Builder::addModule(Module *M) { + collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); + + storage::Module Mod; + Mod.Begin = Msymtab.symbols().size(); + Msymtab.addModule(M); + Mod.End = Msymtab.symbols().size(); + Mods.push_back(Mod); + + if (TT.isOSBinFormatCOFF()) { + if (auto E = M->materializeMetadata()) + return E; + if (Metadata *Val = M->getModuleFlag("Linker Options")) { + MDNode *LinkerOptions = cast<MDNode>(Val); + for (const MDOperand &MDOptions : LinkerOptions->operands()) + for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands()) + COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString(); + } + } + + return Error::success(); +} + +Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) { + Syms.emplace_back(); + storage::Symbol &Sym = Syms.back(); + Sym = {}; + + Sym.UncommonIndex = -1; + storage::Uncommon *Unc = nullptr; + auto Uncommon = [&]() -> storage::Uncommon & { + if (Unc) + return *Unc; + Sym.UncommonIndex = Uncommons.size(); + Uncommons.emplace_back(); + Unc = &Uncommons.back(); + *Unc = {}; + setStr(Unc->COFFWeakExternFallbackName, ""); + return *Unc; + }; + + SmallString<64> Name; + { + raw_svector_ostream OS(Name); + Msymtab.printSymbolName(OS, Msym); + } + setStr(Sym.Name, Saver.save(StringRef(Name))); + + auto Flags = Msymtab.getSymbolFlags(Msym); + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_undefined; + if (Flags & object::BasicSymbolRef::SF_Weak) + Sym.Flags |= 1 << storage::Symbol::FB_weak; + if (Flags & object::BasicSymbolRef::SF_Common) + Sym.Flags |= 1 << storage::Symbol::FB_common; + if (Flags & object::BasicSymbolRef::SF_Indirect) + Sym.Flags |= 1 << storage::Symbol::FB_indirect; + if (Flags & object::BasicSymbolRef::SF_Global) + Sym.Flags |= 1 << storage::Symbol::FB_global; + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + Sym.Flags |= 1 << storage::Symbol::FB_format_specific; + if (Flags & object::BasicSymbolRef::SF_Executable) + Sym.Flags |= 1 << storage::Symbol::FB_executable; + + Sym.ComdatIndex = -1; + auto *GV = Msym.dyn_cast<GlobalValue *>(); + if (!GV) { + setStr(Sym.IRName, ""); + return Error::success(); + } + + setStr(Sym.IRName, GV->getName()); + + if (Used.count(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_used; + if (GV->isThreadLocal()) + Sym.Flags |= 1 << storage::Symbol::FB_tls; + if (GV->hasGlobalUnnamedAddr()) + Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; + if (canBeOmittedFromSymbolTable(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_may_omit; + Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; + + if (Flags & object::BasicSymbolRef::SF_Common) { + Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize( + GV->getType()->getElementType()); + Uncommon().CommonAlign = GV->getAlignment(); + } + + const GlobalObject *Base = GV->getBaseObject(); + if (!Base) + return make_error<StringError>("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + if (const Comdat *C = Base->getComdat()) { + auto P = ComdatMap.insert(std::make_pair(C, Comdats.size())); + Sym.ComdatIndex = P.first->second; + + if (P.second) { + storage::Comdat Comdat; + setStr(Comdat.Name, C->getName()); + Comdats.push_back(Comdat); + } + } + + if (TT.isOSBinFormatCOFF()) { + emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang); + + if ((Flags & object::BasicSymbolRef::SF_Weak) && + (Flags & object::BasicSymbolRef::SF_Indirect)) { + std::string FallbackName; + raw_string_ostream OS(FallbackName); + Msymtab.printSymbolName( + OS, cast<GlobalValue>( + cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts())); + OS.flush(); + setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName)); + } + } + + return Error::success(); +} + +Error Builder::build(ArrayRef<Module *> IRMods) { + storage::Header Hdr; + + assert(!IRMods.empty()); + setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple()); + setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); + TT = Triple(IRMods[0]->getTargetTriple()); + + // This adds the symbols for each module to Msymtab. + for (auto *M : IRMods) + if (Error Err = addModule(M)) + return Err; + + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msym)) + return Err; + + COFFLinkerOptsOS.flush(); + setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + + // We are about to fill in the header's range fields, so reserve space for it + // and copy it in afterwards. + Symtab.resize(sizeof(storage::Header)); + writeRange(Hdr.Modules, Mods); + writeRange(Hdr.Comdats, Comdats); + writeRange(Hdr.Symbols, Syms); + writeRange(Hdr.Uncommons, Uncommons); + + *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr; + + raw_svector_ostream OS(Strtab); + StrtabBuilder.finalizeInOrder(); + StrtabBuilder.write(OS); + + return Error::success(); +} + +} // anonymous namespace + +Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, + SmallVector<char, 0> &Strtab) { + return Builder(Symtab, Strtab).build(Mods); +} diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 5b018676eba3d..1753d2baaedd2 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -106,13 +106,6 @@ static StringRef parseSegmentOrSectionName(const char *P) { return StringRef(P, 16); } -// Helper to advance a section or symbol iterator multiple increments at a time. -template<class T> -static void advance(T &it, size_t Val) { - while (Val--) - ++it; -} - static unsigned getCPUType(const MachOObjectFile &O) { return O.getHeader().cputype; } @@ -368,7 +361,7 @@ static Error parseSegmentLoadCommand( CmdName + " extends past the end of the file"); if (S.vmsize != 0 && S.filesize > S.vmsize) return malformedError("load command " + Twine(LoadCommandIndex) + - " fileoff field in " + CmdName + + " filesize field in " + CmdName + " greater than vmsize field"); IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); } else @@ -784,6 +777,52 @@ static Error checkVersCommand(const MachOObjectFile &Obj, return Error::success(); } +static Error checkNoteCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::note_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_NOTE has incorrect cmdsize"); + MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Nt.offset > FileSize) + return malformedError("offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = Nt.offset; + BigSize += Nt.size; + if (BigSize > FileSize) + return malformedError("size field plus offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size, + "LC_NOTE data")) + return Err; + return Error::success(); +} + +static Error +parseBuildVersionCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char*> &BuildTools, + uint32_t LoadCommandIndex) { + MachO::build_version_command BVC = + getStruct<MachO::build_version_command>(Obj, Load.Ptr); + if (Load.C.cmdsize != + sizeof(MachO::build_version_command) + + BVC.ntools * sizeof(MachO::build_tool_version)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_BUILD_VERSION_COMMAND has incorrect cmdsize"); + + auto Start = Load.Ptr + sizeof(MachO::build_version_command); + BuildTools.resize(BVC.ntools); + for (unsigned i = 0; i < BVC.ntools; ++i) + BuildTools[i] = Start + i * sizeof(MachO::build_tool_version); + + return Error::success(); +} + static Error checkRpathCommand(const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, uint32_t LoadCommandIndex) { @@ -931,7 +970,26 @@ static Error checkThreadCommand(const MachOObjectFile &Obj, sys::swapByteOrder(count); state += sizeof(uint32_t); - if (cputype == MachO::CPU_TYPE_X86_64) { + if (cputype == MachO::CPU_TYPE_I386) { + if (flavor == MachO::x86_THREAD_STATE32) { + if (count != MachO::x86_THREAD_STATE32_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE32_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE32 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE32 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_X86_64) { if (flavor == MachO::x86_THREAD_STATE64) { if (count != MachO::x86_THREAD_STATE64_COUNT) return malformedError("load command " + Twine(LoadCommandIndex) + @@ -1280,6 +1338,12 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, "LC_VERSION_MIN_WATCHOS"))) return; + } else if (Load.C.cmd == MachO::LC_NOTE) { + if ((Err = checkNoteCommand(*this, Load, I, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_BUILD_VERSION) { + if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I))) + return; } else if (Load.C.cmd == MachO::LC_RPATH) { if ((Err = checkRpathCommand(*this, Load, I))) return; @@ -2201,6 +2265,10 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, return std::error_code(); } +uint32_t MachOObjectFile::getLibraryCount() const { + return Libraries.size(); +} + section_iterator MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { DataRefImpl Sec; @@ -2383,6 +2451,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, *ArchFlag = "armv7em"; return Triple("thumbv7em-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7K: + if (McpuDefault) + *McpuDefault = "cortex-a7"; if (ArchFlag) *ArchFlag = "armv7k"; return Triple("armv7k-apple-darwin"); @@ -2393,6 +2463,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, *ArchFlag = "armv7m"; return Triple("thumbv7m-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7S: + if (McpuDefault) + *McpuDefault = "cortex-a7"; if (ArchFlag) *ArchFlag = "armv7s"; return Triple("armv7s-apple-darwin"); @@ -2402,6 +2474,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_ARM64: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_ARM64_ALL: + if (McpuDefault) + *McpuDefault = "cyclone"; if (ArchFlag) *ArchFlag = "arm64"; return Triple("arm64-apple-darwin"); @@ -2674,10 +2748,11 @@ iterator_range<export_iterator> MachOObjectFile::exports() const { return exports(getDyldInfoExportsTrie()); } -MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit) - : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), - RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), - PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {} +MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), + SegmentIndex(-1), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), + PointerSize(is64Bit ? 8 : 4), Done(false) {} void MachORebaseEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2691,22 +2766,29 @@ void MachORebaseEntry::moveToEnd() { } void MachORebaseEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); // If in the middle of some loop, move to next rebasing in loop. SegmentOffset += AdvanceAmount; if (RemainingLoopCount) { --RemainingLoopCount; return; } + // REBASE_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen REBASE_OPCODE_DONE. if (Ptr == Opcodes.end()) { Done = true; return; } bool More = true; - while (More && !Malformed) { + while (More) { // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; uint8_t Byte = *Ptr++; uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + uint32_t Count, Skip; + const char *error = nullptr; switch (Opcode) { case MachO::REBASE_OPCODE_DONE: More = false; @@ -2716,6 +2798,13 @@ void MachORebaseEntry::moveNext() { break; case MachO::REBASE_OPCODE_SET_TYPE_IMM: RebaseType = ImmValue; + if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) { + *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)RebaseType) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " @@ -2723,7 +2812,23 @@ void MachORebaseEntry::moveNext() { break; case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; - SegmentOffset = readULEB128(); + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " @@ -2732,22 +2837,80 @@ void MachORebaseEntry::moveNext() { << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: - SegmentOffset += readULEB128(); + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } SegmentOffset += ImmValue * PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + false); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = PointerSize; - RemainingLoopCount = ImmValue - 1; + Skip = 0; + Count = ImmValue; + if (ImmValue != 0) + RemainingLoopCount = ImmValue - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " @@ -2757,8 +2920,38 @@ void MachORebaseEntry::moveNext() { << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = PointerSize; - RemainingLoopCount = readULEB128() - 1; + Skip = 0; + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " @@ -2768,8 +2961,35 @@ void MachORebaseEntry::moveNext() { << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: - AdvanceAmount = readULEB128() + PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + Count = 1; RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " @@ -2779,8 +2999,46 @@ void MachORebaseEntry::moveNext() { << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: - RemainingLoopCount = readULEB128() - 1; - AdvanceAmount = readULEB128() + PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " @@ -2790,23 +3048,25 @@ void MachORebaseEntry::moveNext() { << "\n"); return; default: - Malformed = true; + *E = malformedError("bad rebase info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; } } } -uint64_t MachORebaseEntry::readULEB128() { +uint64_t MachORebaseEntry::readULEB128(const char **error) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } +int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } @@ -2822,6 +3082,24 @@ StringRef MachORebaseEntry::typeName() const { return "unknown"; } +// For use with the SegIndex of a checked Mach-O Rebase entry +// to get the segment name. +StringRef MachORebaseEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the section name. +StringRef MachORebaseEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the address. +uint64_t MachORebaseEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { #ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); @@ -2834,25 +3112,29 @@ bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { } iterator_range<rebase_iterator> -MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) { - MachORebaseEntry Start(Opcodes, is64); +MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachORebaseEntry Start(&Err, O, Opcodes, is64); Start.moveToFirst(); - MachORebaseEntry Finish(Opcodes, is64); + MachORebaseEntry Finish(&Err, O, Opcodes, is64); Finish.moveToEnd(); return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } -iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const { - return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); +iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) { + return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit()); } -MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) - : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), - Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0), - BindType(0), PointerSize(is64Bit ? 8 : 4), - TableKind(BK), Malformed(false), Done(false) {} +MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), + SegmentIndex(-1), LibraryOrdinalSet(false), Ordinal(0), Flags(0), + Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0), + PointerSize(is64Bit ? 8 : 4), TableKind(BK), Done(false) {} void MachOBindEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2866,24 +3148,31 @@ void MachOBindEntry::moveToEnd() { } void MachOBindEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); // If in the middle of some loop, move to next binding in loop. SegmentOffset += AdvanceAmount; if (RemainingLoopCount) { --RemainingLoopCount; return; } + // BIND_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen BIND_OPCODE_DONE. if (Ptr == Opcodes.end()) { Done = true; return; } bool More = true; - while (More && !Malformed) { + while (More) { // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; uint8_t Byte = *Ptr++; uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK; uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK; int8_t SignExtended; const uint8_t *SymStart; + uint32_t Count, Skip; + const char *error = nullptr; switch (Opcode) { case MachO::BIND_OPCODE_DONE: if (TableKind == Kind::Lazy) { @@ -2899,28 +3188,81 @@ void MachOBindEntry::moveNext() { break; } More = false; - Done = true; moveToEnd(); DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } Ordinal = ImmValue; + LibraryOrdinalSet = true; + if (ImmValue > O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)ImmValue) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: - Ordinal = readULEB128(); + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = readULEB128(&error); + LibraryOrdinalSet = true; + if (error) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Ordinal > (int)O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)Ordinal) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } if (ImmValue) { SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; Ordinal = SignExtended; + LibraryOrdinalSet = true; + if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " + "special ordinal: " + Twine((int)Ordinal) + " for opcode at: " + "0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } } else Ordinal = 0; DEBUG_WITH_TYPE( @@ -2931,9 +3273,16 @@ void MachOBindEntry::moveNext() { case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Flags = ImmValue; SymStart = Ptr; - while (*Ptr) { + while (*Ptr && (Ptr < Opcodes.end())) { ++Ptr; } + if (Ptr == Opcodes.end()) { + *E = malformedError("for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM " + "symbol name extends past opcodes for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } SymbolName = StringRef(reinterpret_cast<const char*>(SymStart), Ptr-SymStart); ++Ptr; @@ -2948,15 +3297,27 @@ void MachOBindEntry::moveNext() { break; case MachO::BIND_OPCODE_SET_TYPE_IMM: BindType = ImmValue; + if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) { + *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)ImmValue) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " << "BindType=" << (int)BindType << "\n"); break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: - Addend = readSLEB128(); - if (TableKind == Kind::Lazy) - Malformed = true; + Addend = readSLEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " @@ -2964,7 +3325,22 @@ void MachOBindEntry::moveNext() { break; case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; - SegmentOffset = readULEB128(); + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " @@ -2973,7 +3349,22 @@ void MachOBindEntry::moveNext() { << "\n"); break; case MachO::BIND_OPCODE_ADD_ADDR_ULEB: - SegmentOffset += readULEB128(); + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " << format("SegmentOffset=0x%06X", @@ -2982,16 +3373,83 @@ void MachOBindEntry::moveNext() { case MachO::BIND_OPCODE_DO_BIND: AdvanceAmount = PointerSize; RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) + + " for opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DO_BIND: " << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: - AdvanceAmount = readULEB128() + PointerSize; + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in " + "lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = readULEB128(&error) + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + // Note, this is not really an error until the next bind but make no sense + // for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another + // bind operation. + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding " + "ULEB) " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } RemainingLoopCount = 0; - if (TableKind == Kind::Lazy) - Malformed = true; DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " @@ -3001,10 +3459,47 @@ void MachOBindEntry::moveNext() { << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = ImmValue * PointerSize + PointerSize; RemainingLoopCount = 0; - if (TableKind == Kind::Lazy) - Malformed = true; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " @@ -3012,10 +3507,65 @@ void MachOBindEntry::moveNext() { SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: - RemainingLoopCount = readULEB128() - 1; - AdvanceAmount = readULEB128() + PointerSize; - if (TableKind == Kind::Lazy) - Malformed = true; + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (count value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + AdvanceAmount = Skip + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (skip value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " @@ -3025,34 +3575,34 @@ void MachOBindEntry::moveNext() { << "\n"); return; default: - Malformed = true; + *E = malformedError("bad bind info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; } } } -uint64_t MachOBindEntry::readULEB128() { +uint64_t MachOBindEntry::readULEB128(const char **error) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -int64_t MachOBindEntry::readSLEB128() { +int64_t MachOBindEntry::readSLEB128(const char **error) { unsigned Count; - int64_t Result = decodeSLEB128(Ptr, &Count); + int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } +int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } @@ -3076,6 +3626,24 @@ uint32_t MachOBindEntry::flags() const { return Flags; } int MachOBindEntry::ordinal() const { return Ordinal; } +// For use with the SegIndex of a checked Mach-O Bind entry +// to get the segment name. +StringRef MachOBindEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the section name. +StringRef MachOBindEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the address. +uint64_t MachOBindEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { #ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); @@ -3087,30 +3655,149 @@ bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { (Done == Other.Done); } +// Build table of sections so SegIndex/SegOffset pairs can be translated. +BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { + uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0; + StringRef CurSegName; + uint64_t CurSegAddress; + for (const SectionRef &Section : Obj->sections()) { + SectionInfo Info; + Section.getName(Info.SectionName); + Info.Address = Section.getAddress(); + Info.Size = Section.getSize(); + Info.SegmentName = + Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); + if (!Info.SegmentName.equals(CurSegName)) { + ++CurSegIndex; + CurSegName = Info.SegmentName; + CurSegAddress = Info.Address; + } + Info.SegmentIndex = CurSegIndex - 1; + Info.OffsetInSegment = Info.Address - CurSegAddress; + Info.SegmentStartAddress = CurSegAddress; + Sections.push_back(Info); + } + MaxSegIndex = CurSegIndex; +} + +// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to +// validate a MachOBindEntry or MachORebaseEntry. +const char * BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex, + uint64_t SegOffset, + bool endInvalid) { + if (SegIndex == -1) + return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB"; + if (SegIndex >= MaxSegIndex) + return "bad segIndex (too large)"; + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset > (SI.OffsetInSegment + SI.Size)) + continue; + if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return nullptr; + } + return "bad segOffset, too large"; +} + +// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for +// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in +// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for +// REBASE_OPCODE_DO_*_TIMES* opcodes. The SegIndex and SegOffset must have +// been already checked. +const char * BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip, + uint8_t PointerSize, + int32_t SegIndex, + uint64_t SegOffset) { + const SectionInfo &SI = findSection(SegIndex, SegOffset); + uint64_t addr = SI.SegmentStartAddress + SegOffset; + if (addr >= SI.Address + SI.Size) + return "bad segOffset, too large"; + uint64_t i = 0; + if (Count > 1) + i = (Skip + PointerSize) * (Count - 1); + else if (Count == 1) + i = Skip + PointerSize; + if (addr + i >= SI.Address + SI.Size) { + // For rebase opcodes they can step from one section to another. + uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress; + const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false); + if (error) + return "bad count and skip, too large"; + } + return nullptr; +} + +// For use with the SegIndex of a checked Mach-O Bind or Rebase entry +// to get the segment name. +StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex == SegIndex) + return SI.SegmentName; + } + llvm_unreachable("invalid SegIndex"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// to get the SectionInfo. +const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection( + int32_t SegIndex, uint64_t SegOffset) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return SI; + } + llvm_unreachable("SegIndex and SegOffset not in any section"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the section name. +StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex, + uint64_t SegOffset) { + return findSection(SegIndex, SegOffset).SectionName; +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the address. +uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { + const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); + return SI.SegmentStartAddress + OffsetInSeg; +} + iterator_range<bind_iterator> -MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64, +MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64, MachOBindEntry::Kind BKind) { - MachOBindEntry Start(Opcodes, is64, BKind); + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); Start.moveToFirst(); - MachOBindEntry Finish(Opcodes, is64, BKind); + MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind); Finish.moveToEnd(); return make_range(bind_iterator(Start), bind_iterator(Finish)); } -iterator_range<bind_iterator> MachOObjectFile::bindTable() const { - return bindTable(getDyldInfoBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Regular); } -iterator_range<bind_iterator> MachOObjectFile::lazyBindTable() const { - return bindTable(getDyldInfoLazyBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Lazy); } -iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const { - return bindTable(getDyldInfoWeakBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Weak); } @@ -3289,6 +3976,21 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::version_min_command>(*this, L.Ptr); } +MachO::note_command +MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::note_command>(*this, L.Ptr); +} + +MachO::build_version_command +MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::build_version_command>(*this, L.Ptr); +} + +MachO::build_tool_version +MachOObjectFile::getBuildToolVersion(unsigned index) const { + return getStruct<MachO::build_tool_version>(*this, BuildTools[index]); +} + MachO::dylib_command MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::dylib_command>(*this, L.Ptr); diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp index 11ace84b9cebb..de1ddab88fd40 100644 --- a/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ b/lib/Object/ModuleSummaryIndexObjectFile.cpp @@ -96,13 +96,18 @@ ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) { // Parse the module summary index out of an IR file and return the summary // index object if found, or nullptr if not. Expected<std::unique_ptr<ModuleSummaryIndex>> -llvm::getModuleSummaryIndexForFile(StringRef Path) { +llvm::getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = MemoryBuffer::getFileOrSTDIN(Path); std::error_code EC = FileOrErr.getError(); if (EC) return errorCodeToError(EC); - MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); + std::unique_ptr<MemoryBuffer> MemBuffer = std::move(FileOrErr.get()); + // If Identifier is non-empty, use it as the buffer identifier, which + // will become the module path in the index. + if (Identifier.empty()) + Identifier = MemBuffer->getBufferIdentifier(); + MemoryBufferRef BufferRef(MemBuffer->getBuffer(), Identifier); if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) return nullptr; Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp index 90488007ff59a..9a935d8e08699 100644 --- a/lib/Object/ModuleSymbolTable.cpp +++ b/lib/Object/ModuleSymbolTable.cpp @@ -43,27 +43,98 @@ void ModuleSymbolTable::addModule(Module *M) { else FirstMod = M; - for (Function &F : *M) - SymTab.push_back(&F); - for (GlobalVariable &GV : M->globals()) + for (GlobalValue &GV : M->global_values()) SymTab.push_back(&GV); - for (GlobalAlias &GA : M->aliases()) - SymTab.push_back(&GA); - - CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(), - [this](StringRef Name, BasicSymbolRef::Flags Flags) { - SymTab.push_back(new (AsmSymbols.Allocate()) - AsmSymbol(Name, Flags)); - }); + + CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) AsmSymbol(Name, Flags)); + }); +} + +// Ensure ELF .symver aliases get the same binding as the defined symbol +// they alias with. +static void handleSymverAliases(const Module &M, RecordStreamer &Streamer) { + if (Streamer.symverAliases().empty()) + return; + + // The name in the assembler will be mangled, but the name in the IR + // might not, so we first compute a mapping from mangled name to GV. + Mangler Mang; + SmallString<64> MangledName; + StringMap<const GlobalValue *> MangledNameMap; + auto GetMangledName = [&](const GlobalValue &GV) { + if (!GV.hasName()) + return; + + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); + MangledNameMap[MangledName] = &GV; + }; + for (const Function &F : M) + GetMangledName(F); + for (const GlobalVariable &GV : M.globals()) + GetMangledName(GV); + for (const GlobalAlias &GA : M.aliases()) + GetMangledName(GA); + + // Walk all the recorded .symver aliases, and set up the binding + // for each alias. + for (auto &Symver : Streamer.symverAliases()) { + const MCSymbol *Aliasee = Symver.first; + MCSymbolAttr Attr = MCSA_Invalid; + + // First check if the aliasee binding was recorded in the asm. + RecordStreamer::State state = Streamer.getSymbolState(Aliasee); + switch (state) { + case RecordStreamer::Global: + case RecordStreamer::DefinedGlobal: + Attr = MCSA_Global; + break; + case RecordStreamer::UndefinedWeak: + case RecordStreamer::DefinedWeak: + Attr = MCSA_Weak; + break; + default: + break; + } + + // If we don't have a symbol attribute from assembly, then check if + // the aliasee was defined in the IR. + if (Attr == MCSA_Invalid) { + const auto *GV = M.getNamedValue(Aliasee->getName()); + if (!GV) { + auto MI = MangledNameMap.find(Aliasee->getName()); + if (MI != MangledNameMap.end()) + GV = MI->second; + else + continue; + } + if (GV->hasExternalLinkage()) + Attr = MCSA_Global; + else if (GV->hasLocalLinkage()) + Attr = MCSA_Local; + else if (GV->isWeakForLinker()) + Attr = MCSA_Weak; + } + if (Attr == MCSA_Invalid) + continue; + + // Set the detected binding on each alias with this aliasee. + for (auto &Alias : Symver.second) + Streamer.EmitSymbolAttribute(Alias, Attr); + } } void ModuleSymbolTable::CollectAsmSymbols( - const Triple &TT, StringRef InlineAsm, + const Module &M, function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + StringRef InlineAsm = M.getModuleInlineAsm(); if (InlineAsm.empty()) return; std::string Err; + const Triple TT(M.getTargetTriple()); const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); assert(T && T->hasMCAsmParser()); @@ -106,6 +177,8 @@ void ModuleSymbolTable::CollectAsmSymbols( if (Parser->Run(false)) return; + handleSymverAliases(M, Streamer); + for (auto &KV : Streamer) { StringRef Key = KV.first(); RecordStreamer::State Value = KV.second; diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp index 572b960bc85f0..c9c27451f8098 100644 --- a/lib/Object/RecordStreamer.cpp +++ b/lib/Object/RecordStreamer.cpp @@ -1,4 +1,4 @@ -//===-- RecordStreamer.cpp - Record asm definde and used symbols ----------===// +//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===// // // The LLVM Compiler Infrastructure // @@ -78,11 +78,11 @@ RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} void RecordStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool) { MCStreamer::EmitInstruction(Inst, STI); } -void RecordStreamer::EmitLabel(MCSymbol *Symbol) { +void RecordStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { MCStreamer::EmitLabel(Symbol); markDefined(*Symbol); } @@ -110,3 +110,8 @@ void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { markDefined(*Symbol); } + +void RecordStreamer::emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) { + SymverAliasMap[Aliasee].push_back(Alias); +} diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h index 617d8a43fbd26..a845ecd786a8e 100644 --- a/lib/Object/RecordStreamer.h +++ b/lib/Object/RecordStreamer.h @@ -20,6 +20,10 @@ public: private: StringMap<State> Symbols; + // Map of aliases created by .symver directives, saved so we can update + // their symbol binding after parsing complete. This maps from each + // aliasee to its list of aliases. + DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap; void markDefined(const MCSymbol &Symbol); void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); void markUsed(const MCSymbol &Symbol); @@ -30,14 +34,29 @@ public: const_iterator begin(); const_iterator end(); RecordStreamer(MCContext &Context); - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; - void EmitLabel(MCSymbol *Symbol) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override; + void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; + /// Record .symver aliases for later processing. + void emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) override; + /// Return the map of .symver aliasee to associated aliases. + DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() { + return SymverAliasMap; + } + /// Get the state recorded for the given symbol. + State getSymbolState(const MCSymbol *Sym) { + auto SI = Symbols.find(Sym->getName()); + if (SI == Symbols.end()) + return NeverSeen; + return SI->second; + } }; } #endif diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 2b61a8a034f6c..fc1dca35424e3 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -1,4 +1,4 @@ -//===- WasmObjectFile.cpp - Wasm object file implementation -----*- C++ -*-===// +//===- WasmObjectFile.cpp - Wasm object file implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,26 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/Wasm.h" +#include <algorithm> +#include <cstdint> +#include <system_error> -namespace llvm { -namespace object { +using namespace llvm; +using namespace object; Expected<std::unique_ptr<WasmObjectFile>> ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { @@ -24,34 +38,139 @@ ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { return std::move(ObjectFile); } -namespace { +#define VARINT7_MAX ((1<<7)-1) +#define VARINT7_MIN (-(1<<7)) +#define VARUINT7_MAX (1<<7) +#define VARUINT1_MAX (1) -uint32_t readUint32(const uint8_t *&Ptr) { +static uint8_t readUint8(const uint8_t *&Ptr) { return *Ptr++; } + +static uint32_t readUint32(const uint8_t *&Ptr) { uint32_t Result = support::endian::read32le(Ptr); Ptr += sizeof(Result); return Result; } -uint64_t readULEB128(const uint8_t *&Ptr) { +static int32_t readFloat32(const uint8_t *&Ptr) { + int32_t Result = 0; + memcpy(&Result, Ptr, sizeof(Result)); + Ptr += sizeof(Result); + return Result; +} + +static int64_t readFloat64(const uint8_t *&Ptr) { + int64_t Result = 0; + memcpy(&Result, Ptr, sizeof(Result)); + Ptr += sizeof(Result); + return Result; +} + +static uint64_t readULEB128(const uint8_t *&Ptr) { unsigned Count; uint64_t Result = decodeULEB128(Ptr, &Count); Ptr += Count; return Result; } -StringRef readString(const uint8_t *&Ptr) { +static StringRef readString(const uint8_t *&Ptr) { uint32_t StringLen = readULEB128(Ptr); StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen); Ptr += StringLen; return Return; } -Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr, - const uint8_t *Start) { +static int64_t readLEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeSLEB128(Ptr, &Count); + Ptr += Count; + return Result; +} + +static uint8_t readVaruint1(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= VARUINT1_MAX && result >= 0); + return result; +} + +static int8_t readVarint7(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= VARINT7_MAX && result >= VARINT7_MIN); + return result; +} + +static uint8_t readVaruint7(const uint8_t *&Ptr) { + uint64_t result = readULEB128(Ptr); + assert(result <= VARUINT7_MAX); + return result; +} + +static int32_t readVarint32(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= INT32_MAX && result >= INT32_MIN); + return result; +} + +static uint32_t readVaruint32(const uint8_t *&Ptr) { + uint64_t result = readULEB128(Ptr); + assert(result <= UINT32_MAX); + return result; +} + +static int64_t readVarint64(const uint8_t *&Ptr) { + return readLEB128(Ptr); +} + +static uint8_t readOpcode(const uint8_t *&Ptr) { + return readUint8(Ptr); +} + +static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { + Expr.Opcode = readOpcode(Ptr); + + switch (Expr.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + Expr.Value.Int32 = readVarint32(Ptr); + break; + case wasm::WASM_OPCODE_I64_CONST: + Expr.Value.Int64 = readVarint64(Ptr); + break; + case wasm::WASM_OPCODE_F32_CONST: + Expr.Value.Float32 = readFloat32(Ptr); + break; + case wasm::WASM_OPCODE_F64_CONST: + Expr.Value.Float64 = readFloat64(Ptr); + break; + case wasm::WASM_OPCODE_GET_GLOBAL: + Expr.Value.Global = readUint32(Ptr); + break; + default: + return make_error<GenericBinaryError>("Invalid opcode in init_expr", + object_error::parse_failed); + } + + uint8_t EndOpcode = readOpcode(Ptr); + if (EndOpcode != wasm::WASM_OPCODE_END) { + return make_error<GenericBinaryError>("Invalid init_expr", + object_error::parse_failed); + } + return Error::success(); +} + +static wasm::WasmLimits readLimits(const uint8_t *&Ptr) { + wasm::WasmLimits Result; + Result.Flags = readVaruint1(Ptr); + Result.Initial = readVaruint32(Ptr); + if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) + Result.Maximum = readVaruint32(Ptr); + return Result; +} + +static Error readSection(WasmSection &Section, const uint8_t *&Ptr, + const uint8_t *Start) { // TODO(sbc): Avoid reading past EOF in the case of malformed files. Section.Offset = Ptr - Start; - Section.Type = readULEB128(Ptr); - uint32_t Size = readULEB128(Ptr); + Section.Type = readVaruint7(Ptr); + uint32_t Size = readVaruint32(Ptr); if (Size == 0) return make_error<StringError>("Zero length section", object_error::parse_failed); @@ -59,10 +178,9 @@ Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr, Ptr += Size; return Error::success(); } -} WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) - : ObjectFile(Binary::ID_Wasm, Buffer) { + : ObjectFile(Binary::ID_Wasm, Buffer), StartFunction(-1) { ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -79,21 +197,388 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) } const uint8_t *Eof = getPtr(getData().size()); - wasm::WasmSection Sec; + WasmSection Sec; while (Ptr < Eof) { if ((Err = readSection(Sec, Ptr, getPtr(0)))) return; - if (Sec.Type == wasm::WASM_SEC_USER) { - if ((Err = parseUserSection(Sec, Sec.Content.data(), Sec.Content.size()))) - return; - } + if ((Err = parseSection(Sec))) + return; + Sections.push_back(Sec); } } -Error WasmObjectFile::parseUserSection(wasm::WasmSection &Sec, - const uint8_t *Ptr, size_t Length) { +Error WasmObjectFile::parseSection(WasmSection &Sec) { + const uint8_t* Start = Sec.Content.data(); + const uint8_t* End = Start + Sec.Content.size(); + switch (Sec.Type) { + case wasm::WASM_SEC_CUSTOM: + return parseCustomSection(Sec, Start, End); + case wasm::WASM_SEC_TYPE: + return parseTypeSection(Start, End); + case wasm::WASM_SEC_IMPORT: + return parseImportSection(Start, End); + case wasm::WASM_SEC_FUNCTION: + return parseFunctionSection(Start, End); + case wasm::WASM_SEC_TABLE: + return parseTableSection(Start, End); + case wasm::WASM_SEC_MEMORY: + return parseMemorySection(Start, End); + case wasm::WASM_SEC_GLOBAL: + return parseGlobalSection(Start, End); + case wasm::WASM_SEC_EXPORT: + return parseExportSection(Start, End); + case wasm::WASM_SEC_START: + return parseStartSection(Start, End); + case wasm::WASM_SEC_ELEM: + return parseElemSection(Start, End); + case wasm::WASM_SEC_CODE: + return parseCodeSection(Start, End); + case wasm::WASM_SEC_DATA: + return parseDataSection(Start, End); + default: + return make_error<GenericBinaryError>("Bad section type", + object_error::parse_failed); + } +} + +Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + switch (Type) { + case wasm::WASM_NAMES_FUNCTION: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + /*uint32_t Index =*/readVaruint32(Ptr); + StringRef Name = readString(Ptr); + if (Name.size()) + Symbols.emplace_back(Name, + WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME); + } + break; + } + // Ignore local names for now + case wasm::WASM_NAMES_LOCAL: + default: + Ptr += Size; + break; + } + } + + if (Ptr != End) + return make_error<GenericBinaryError>("Name section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { + for (WasmSection& Section : Sections) { + if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) + return &Section; + } + return nullptr; +} + +WasmSection* WasmObjectFile::findSectionByType(uint32_t Type) { + assert(Type != wasm::WASM_SEC_CUSTOM); + for (WasmSection& Section : Sections) { + if (Section.Type == Type) + return &Section; + } + return nullptr; +} + +Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, + const uint8_t *End) { + uint8_t SectionCode = readVarint7(Ptr); + WasmSection* Section = nullptr; + if (SectionCode == wasm::WASM_SEC_CUSTOM) { + StringRef Name = readString(Ptr); + Section = findCustomSectionByName(Name); + } else { + Section = findSectionByType(SectionCode); + } + if (!Section) + return make_error<GenericBinaryError>("Invalid section code", + object_error::parse_failed); + uint32_t RelocCount = readVaruint32(Ptr); + while (RelocCount--) { + wasm::WasmRelocation Reloc; + memset(&Reloc, 0, sizeof(Reloc)); + Reloc.Type = readVaruint32(Ptr); + Reloc.Offset = readVaruint32(Ptr); + Reloc.Index = readVaruint32(Ptr); + switch (Reloc.Type) { + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + break; + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + Reloc.Addend = readVaruint32(Ptr); + break; + default: + return make_error<GenericBinaryError>("Bad relocation type", + object_error::parse_failed); + } + Section->Relocations.push_back(Reloc); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Reloc section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCustomSection(WasmSection &Sec, + const uint8_t *Ptr, const uint8_t *End) { Sec.Name = readString(Ptr); + if (Sec.Name == "name") { + if (Error Err = parseNameSection(Ptr, End)) + return Err; + } else if (Sec.Name.startswith("reloc.")) { + if (Error Err = parseRelocSection(Sec.Name, Ptr, End)) + return Err; + } + return Error::success(); +} + +Error WasmObjectFile::parseTypeSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Signatures.reserve(Count); + while (Count--) { + wasm::WasmSignature Sig; + Sig.ReturnType = wasm::WASM_TYPE_NORESULT; + int8_t Form = readVarint7(Ptr); + if (Form != wasm::WASM_TYPE_FUNC) { + return make_error<GenericBinaryError>("Invalid signature type", + object_error::parse_failed); + } + uint32_t ParamCount = readVaruint32(Ptr); + Sig.ParamTypes.reserve(ParamCount); + while (ParamCount--) { + uint32_t ParamType = readVarint7(Ptr); + Sig.ParamTypes.push_back(ParamType); + } + uint32_t ReturnCount = readVaruint32(Ptr); + if (ReturnCount) { + if (ReturnCount != 1) { + return make_error<GenericBinaryError>( + "Multiple return types not supported", object_error::parse_failed); + } + Sig.ReturnType = readVarint7(Ptr); + } + Signatures.push_back(Sig); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Type section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Imports.reserve(Count); + while (Count--) { + wasm::WasmImport Im; + Im.Module = readString(Ptr); + Im.Field = readString(Ptr); + Im.Kind = readUint8(Ptr); + switch (Im.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + Im.SigIndex = readVaruint32(Ptr); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT); + break; + case wasm::WASM_EXTERNAL_GLOBAL: + Im.GlobalType = readVarint7(Ptr); + Im.GlobalMutable = readVaruint1(Ptr); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT); + break; + default: + // TODO(sbc): Handle other kinds of imports + return make_error<GenericBinaryError>( + "Unexpected import kind", object_error::parse_failed); + } + Imports.push_back(Im); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Import section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseFunctionSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + FunctionTypes.reserve(Count); + while (Count--) { + FunctionTypes.push_back(readVaruint32(Ptr)); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Function section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseTableSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Tables.reserve(Count); + while (Count--) { + wasm::WasmTable Table; + Table.ElemType = readVarint7(Ptr); + if (Table.ElemType != wasm::WASM_TYPE_ANYFUNC) { + return make_error<GenericBinaryError>("Invalid table element type", + object_error::parse_failed); + } + Table.Limits = readLimits(Ptr); + Tables.push_back(Table); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Table section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseMemorySection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Memories.reserve(Count); + while (Count--) { + Memories.push_back(readLimits(Ptr)); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Memory section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Globals.reserve(Count); + while (Count--) { + wasm::WasmGlobal Global; + Global.Type = readVarint7(Ptr); + Global.Mutable = readVaruint1(Ptr); + if (Error Err = readInitExpr(Global.InitExpr, Ptr)) + return Err; + Globals.push_back(Global); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Global section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Exports.reserve(Count); + while (Count--) { + wasm::WasmExport Ex; + Ex.Name = readString(Ptr); + Ex.Kind = readUint8(Ptr); + Ex.Index = readVaruint32(Ptr); + Exports.push_back(Ex); + switch (Ex.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT); + break; + case wasm::WASM_EXTERNAL_GLOBAL: + Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT); + break; + default: + // TODO(sbc): Handle other kinds of exports + return make_error<GenericBinaryError>( + "Unexpected export kind", object_error::parse_failed); + } + } + if (Ptr != End) + return make_error<GenericBinaryError>("Export section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) { + StartFunction = readVaruint32(Ptr); + if (StartFunction < FunctionTypes.size()) + return make_error<GenericBinaryError>("Invalid start function", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t FunctionCount = readVaruint32(Ptr); + if (FunctionCount != FunctionTypes.size()) { + return make_error<GenericBinaryError>("Invalid function count", + object_error::parse_failed); + } + + CodeSection = ArrayRef<uint8_t>(Ptr, End - Ptr); + + while (FunctionCount--) { + wasm::WasmFunction Function; + uint32_t FunctionSize = readVaruint32(Ptr); + const uint8_t *FunctionEnd = Ptr + FunctionSize; + + uint32_t NumLocalDecls = readVaruint32(Ptr); + Function.Locals.reserve(NumLocalDecls); + while (NumLocalDecls--) { + wasm::WasmLocalDecl Decl; + Decl.Count = readVaruint32(Ptr); + Decl.Type = readVarint7(Ptr); + Function.Locals.push_back(Decl); + } + + uint32_t BodySize = FunctionEnd - Ptr; + Function.Body = ArrayRef<uint8_t>(Ptr, BodySize); + Ptr += BodySize; + assert(Ptr == FunctionEnd); + Functions.push_back(Function); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Code section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + ElemSegments.reserve(Count); + while (Count--) { + wasm::WasmElemSegment Segment; + Segment.TableIndex = readVaruint32(Ptr); + if (Segment.TableIndex != 0) { + return make_error<GenericBinaryError>("Invalid TableIndex", + object_error::parse_failed); + } + if (Error Err = readInitExpr(Segment.Offset, Ptr)) + return Err; + uint32_t NumElems = readVaruint32(Ptr); + while (NumElems--) { + Segment.Functions.push_back(readVaruint32(Ptr)); + } + ElemSegments.push_back(Segment); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Elem section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + DataSegments.reserve(Count); + while (Count--) { + wasm::WasmDataSegment Segment; + Segment.Index = readVaruint32(Ptr); + if (Error Err = readInitExpr(Segment.Offset, Ptr)) + return Err; + uint32_t Size = readVaruint32(Ptr); + Segment.Content = ArrayRef<uint8_t>(Ptr, Size); + Ptr += Size; + DataSegments.push_back(Segment); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Data section ended prematurely", + object_error::parse_failed); return Error::success(); } @@ -105,37 +590,48 @@ const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { return Header; } -void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { - llvm_unreachable("not yet implemented"); -} - -std::error_code WasmObjectFile::printSymbolName(raw_ostream &OS, - DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return object_error::invalid_symbol_index; -} +void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; } uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return 0; + const WasmSymbol &Sym = getWasmSymbol(Symb); + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + return object::SymbolRef::SF_Undefined | SymbolRef::SF_Executable; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + return object::SymbolRef::SF_Global | SymbolRef::SF_Executable; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return object::SymbolRef::SF_Executable; + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + return object::SymbolRef::SF_Undefined; + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return object::SymbolRef::SF_Global; + } + llvm_unreachable("Unknown WasmSymbol::SymbolType"); } basic_symbol_iterator WasmObjectFile::symbol_begin() const { - return BasicSymbolRef(DataRefImpl(), this); + DataRefImpl Ref; + Ref.d.a = 0; + return BasicSymbolRef(Ref, this); } basic_symbol_iterator WasmObjectFile::symbol_end() const { - return BasicSymbolRef(DataRefImpl(), this); + DataRefImpl Ref; + Ref.d.a = Symbols.size(); + return BasicSymbolRef(Ref, this); +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(DataRefImpl Symb) const { + return Symbols[Symb.d.a]; } Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + const WasmSymbol &Sym = getWasmSymbol(Symb); + return Sym.Name; } Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + return (uint64_t)Symb.d.a; } uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { @@ -169,7 +665,7 @@ void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, StringRef &Res) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; + const WasmSection &S = Sections[Sec.d.a]; #define ECase(X) \ case wasm::WASM_SEC_##X: \ Res = #X; \ @@ -186,7 +682,7 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, ECase(ELEM); ECase(CODE); ECase(DATA); - case wasm::WASM_SEC_USER: + case wasm::WASM_SEC_CUSTOM: Res = S.Name; break; default: @@ -199,13 +695,13 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; + const WasmSection &S = Sections[Sec.d.a]; return S.Content.size(); } std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; + const WasmSection &S = Sections[Sec.d.a]; // This will never fail since wasm sections can never be empty (user-sections // must have a name and non-user sections each have a defined structure). Res = StringRef(reinterpret_cast<const char *>(S.Content.data()), @@ -222,13 +718,11 @@ bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const { } bool WasmObjectFile::isSectionText(DataRefImpl Sec) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; - return S.Type == wasm::WASM_SEC_CODE; + return getWasmSection(Sec).Type == wasm::WASM_SEC_CODE; } bool WasmObjectFile::isSectionData(DataRefImpl Sec) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; - return S.Type == wasm::WASM_SEC_DATA; + return getWasmSection(Sec).Type == wasm::WASM_SEC_DATA; } bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; } @@ -237,31 +731,28 @@ bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; } bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; } -relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Sec) const { - llvm_unreachable("not yet implemented"); - RelocationRef Rel; - return relocation_iterator(Rel); +relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Ref) const { + DataRefImpl RelocRef; + RelocRef.d.a = Ref.d.a; + RelocRef.d.b = 0; + return relocation_iterator(RelocationRef(RelocRef, this)); } -relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Sec) const { - llvm_unreachable("not yet implemented"); - RelocationRef Rel; - return relocation_iterator(Rel); -} - -section_iterator WasmObjectFile::getRelocatedSection(DataRefImpl Sec) const { - llvm_unreachable("not yet implemented"); - SectionRef Ref; - return section_iterator(Ref); +relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const { + const WasmSection &Sec = getWasmSection(Ref); + DataRefImpl RelocRef; + RelocRef.d.a = Ref.d.a; + RelocRef.d.b = Sec.Relocations.size(); + return relocation_iterator(RelocationRef(RelocRef, this)); } void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { - llvm_unreachable("not yet implemented"); + Rel.d.b++; } -uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Rel) const { - llvm_unreachable("not yet implemented"); - return 0; +uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + return Rel.Offset; } symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { @@ -270,14 +761,28 @@ symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { return symbol_iterator(Ref); } -uint64_t WasmObjectFile::getRelocationType(DataRefImpl Rel) const { - llvm_unreachable("not yet implemented"); - return 0; +uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + return Rel.Type; } void WasmObjectFile::getRelocationTypeName( - DataRefImpl Rel, SmallVectorImpl<char> &Result) const { - llvm_unreachable("not yet implemented"); + DataRefImpl Ref, SmallVectorImpl<char> &Result) const { + const wasm::WasmRelocation& Rel = getWasmRelocation(Ref); + StringRef Res = "Unknown"; + +#define WASM_RELOC(name, value) \ + case wasm::name: \ + Res = #name; \ + break; + + switch (Rel.Type) { +#include "llvm/Support/WasmRelocs/WebAssembly.def" + } + +#undef WASM_RELOC + + Result.append(Res.begin(), Res.end()); } section_iterator WasmObjectFile::section_begin() const { @@ -304,10 +809,25 @@ SubtargetFeatures WasmObjectFile::getFeatures() const { bool WasmObjectFile::isRelocatableObject() const { return false; } -const wasm::WasmSection * +const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const { + assert(Ref.d.a < Sections.size()); + return Sections[Ref.d.a]; +} + +const WasmSection & WasmObjectFile::getWasmSection(const SectionRef &Section) const { - return &Sections[Section.getRawDataRefImpl().d.a]; + return getWasmSection(Section.getRawDataRefImpl()); } -} // end namespace object -} // end namespace llvm +const wasm::WasmRelocation & +WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const { + return getWasmRelocation(Ref.getRawDataRefImpl()); +} + +const wasm::WasmRelocation & +WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const { + assert(Ref.d.a < Sections.size()); + const WasmSection& Sec = Sections[Ref.d.a]; + assert(Ref.d.b < Sec.Relocations.size()); + return Sec.Relocations[Ref.d.b]; +} |