diff options
Diffstat (limited to 'lib/Object')
-rw-r--r-- | lib/Object/Archive.cpp | 12 | ||||
-rw-r--r-- | lib/Object/ArchiveWriter.cpp | 33 | ||||
-rw-r--r-- | lib/Object/Binary.cpp | 3 | ||||
-rw-r--r-- | lib/Object/COFFImportFile.cpp | 33 | ||||
-rw-r--r-- | lib/Object/COFFModuleDefinition.cpp | 13 | ||||
-rw-r--r-- | lib/Object/COFFObjectFile.cpp | 40 | ||||
-rw-r--r-- | lib/Object/ELF.cpp | 273 | ||||
-rw-r--r-- | lib/Object/ELFObjectFile.cpp | 16 | ||||
-rw-r--r-- | lib/Object/IRSymtab.cpp | 18 | ||||
-rw-r--r-- | lib/Object/MachOObjectFile.cpp | 84 | ||||
-rw-r--r-- | lib/Object/ModuleSymbolTable.cpp | 157 | ||||
-rw-r--r-- | lib/Object/Object.cpp | 2 | ||||
-rw-r--r-- | lib/Object/ObjectFile.cpp | 1 | ||||
-rw-r--r-- | lib/Object/RecordStreamer.cpp | 123 | ||||
-rw-r--r-- | lib/Object/RecordStreamer.h | 42 | ||||
-rw-r--r-- | lib/Object/SymbolSize.cpp | 4 | ||||
-rw-r--r-- | lib/Object/SymbolicFile.cpp | 1 | ||||
-rw-r--r-- | lib/Object/WasmObjectFile.cpp | 981 | ||||
-rw-r--r-- | lib/Object/WindowsResource.cpp | 6 |
19 files changed, 1271 insertions, 571 deletions
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index b17eefd220b8..8ec115a5566c 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -175,15 +175,19 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { "the end of the string table for archive member " "header at offset " + Twine(ArchiveOffset)); } - const char *addr = Parent->getStringTable().begin() + StringOffset; // GNU long file names end with a "/\n". if (Parent->kind() == Archive::K_GNU || Parent->kind() == Archive::K_GNU64) { - StringRef::size_type End = StringRef(addr).find('\n'); - return StringRef(addr, End - 1); + size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); + if (End == StringRef::npos || End < 1 || + Parent->getStringTable()[End - 1] != '/') { + return malformedError("string table at long name offset " + + Twine(StringOffset) + "not terminated"); + } + return Parent->getStringTable().slice(StringOffset, End - 1); } - return addr; + return Parent->getStringTable().begin() + StringOffset; } if (Name.startswith("#1/")) { diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index b3b812daae2e..ea17b2220a0b 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -35,15 +35,6 @@ using namespace llvm; -// The SYM64 format is used when an archive's member offsets are larger than -// 32-bits can hold. The need for this shift in format is detected by -// writeArchive. To test this we need to generate a file with a member that has -// an offset larger than 32-bits but this demands a very slow test. To speed -// the test up we use this flag to pretend like the cutoff happens before -// 32-bits and instead happens at some much smaller value. -static cl::opt<int> Sym64Threshold("sym64-threshold", cl::Hidden, - cl::init(32)); - NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} @@ -145,10 +136,8 @@ static bool isBSDLike(object::Archive::Kind Kind) { template <class T> static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { - if (isBSDLike(Kind)) - support::endian::Writer<support::little>(Out).write(Val); - else - support::endian::Writer<support::big>(Out).write(Val); + support::endian::write(Out, Val, + isBSDLike(Kind) ? support::little : support::big); } static void printRestOfMemberHeader( @@ -216,7 +205,7 @@ static std::string computeRelativePath(StringRef From, StringRef To) { for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) sys::path::append(Relative, *ToI); -#ifdef LLVM_ON_WIN32 +#ifdef _WIN32 // Replace backslashes with slashes so that the path is portable between *nix // and Windows. std::replace(Relative.begin(), Relative.end(), '\\', '/'); @@ -305,8 +294,7 @@ static bool isArchiveSymbol(const object::BasicSymbolRef &S) { return false; if (!(Symflags & object::SymbolRef::SF_Global)) return false; - if (Symflags & object::SymbolRef::SF_Undefined && - !(Symflags & object::SymbolRef::SF_Indirect)) + if (Symflags & object::SymbolRef::SF_Undefined) return false; return true; } @@ -490,6 +478,19 @@ Error llvm::writeArchive(StringRef ArcName, // We assume 32-bit symbols to see if 32-bit symbols are possible or not. MaxOffset += M.Symbols.size() * 4; } + + // The SYM64 format is used when an archive's member offsets are larger than + // 32-bits can hold. The need for this shift in format is detected by + // writeArchive. To test this we need to generate a file with a member that + // has an offset larger than 32-bits but this demands a very slow test. To + // speed the test up we use this environment variable to pretend like the + // cutoff happens before 32-bits and instead happens at some much smaller + // value. + const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); + int Sym64Threshold = 32; + if (Sym64Env) + StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); + // If LastOffset isn't going to fit in a 32-bit varible we need to switch // to 64-bit. Note that the file can be larger than 4GB as long as the last // member starts before the 4GB offset. diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp index c4565db459e6..d7c25921ec36 100644 --- a/lib/Object/Binary.cpp +++ b/lib/Object/Binary.cpp @@ -75,6 +75,9 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, return MachOUniversalBinary::create(Buffer); case file_magic::windows_resource: return WindowsResource::createWindowsResource(Buffer); + case file_magic::pdb: + // PDB does not support the Binary interface. + return errorCodeToError(object_error::invalid_file_type); case file_magic::unknown: case file_magic::coff_cl_gl_object: // Unrecognized object file format. diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index 93631f1ad811..dc11cc4bcffe 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -91,7 +91,15 @@ static void writeStringTable(std::vector<uint8_t> &B, } static ImportNameType getNameType(StringRef Sym, StringRef ExtName, - MachineTypes Machine) { + MachineTypes Machine, bool MinGW) { + // A decorated stdcall function in MSVC is exported with the + // type IMPORT_NAME, and the exported function name includes the + // the leading underscore. In MinGW on the other hand, a decorated + // stdcall function still omits the underscore (IMPORT_NAME_NOPREFIX). + // See the comment in isDecorated in COFFModuleDefinition.cpp for more + // details. + if (ExtName.startswith("_") && ExtName.contains('@') && !MinGW) + return IMPORT_NAME; if (Sym != ExtName) return IMPORT_NAME_UNDECORATE; if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) @@ -538,7 +546,12 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, u16(0), IMAGE_SYM_CLASS_WEAK_EXTERNAL, 1}, - {{{2, 0, 0, 0, 3, 0, 0, 0}}, u32(0), u16(0), u16(0), uint8_t(0), 0}, + {{{2, 0, 0, 0, IMAGE_WEAK_EXTERN_SEARCH_ALIAS, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_NULL, + 0}, }; SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t); @@ -558,7 +571,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, Error writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef<COFFShortExport> Exports, - MachineTypes Machine, bool MakeWeakAliases) { + MachineTypes Machine, bool MinGW) { std::vector<NewArchiveMember> Members; ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); @@ -576,12 +589,6 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path, if (E.Private) continue; - if (E.isWeak() && MakeWeakAliases) { - Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false)); - Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true)); - continue; - } - ImportType ImportType = IMPORT_CODE; if (E.Data) ImportType = IMPORT_DATA; @@ -589,7 +596,7 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path, ImportType = IMPORT_CONST; StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; - ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); + ImportNameType NameType = getNameType(SymbolName, E.Name, Machine, MinGW); Expected<std::string> Name = E.ExtName.empty() ? SymbolName : replace(SymbolName, E.Name, E.ExtName); @@ -597,6 +604,12 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path, if (!Name) return Name.takeError(); + if (!E.AliasTarget.empty() && *Name != E.AliasTarget) { + Members.push_back(OF.createWeakExternal(E.AliasTarget, *Name, false)); + Members.push_back(OF.createWeakExternal(E.AliasTarget, *Name, true)); + continue; + } + Members.push_back( OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); } diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp index a571354648d6..c703071b86e0 100644 --- a/lib/Object/COFFModuleDefinition.cpp +++ b/lib/Object/COFFModuleDefinition.cpp @@ -37,6 +37,7 @@ enum Kind { Identifier, Comma, Equal, + EqualEqual, KwBase, KwConstant, KwData, @@ -104,9 +105,10 @@ public: } case '=': Buf = Buf.drop_front(); - // GNU dlltool accepts both = and ==. - if (Buf.startswith("=")) + if (Buf.startswith("=")) { Buf = Buf.drop_front(); + return Token(EqualEqual, "=="); + } return Token(Equal, "="); case ',': Buf = Buf.drop_front(); @@ -282,6 +284,13 @@ private: E.Private = true; continue; } + if (Tok.K == EqualEqual) { + read(); + E.AliasTarget = Tok.Value; + if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef)) + E.AliasTarget = std::string("_").append(E.AliasTarget); + continue; + } unget(); Info.Exports.push_back(E); return Error::success(); diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index b544fa5c1470..d72da3187e07 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -217,10 +217,10 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { if (Symb.isExternal() || Symb.isWeakExternal()) Result |= SymbolRef::SF_Global; - if (Symb.isWeakExternal()) { + if (const coff_aux_weak_external *AWE = Symb.getWeakExternal()) { Result |= SymbolRef::SF_Weak; - // We use indirect to allow the archiver to write weak externs - Result |= SymbolRef::SF_Indirect; + if (AWE->Characteristics != COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS) + Result |= SymbolRef::SF_Undefined; } if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE) @@ -235,7 +235,7 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { if (Symb.isCommon()) Result |= SymbolRef::SF_Common; - if (Symb.isAnyUndefined()) + if (Symb.isUndefined()) Result |= SymbolRef::SF_Undefined; return Result; @@ -910,6 +910,12 @@ Triple::ArchType COFFObjectFile::getArch() const { } } +Expected<uint64_t> COFFObjectFile::getStartAddress() const { + if (PE32Header) + return PE32Header->AddressOfEntryPoint; + return 0; +} + iterator_range<import_directory_iterator> COFFObjectFile::import_directories() const { return make_range(import_directory_begin(), import_directory_end()); @@ -944,7 +950,7 @@ COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { std::error_code COFFObjectFile::getDataDirectory(uint32_t Index, const data_directory *&Res) const { - // Error if if there's no data directory or the index is out of range. + // Error if there's no data directory or the index is out of range. if (!DataDirectory) { Res = nullptr; return object_error::parse_failed; @@ -973,6 +979,21 @@ std::error_code COFFObjectFile::getSection(int32_t Index, return object_error::parse_failed; } +std::error_code COFFObjectFile::getSection(StringRef SectionName, + const coff_section *&Result) const { + Result = nullptr; + StringRef SecName; + for (const SectionRef &Section : sections()) { + if (std::error_code E = Section.getName(SecName)) + return E; + if (SecName == SectionName) { + Result = getCOFFSection(Section); + return std::error_code(); + } + } + return object_error::parse_failed; +} + std::error_code COFFObjectFile::getString(uint32_t Offset, StringRef &Result) const { if (StringTableSize <= 4) @@ -1147,13 +1168,10 @@ COFFObjectFile::getCOFFRelocation(const RelocationRef &Reloc) const { return toRel(Reloc.getRawDataRefImpl()); } -iterator_range<const coff_relocation *> +ArrayRef<coff_relocation> COFFObjectFile::getRelocations(const coff_section *Sec) const { - const coff_relocation *I = getFirstReloc(Sec, Data, base()); - const coff_relocation *E = I; - if (I) - E += getNumberOfRelocations(Sec, Data, base()); - return make_range(I, E); + return {getFirstReloc(Sec, Data, base()), + getNumberOfRelocations(Sec, Data, base())}; } #define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type) \ diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 5906dc5f5307..2eefb7ef13a3 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -125,13 +125,6 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } break; - case ELF::EM_WEBASSEMBLY: - switch (Type) { -#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def" - default: - break; - } - break; case ELF::EM_AMDGPU: switch (Type) { #include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" @@ -154,6 +147,50 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, #undef ELF_RELOC +uint32_t llvm::object::getELFRelrRelocationType(uint32_t Machine) { + switch (Machine) { + case ELF::EM_X86_64: + return ELF::R_X86_64_RELATIVE; + case ELF::EM_386: + case ELF::EM_IAMCU: + return ELF::R_386_RELATIVE; + case ELF::EM_MIPS: + break; + case ELF::EM_AARCH64: + return ELF::R_AARCH64_RELATIVE; + case ELF::EM_ARM: + return ELF::R_ARM_RELATIVE; + case ELF::EM_ARC_COMPACT: + case ELF::EM_ARC_COMPACT2: + return ELF::R_ARC_RELATIVE; + case ELF::EM_AVR: + break; + case ELF::EM_HEXAGON: + return ELF::R_HEX_RELATIVE; + case ELF::EM_LANAI: + break; + case ELF::EM_PPC: + break; + case ELF::EM_PPC64: + return ELF::R_PPC64_RELATIVE; + case ELF::EM_RISCV: + return ELF::R_RISCV_RELATIVE; + case ELF::EM_S390: + return ELF::R_390_RELATIVE; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + case ELF::EM_SPARCV9: + return ELF::R_SPARC_RELATIVE; + case ELF::EM_AMDGPU: + break; + case ELF::EM_BPF: + break; + default: + break; + } + return 0; +} + StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { switch (Machine) { case ELF::EM_ARM: @@ -202,9 +239,14 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_RELR); STRINGIFY_ENUM_CASE(ELF, SHT_ANDROID_REL); STRINGIFY_ENUM_CASE(ELF, SHT_ANDROID_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_ANDROID_RELR); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LINKER_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CALL_GRAPH_PROFILE); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); @@ -217,6 +259,85 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { template <class ELFT> Expected<std::vector<typename ELFT::Rela>> +ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const { + // This function decodes the contents of an SHT_RELR packed relocation + // section. + // + // Proposal for adding SHT_RELR sections to generic-abi is here: + // https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg + // + // The encoded sequence of Elf64_Relr entries in a SHT_RELR section looks + // like [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] + // + // i.e. start with an address, followed by any number of bitmaps. The address + // entry encodes 1 relocation. The subsequent bitmap entries encode up to 63 + // relocations each, at subsequent offsets following the last address entry. + // + // The bitmap entries must have 1 in the least significant bit. The assumption + // here is that an address cannot have 1 in lsb. Odd addresses are not + // supported. + // + // Excluding the least significant bit in the bitmap, each non-zero bit in + // the bitmap represents a relocation to be applied to a corresponding machine + // word that follows the base address word. The second least significant bit + // represents the machine word immediately following the initial address, and + // each bit that follows represents the next word, in linear order. As such, + // a single bitmap can encode up to 31 relocations in a 32-bit object, and + // 63 relocations in a 64-bit object. + // + // This encoding has a couple of interesting properties: + // 1. Looking at any entry, it is clear whether it's an address or a bitmap: + // even means address, odd means bitmap. + // 2. Just a simple list of addresses is a valid encoding. + + Elf_Rela Rela; + Rela.r_info = 0; + Rela.r_addend = 0; + Rela.setType(getRelrRelocationType(), false); + std::vector<Elf_Rela> Relocs; + + // Word type: uint32_t for Elf32, and uint64_t for Elf64. + typedef typename ELFT::uint Word; + + // Word size in number of bytes. + const size_t WordSize = sizeof(Word); + + // Number of bits used for the relocation offsets bitmap. + // These many relative relocations can be encoded in a single entry. + const size_t NBits = 8*WordSize - 1; + + Word Base = 0; + for (const Elf_Relr &R : relrs) { + Word Entry = R; + if ((Entry&1) == 0) { + // Even entry: encodes the offset for next relocation. + Rela.r_offset = Entry; + Relocs.push_back(Rela); + // Set base offset for subsequent bitmap entries. + Base = Entry + WordSize; + continue; + } + + // Odd entry: encodes bitmap for relocations starting at base. + Word Offset = Base; + while (Entry != 0) { + Entry >>= 1; + if ((Entry&1) != 0) { + Rela.r_offset = Offset; + Relocs.push_back(Rela); + } + Offset += WordSize; + } + + // Advance base offset by NBits words. + Base += NBits * WordSize; + } + + return Relocs; +} + +template <class ELFT> +Expected<std::vector<typename ELFT::Rela>> ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const { // This function reads relocations in Android's packed relocation format, // which is based on SLEB128 and delta encoding. @@ -299,6 +420,144 @@ ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const { return Relocs; } +template <class ELFT> +const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch, + uint64_t Type) const { +#define DYNAMIC_STRINGIFY_ENUM(tag, value) \ + case value: \ + return #tag; + +#define DYNAMIC_TAG(n, v) + switch (Arch) { + case ELF::EM_HEXAGON: + switch (Type) { +#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef HEXAGON_DYNAMIC_TAG + } + + case ELF::EM_MIPS: + switch (Type) { +#define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef MIPS_DYNAMIC_TAG + } + + case ELF::EM_PPC64: + switch (Type) { +#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef PPC64_DYNAMIC_TAG + } + } +#undef DYNAMIC_TAG + switch (Type) { +// Now handle all dynamic tags except the architecture specific ones +#define MIPS_DYNAMIC_TAG(name, value) +#define HEXAGON_DYNAMIC_TAG(name, value) +#define PPC64_DYNAMIC_TAG(name, value) +// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc. +#define DYNAMIC_TAG_MARKER(name, value) +#define DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef DYNAMIC_TAG +#undef MIPS_DYNAMIC_TAG +#undef HEXAGON_DYNAMIC_TAG +#undef PPC64_DYNAMIC_TAG +#undef DYNAMIC_TAG_MARKER +#undef DYNAMIC_STRINGIFY_ENUM + default: + return "unknown"; + } +} + +template <class ELFT> +const char *ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const { + return getDynamicTagAsString(getHeader()->e_machine, Type); +} + +template <class ELFT> +Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const { + ArrayRef<Elf_Dyn> Dyn; + size_t DynSecSize = 0; + + auto ProgramHeadersOrError = program_headers(); + if (!ProgramHeadersOrError) + return ProgramHeadersOrError.takeError(); + + for (const Elf_Phdr &Phdr : *ProgramHeadersOrError) { + if (Phdr.p_type == ELF::PT_DYNAMIC) { + Dyn = makeArrayRef( + reinterpret_cast<const Elf_Dyn *>(base() + Phdr.p_offset), + Phdr.p_filesz / sizeof(Elf_Dyn)); + DynSecSize = Phdr.p_filesz; + break; + } + } + + // If we can't find the dynamic section in the program headers, we just fall + // back on the sections. + if (Dyn.empty()) { + auto SectionsOrError = sections(); + if (!SectionsOrError) + return SectionsOrError.takeError(); + + for (const Elf_Shdr &Sec : *SectionsOrError) { + if (Sec.sh_type == ELF::SHT_DYNAMIC) { + Expected<ArrayRef<Elf_Dyn>> DynOrError = + getSectionContentsAsArray<Elf_Dyn>(&Sec); + if (!DynOrError) + return DynOrError.takeError(); + Dyn = *DynOrError; + DynSecSize = Sec.sh_size; + break; + } + } + + if (!Dyn.data()) + return ArrayRef<Elf_Dyn>(); + } + + if (Dyn.empty()) + return createError("invalid empty dynamic section"); + + if (DynSecSize % sizeof(Elf_Dyn) != 0) + return createError("malformed dynamic section"); + + if (Dyn.back().d_tag != ELF::DT_NULL) + return createError("dynamic sections must be DT_NULL terminated"); + + return Dyn; +} + +template <class ELFT> +Expected<const uint8_t *> ELFFile<ELFT>::toMappedAddr(uint64_t VAddr) const { + auto ProgramHeadersOrError = program_headers(); + if (!ProgramHeadersOrError) + return ProgramHeadersOrError.takeError(); + + llvm::SmallVector<Elf_Phdr *, 4> LoadSegments; + + for (const Elf_Phdr &Phdr : *ProgramHeadersOrError) + if (Phdr.p_type == ELF::PT_LOAD) + LoadSegments.push_back(const_cast<Elf_Phdr *>(&Phdr)); + + const Elf_Phdr *const *I = + std::upper_bound(LoadSegments.begin(), LoadSegments.end(), VAddr, + [](uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) { + return VAddr < Phdr->p_vaddr; + }); + + if (I == LoadSegments.begin()) + return createError("Virtual address is not in any segment"); + --I; + const Elf_Phdr &Phdr = **I; + uint64_t Delta = VAddr - Phdr.p_vaddr; + if (Delta >= Phdr.p_filesz) + return createError("Virtual address is not in any segment"); + return base() + Phdr.p_offset + Delta; +} + template class llvm::object::ELFFile<ELF32LE>; template class llvm::object::ELFFile<ELF32BE>; template class llvm::object::ELFFile<ELF64LE>; diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 0aad1c89a2d8..e806c8f28b15 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -76,8 +76,7 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const { SubtargetFeatures Features; - unsigned PlatformFlags; - getPlatformFlags(PlatformFlags); + unsigned PlatformFlags = getPlatformFlags(); switch (PlatformFlags & ELF::EF_MIPS_ARCH) { case ELF::EF_MIPS_ARCH_1: @@ -239,12 +238,25 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { return Features; } +SubtargetFeatures ELFObjectFileBase::getRISCVFeatures() const { + SubtargetFeatures Features; + unsigned PlatformFlags = getPlatformFlags(); + + if (PlatformFlags & ELF::EF_RISCV_RVC) { + Features.AddFeature("c"); + } + + return Features; +} + SubtargetFeatures ELFObjectFileBase::getFeatures() const { switch (getEMachine()) { case ELF::EM_MIPS: return getMIPSFeatures(); case ELF::EM_ARM: return getARMFeatures(); + case ELF::EM_RISCV: + return getRISCVFeatures(); default: return SubtargetFeatures(); } diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp index 2d8d3f7c0878..344d565349c0 100644 --- a/lib/Object/IRSymtab.cpp +++ b/lib/Object/IRSymtab.cpp @@ -15,7 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/Analysis/ObjectUtils.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -42,6 +42,12 @@ using namespace llvm; using namespace irsymtab; +static const char *LibcallRoutineNames[] = { +#define HANDLE_LIBCALL(code, name) name, +#include "llvm/IR/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL +}; + namespace { const char *getExpectedProducerName() { @@ -226,13 +232,19 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, setStr(Sym.IRName, GV->getName()); - if (Used.count(GV)) + bool IsBuiltinFunc = false; + + for (const char *LibcallName : LibcallRoutineNames) + if (GV->getName() == LibcallName) + IsBuiltinFunc = true; + + if (Used.count(GV) || IsBuiltinFunc) Sym.Flags |= 1 << storage::Symbol::FB_used; if (GV->isThreadLocal()) Sym.Flags |= 1 << storage::Symbol::FB_tls; if (GV->hasGlobalUnnamedAddr()) Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; - if (canBeOmittedFromSymbolTable(GV)) + if (GV->canBeOmittedFromSymbolTable()) Sym.Flags |= 1 << storage::Symbol::FB_may_omit; Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 2e3415618e5f..e422903f2805 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -107,7 +107,8 @@ getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L, } static const char *getPtr(const MachOObjectFile &O, size_t Offset) { - return O.getData().substr(Offset, 1).data(); + assert(Offset <= O.getData().size()); + return O.getData().data() + Offset; } static MachO::nlist_base @@ -1011,7 +1012,43 @@ static Error checkThreadCommand(const MachOObjectFile &Obj, CmdName + " command"); } } else if (cputype == MachO::CPU_TYPE_X86_64) { - if (flavor == MachO::x86_THREAD_STATE64) { + if (flavor == MachO::x86_THREAD_STATE) { + if (count != MachO::x86_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state_t); + } else if (flavor == MachO::x86_FLOAT_STATE) { + if (count != MachO::x86_FLOAT_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_FLOAT_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_FLOAT_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_float_state_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_FLOAT_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_float_state_t); + } else if (flavor == MachO::x86_EXCEPTION_STATE) { + if (count != MachO::x86_EXCEPTION_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_EXCEPTION_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_EXCEPTION_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_exception_state_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_EXCEPTION_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_exception_state_t); + } else if (flavor == MachO::x86_THREAD_STATE64) { if (count != MachO::x86_THREAD_STATE64_COUNT) return malformedError("load command " + Twine(LoadCommandIndex) + " count not x86_THREAD_STATE64_COUNT for " @@ -1023,6 +1060,18 @@ static Error checkThreadCommand(const MachOObjectFile &Obj, " x86_THREAD_STATE64 extends past end of " "command in " + CmdName + " command"); state += sizeof(MachO::x86_thread_state64_t); + } else if (flavor == MachO::x86_EXCEPTION_STATE64) { + if (count != MachO::x86_EXCEPTION_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_EXCEPTION_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_EXCEPTION_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_exception_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_EXCEPTION_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_exception_state64_t); } else { return malformedError("load command " + Twine(LoadCommandIndex) + " unknown flavor (" + Twine(flavor) + ") for " @@ -1659,6 +1708,10 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const { StringRef StringTable = getStringTableData(); MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + if (Entry.n_strx == 0) + // A n_strx value of 0 indicates that no name is associated with a + // particular symbol table entry. + return StringRef(); const char *Start = &StringTable.data()[Entry.n_strx]; if (Start < getData().begin() || Start >= getData().end()) { return malformedError("bad string index: " + Twine(Entry.n_strx) + @@ -1886,6 +1939,27 @@ uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { return uint64_t(1) << Align; } +Expected<SectionRef> MachOObjectFile::getSection(unsigned SectionIndex) const { + if (SectionIndex < 1 || SectionIndex > Sections.size()) + return malformedError("bad section index: " + Twine((int)SectionIndex)); + + DataRefImpl DRI; + DRI.d.a = SectionIndex - 1; + return SectionRef(DRI, this); +} + +Expected<SectionRef> MachOObjectFile::getSection(StringRef SectionName) const { + StringRef SecName; + for (const SectionRef &Section : sections()) { + if (std::error_code E = Section.getName(SecName)) + return errorCodeToError(E); + if (SecName == SectionName) { + return Section; + } + } + return errorCodeToError(object_error::parse_failed); +} + bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const { return false; } @@ -1916,8 +1990,10 @@ unsigned MachOObjectFile::getSectionID(SectionRef Sec) const { } bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const { - // FIXME: Unimplemented. - return false; + uint32_t Flags = getSectionFlags(*this, Sec); + unsigned SectionType = Flags & MachO::SECTION_TYPE; + return SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL; } bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const { diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp index 64446525b916..b353ef3c835b 100644 --- a/lib/Object/ModuleSymbolTable.cpp +++ b/lib/Object/ModuleSymbolTable.cpp @@ -24,7 +24,6 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -69,84 +68,9 @@ void ModuleSymbolTable::addModule(Module *M) { }); } -// Ensure ELF .symver aliases get the same binding as the defined symbol -// they alias with. -static void handleSymverAliases(const Module &M, RecordStreamer &Streamer) { - if (Streamer.symverAliases().empty()) - return; - - // The name in the assembler will be mangled, but the name in the IR - // might not, so we first compute a mapping from mangled name to GV. - Mangler Mang; - SmallString<64> MangledName; - StringMap<const GlobalValue *> MangledNameMap; - auto GetMangledName = [&](const GlobalValue &GV) { - if (!GV.hasName()) - return; - - MangledName.clear(); - MangledName.reserve(GV.getName().size() + 1); - Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); - MangledNameMap[MangledName] = &GV; - }; - for (const Function &F : M) - GetMangledName(F); - for (const GlobalVariable &GV : M.globals()) - GetMangledName(GV); - for (const GlobalAlias &GA : M.aliases()) - GetMangledName(GA); - - // Walk all the recorded .symver aliases, and set up the binding - // for each alias. - for (auto &Symver : Streamer.symverAliases()) { - const MCSymbol *Aliasee = Symver.first; - MCSymbolAttr Attr = MCSA_Invalid; - - // First check if the aliasee binding was recorded in the asm. - RecordStreamer::State state = Streamer.getSymbolState(Aliasee); - switch (state) { - case RecordStreamer::Global: - case RecordStreamer::DefinedGlobal: - Attr = MCSA_Global; - break; - case RecordStreamer::UndefinedWeak: - case RecordStreamer::DefinedWeak: - Attr = MCSA_Weak; - break; - default: - break; - } - - // If we don't have a symbol attribute from assembly, then check if - // the aliasee was defined in the IR. - if (Attr == MCSA_Invalid) { - const auto *GV = M.getNamedValue(Aliasee->getName()); - if (!GV) { - auto MI = MangledNameMap.find(Aliasee->getName()); - if (MI != MangledNameMap.end()) - GV = MI->second; - else - continue; - } - if (GV->hasExternalLinkage()) - Attr = MCSA_Global; - else if (GV->hasLocalLinkage()) - Attr = MCSA_Local; - else if (GV->isWeakForLinker()) - Attr = MCSA_Weak; - } - if (Attr == MCSA_Invalid) - continue; - - // Set the detected binding on each alias with this aliasee. - for (auto &Alias : Symver.second) - Streamer.EmitSymbolAttribute(Alias, Attr); - } -} - -void ModuleSymbolTable::CollectAsmSymbols( - const Module &M, - function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { +static void +initializeRecordStreamer(const Module &M, + function_ref<void(RecordStreamer &)> Init) { StringRef InlineAsm = M.getModuleInlineAsm(); if (InlineAsm.empty()) return; @@ -176,7 +100,7 @@ void ModuleSymbolTable::CollectAsmSymbols( MCObjectFileInfo MOFI; MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, MCCtx); - RecordStreamer Streamer(MCCtx); + RecordStreamer Streamer(MCCtx, M); T->createNullTargetStreamer(Streamer); std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); @@ -195,36 +119,53 @@ void ModuleSymbolTable::CollectAsmSymbols( if (Parser->Run(false)) return; - handleSymverAliases(M, Streamer); + Init(Streamer); +} - for (auto &KV : Streamer) { - StringRef Key = KV.first(); - RecordStreamer::State Value = KV.second; - // FIXME: For now we just assume that all asm symbols are executable. - uint32_t Res = BasicSymbolRef::SF_Executable; - switch (Value) { - case RecordStreamer::NeverSeen: - llvm_unreachable("NeverSeen should have been replaced earlier"); - case RecordStreamer::DefinedGlobal: - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::Defined: - break; - case RecordStreamer::Global: - case RecordStreamer::Used: - Res |= BasicSymbolRef::SF_Undefined; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::DefinedWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::UndefinedWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Undefined; +void ModuleSymbolTable::CollectAsmSymbols( + const Module &M, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { + Streamer.flushSymverDirectives(); + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. + uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); } - AsmSymbol(Key, BasicSymbolRef::Flags(Res)); - } + }); +} + +void ModuleSymbolTable::CollectAsmSymvers( + const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver) { + initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { + for (auto &KV : Streamer.symverAliases()) + for (auto &Alias : KV.second) + AsmSymver(KV.first->getName(), Alias); + }); } void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 1d2859cfbe9d..5fd823e0117e 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -228,7 +228,7 @@ uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { SmallVector<char, 0> ret; (*unwrap(RI))->getTypeName(ret); - char *str = static_cast<char*>(malloc(ret.size())); + char *str = static_cast<char*>(safe_malloc(ret.size())); std::copy(ret.begin(), ret.end(), str); return str; } diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 652a2b2497ef..db0ff220c4d8 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -119,6 +119,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { case file_magic::archive: case file_magic::macho_universal_binary: case file_magic::windows_resource: + case file_magic::pdb: return errorCodeToError(object_error::invalid_file_type); case file_magic::elf: case file_magic::elf_relocatable: diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp index e94e9cfed394..1f57867dd21a 100644 --- a/lib/Object/RecordStreamer.cpp +++ b/lib/Object/RecordStreamer.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "RecordStreamer.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; @@ -70,7 +73,8 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) { void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } -RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} +RecordStreamer::RecordStreamer(MCContext &Context, const Module &M) + : MCStreamer(Context), M(M) {} RecordStreamer::const_iterator RecordStreamer::begin() { return Symbols.begin(); @@ -103,7 +107,8 @@ bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol, } void RecordStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment, + SMLoc Loc) { markDefined(*Symbol); } @@ -112,7 +117,117 @@ void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, markDefined(*Symbol); } -void RecordStreamer::emitELFSymverDirective(MCSymbol *Alias, +RecordStreamer::State RecordStreamer::getSymbolState(const MCSymbol *Sym) { + auto SI = Symbols.find(Sym->getName()); + if (SI == Symbols.end()) + return NeverSeen; + return SI->second; +} + +void RecordStreamer::emitELFSymverDirective(StringRef AliasName, const MCSymbol *Aliasee) { - SymverAliasMap[Aliasee].push_back(Alias); + SymverAliasMap[Aliasee].push_back(AliasName); +} + +iterator_range<RecordStreamer::const_symver_iterator> +RecordStreamer::symverAliases() { + return {SymverAliasMap.begin(), SymverAliasMap.end()}; +} + +void RecordStreamer::flushSymverDirectives() { + // Mapping from mangled name to GV. + StringMap<const GlobalValue *> MangledNameMap; + // The name in the assembler will be mangled, but the name in the IR + // might not, so we first compute a mapping from mangled name to GV. + Mangler Mang; + SmallString<64> MangledName; + for (const GlobalValue &GV : M.global_values()) { + if (!GV.hasName()) + continue; + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); + MangledNameMap[MangledName] = &GV; + } + + // Walk all the recorded .symver aliases, and set up the binding + // for each alias. + for (auto &Symver : SymverAliasMap) { + const MCSymbol *Aliasee = Symver.first; + MCSymbolAttr Attr = MCSA_Invalid; + bool IsDefined = false; + + // First check if the aliasee binding was recorded in the asm. + RecordStreamer::State state = getSymbolState(Aliasee); + switch (state) { + case RecordStreamer::Global: + case RecordStreamer::DefinedGlobal: + Attr = MCSA_Global; + break; + case RecordStreamer::UndefinedWeak: + case RecordStreamer::DefinedWeak: + Attr = MCSA_Weak; + break; + default: + break; + } + + switch (state) { + case RecordStreamer::Defined: + case RecordStreamer::DefinedGlobal: + case RecordStreamer::DefinedWeak: + IsDefined = true; + break; + case RecordStreamer::NeverSeen: + case RecordStreamer::Global: + case RecordStreamer::Used: + case RecordStreamer::UndefinedWeak: + break; + } + + if (Attr == MCSA_Invalid || !IsDefined) { + const GlobalValue *GV = M.getNamedValue(Aliasee->getName()); + if (!GV) { + auto MI = MangledNameMap.find(Aliasee->getName()); + if (MI != MangledNameMap.end()) + GV = MI->second; + } + if (GV) { + // If we don't have a symbol attribute from assembly, then check if + // the aliasee was defined in the IR. + if (Attr == MCSA_Invalid) { + if (GV->hasExternalLinkage()) + Attr = MCSA_Global; + else if (GV->hasLocalLinkage()) + Attr = MCSA_Local; + else if (GV->isWeakForLinker()) + Attr = MCSA_Weak; + } + IsDefined = IsDefined || !GV->isDeclarationForLinker(); + } + } + + // Set the detected binding on each alias with this aliasee. + for (auto AliasName : Symver.second) { + std::pair<StringRef, StringRef> Split = AliasName.split("@@@"); + SmallString<128> NewName; + if (!Split.second.empty() && !Split.second.startswith("@")) { + // Special processing for "@@@" according + // https://sourceware.org/binutils/docs/as/Symver.html + const char *Separator = IsDefined ? "@@" : "@"; + AliasName = + (Split.first + Separator + Split.second).toStringRef(NewName); + } + MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); + // TODO: Handle "@@@". Depending on SymbolAttribute value it needs to be + // converted into @ or @@. + const MCExpr *Value = MCSymbolRefExpr::create(Aliasee, getContext()); + if (IsDefined) + markDefined(*Alias); + // Don't use EmitAssignment override as it always marks alias as defined. + MCStreamer::EmitAssignment(Alias, Value); + if (Attr != MCSA_Invalid) + EmitSymbolAttribute(Alias, Attr); + } + } } diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h index 4d119091a3d2..3d5ae59b58fe 100644 --- a/lib/Object/RecordStreamer.h +++ b/lib/Object/RecordStreamer.h @@ -20,17 +20,24 @@ namespace llvm { +class GlobalValue; +class Module; + class RecordStreamer : public MCStreamer { public: enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, UndefinedWeak}; private: + const Module &M; StringMap<State> Symbols; // Map of aliases created by .symver directives, saved so we can update // their symbol binding after parsing complete. This maps from each // aliasee to its list of aliases. - DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap; + DenseMap<const MCSymbol *, std::vector<StringRef>> SymverAliasMap; + + /// Get the state recorded for the given symbol. + State getSymbolState(const MCSymbol *Sym); void markDefined(const MCSymbol &Symbol); void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); @@ -38,36 +45,33 @@ private: void visitUsedSymbol(const MCSymbol &Sym) override; public: - RecordStreamer(MCContext &Context); + RecordStreamer(MCContext &Context, const Module &M); - using const_iterator = StringMap<State>::const_iterator; - - const_iterator begin(); - const_iterator end(); void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) override; void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override; + unsigned ByteAlignment, SMLoc Loc = SMLoc()) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; /// Record .symver aliases for later processing. - void emitELFSymverDirective(MCSymbol *Alias, + void emitELFSymverDirective(StringRef AliasName, const MCSymbol *Aliasee) override; - /// Return the map of .symver aliasee to associated aliases. - DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() { - return SymverAliasMap; - } - /// Get the state recorded for the given symbol. - State getSymbolState(const MCSymbol *Sym) { - auto SI = Symbols.find(Sym->getName()); - if (SI == Symbols.end()) - return NeverSeen; - return SI->second; - } + // Emit ELF .symver aliases and ensure they have the same binding as the + // defined symbol they alias with. + void flushSymverDirectives(); + + // Symbols iterators + using const_iterator = StringMap<State>::const_iterator; + const_iterator begin(); + const_iterator end(); + + // SymverAliasMap iterators + using const_symver_iterator = decltype(SymverAliasMap)::const_iterator; + iterator_range<const_symver_iterator> symverAliases(); }; } // end namespace llvm diff --git a/lib/Object/SymbolSize.cpp b/lib/Object/SymbolSize.cpp index dd49d5f116b3..004fb1b07546 100644 --- a/lib/Object/SymbolSize.cpp +++ b/lib/Object/SymbolSize.cpp @@ -66,6 +66,10 @@ llvm::object::computeSymbolSizes(const ObjectFile &O) { Addresses.push_back( {O.symbol_end(), Address + Size, 0, getSectionID(O, Sec)}); } + + if (Addresses.empty()) + return Ret; + array_pod_sort(Addresses.begin(), Addresses.end(), compareAddress); // Compute the size as the gap to the next symbol diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 2e7f2cc0d1d9..3e998a2682b8 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -52,6 +52,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, case file_magic::coff_cl_gl_object: case file_magic::macho_universal_binary: case file_magic::windows_resource: + case file_magic::pdb: return errorCodeToError(object_error::invalid_file_type); case file_magic::elf: case file_magic::elf_executable: diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 48f98df6f34d..4d4c887b2d97 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/SubtargetFeature.h" @@ -33,6 +35,23 @@ using namespace llvm; using namespace object; +void WasmSymbol::print(raw_ostream &Out) const { + Out << "Name=" << Info.Name + << ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind)) + << ", Flags=" << Info.Flags; + if (!isTypeData()) { + Out << ", ElemIndex=" << Info.ElementIndex; + } else if (isDefined()) { + Out << ", Segment=" << Info.DataRef.Segment; + Out << ", Offset=" << Info.DataRef.Offset; + Out << ", Size=" << Info.DataRef.Size; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void WasmSymbol::dump() const { print(dbgs()); } +#endif + Expected<std::unique_ptr<WasmObjectFile>> ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { Error Err = Error::success(); @@ -48,112 +67,119 @@ ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { #define VARUINT7_MAX (1<<7) #define VARUINT1_MAX (1) -static uint8_t readUint8(const uint8_t *&Ptr) { return *Ptr++; } +static uint8_t readUint8(WasmObjectFile::ReadContext &Ctx) { + if (Ctx.Ptr == Ctx.End) + report_fatal_error("EOF while reading uint8"); + return *Ctx.Ptr++; +} -static uint32_t readUint32(const uint8_t *&Ptr) { - uint32_t Result = support::endian::read32le(Ptr); - Ptr += sizeof(Result); +static uint32_t readUint32(WasmObjectFile::ReadContext &Ctx) { + if (Ctx.Ptr + 4 > Ctx.End) + report_fatal_error("EOF while reading uint32"); + uint32_t Result = support::endian::read32le(Ctx.Ptr); + Ctx.Ptr += 4; return Result; } -static int32_t readFloat32(const uint8_t *&Ptr) { +static int32_t readFloat32(WasmObjectFile::ReadContext &Ctx) { int32_t Result = 0; - memcpy(&Result, Ptr, sizeof(Result)); - Ptr += sizeof(Result); + memcpy(&Result, Ctx.Ptr, sizeof(Result)); + Ctx.Ptr += sizeof(Result); return Result; } -static int64_t readFloat64(const uint8_t *&Ptr) { +static int64_t readFloat64(WasmObjectFile::ReadContext &Ctx) { int64_t Result = 0; - memcpy(&Result, Ptr, sizeof(Result)); - Ptr += sizeof(Result); + memcpy(&Result, Ctx.Ptr, sizeof(Result)); + Ctx.Ptr += sizeof(Result); return Result; } -static uint64_t readULEB128(const uint8_t *&Ptr) { +static uint64_t readULEB128(WasmObjectFile::ReadContext &Ctx) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); - Ptr += Count; + const char* Error = nullptr; + uint64_t Result = decodeULEB128(Ctx.Ptr, &Count, Ctx.End, &Error); + if (Error) + report_fatal_error(Error); + Ctx.Ptr += Count; return Result; } -static StringRef readString(const uint8_t *&Ptr) { - uint32_t StringLen = readULEB128(Ptr); - StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen); - Ptr += StringLen; +static StringRef readString(WasmObjectFile::ReadContext &Ctx) { + uint32_t StringLen = readULEB128(Ctx); + if (Ctx.Ptr + StringLen > Ctx.End) + report_fatal_error("EOF while reading string"); + StringRef Return = + StringRef(reinterpret_cast<const char *>(Ctx.Ptr), StringLen); + Ctx.Ptr += StringLen; return Return; } -static int64_t readLEB128(const uint8_t *&Ptr) { +static int64_t readLEB128(WasmObjectFile::ReadContext &Ctx) { unsigned Count; - uint64_t Result = decodeSLEB128(Ptr, &Count); - Ptr += Count; + const char* Error = nullptr; + uint64_t Result = decodeSLEB128(Ctx.Ptr, &Count, Ctx.End, &Error); + if (Error) + report_fatal_error(Error); + Ctx.Ptr += Count; return Result; } -static uint8_t readVaruint1(const uint8_t *&Ptr) { - int64_t result = readLEB128(Ptr); - assert(result <= VARUINT1_MAX && result >= 0); - return result; -} - -static int8_t readVarint7(const uint8_t *&Ptr) { - int64_t result = readLEB128(Ptr); - assert(result <= VARINT7_MAX && result >= VARINT7_MIN); - return result; -} - -static uint8_t readVaruint7(const uint8_t *&Ptr) { - uint64_t result = readULEB128(Ptr); - assert(result <= VARUINT7_MAX); +static uint8_t readVaruint1(WasmObjectFile::ReadContext &Ctx) { + int64_t result = readLEB128(Ctx); + if (result > VARUINT1_MAX || result < 0) + report_fatal_error("LEB is outside Varuint1 range"); return result; } -static int32_t readVarint32(const uint8_t *&Ptr) { - int64_t result = readLEB128(Ptr); - assert(result <= INT32_MAX && result >= INT32_MIN); +static int32_t readVarint32(WasmObjectFile::ReadContext &Ctx) { + int64_t result = readLEB128(Ctx); + if (result > INT32_MAX || result < INT32_MIN) + report_fatal_error("LEB is outside Varint32 range"); return result; } -static uint32_t readVaruint32(const uint8_t *&Ptr) { - uint64_t result = readULEB128(Ptr); - assert(result <= UINT32_MAX); +static uint32_t readVaruint32(WasmObjectFile::ReadContext &Ctx) { + uint64_t result = readULEB128(Ctx); + if (result > UINT32_MAX) + report_fatal_error("LEB is outside Varuint32 range"); return result; } -static int64_t readVarint64(const uint8_t *&Ptr) { - return readLEB128(Ptr); +static int64_t readVarint64(WasmObjectFile::ReadContext &Ctx) { + return readLEB128(Ctx); } -static uint8_t readOpcode(const uint8_t *&Ptr) { - return readUint8(Ptr); +static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) { + return readUint8(Ctx); } -static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { - Expr.Opcode = readOpcode(Ptr); +static Error readInitExpr(wasm::WasmInitExpr &Expr, + WasmObjectFile::ReadContext &Ctx) { + Expr.Opcode = readOpcode(Ctx); switch (Expr.Opcode) { case wasm::WASM_OPCODE_I32_CONST: - Expr.Value.Int32 = readVarint32(Ptr); + Expr.Value.Int32 = readVarint32(Ctx); break; case wasm::WASM_OPCODE_I64_CONST: - Expr.Value.Int64 = readVarint64(Ptr); + Expr.Value.Int64 = readVarint64(Ctx); break; case wasm::WASM_OPCODE_F32_CONST: - Expr.Value.Float32 = readFloat32(Ptr); + Expr.Value.Float32 = readFloat32(Ctx); break; case wasm::WASM_OPCODE_F64_CONST: - Expr.Value.Float64 = readFloat64(Ptr); + Expr.Value.Float64 = readFloat64(Ctx); break; case wasm::WASM_OPCODE_GET_GLOBAL: - Expr.Value.Global = readULEB128(Ptr); + Expr.Value.Global = readULEB128(Ctx); break; default: return make_error<GenericBinaryError>("Invalid opcode in init_expr", object_error::parse_failed); } - uint8_t EndOpcode = readOpcode(Ptr); + uint8_t EndOpcode = readOpcode(Ctx); if (EndOpcode != wasm::WASM_OPCODE_END) { return make_error<GenericBinaryError>("Invalid init_expr", object_error::parse_failed); @@ -161,42 +187,46 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { return Error::success(); } -static wasm::WasmLimits readLimits(const uint8_t *&Ptr) { +static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) { wasm::WasmLimits Result; - Result.Flags = readVaruint1(Ptr); - Result.Initial = readVaruint32(Ptr); + Result.Flags = readVaruint1(Ctx); + Result.Initial = readVaruint32(Ctx); if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) - Result.Maximum = readVaruint32(Ptr); + Result.Maximum = readVaruint32(Ctx); return Result; } -static wasm::WasmTable readTable(const uint8_t *&Ptr) { +static wasm::WasmTable readTable(WasmObjectFile::ReadContext &Ctx) { wasm::WasmTable Table; - Table.ElemType = readVarint7(Ptr); - Table.Limits = readLimits(Ptr); + Table.ElemType = readUint8(Ctx); + Table.Limits = readLimits(Ctx); return Table; } -static Error readSection(WasmSection &Section, const uint8_t *&Ptr, - const uint8_t *Start, const uint8_t *Eof) { - Section.Offset = Ptr - Start; - Section.Type = readVaruint7(Ptr); - uint32_t Size = readVaruint32(Ptr); +static Error readSection(WasmSection &Section, + WasmObjectFile::ReadContext &Ctx) { + Section.Offset = Ctx.Ptr - Ctx.Start; + Section.Type = readUint8(Ctx); + LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n"); + uint32_t Size = readVaruint32(Ctx); if (Size == 0) return make_error<StringError>("Zero length section", object_error::parse_failed); - if (Ptr + Size > Eof) + if (Ctx.Ptr + Size > Ctx.End) return make_error<StringError>("Section too large", object_error::parse_failed); - Section.Content = ArrayRef<uint8_t>(Ptr, Size); - Ptr += Size; + if (Section.Type == wasm::WASM_SEC_CUSTOM) { + const uint8_t *NameStart = Ctx.Ptr; + Section.Name = readString(Ctx); + Size -= Ctx.Ptr - NameStart; + } + Section.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size); + Ctx.Ptr += Size; return Error::success(); } WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) : ObjectFile(Binary::ID_Wasm, Buffer) { - LinkingData.DataSize = 0; - ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -205,16 +235,18 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) return; } - const uint8_t *Eof = getPtr(getData().size()); - const uint8_t *Ptr = getPtr(4); + ReadContext Ctx; + Ctx.Start = getPtr(0); + Ctx.Ptr = Ctx.Start + 4; + Ctx.End = Ctx.Start + getData().size(); - if (Ptr + 4 > Eof) { + if (Ctx.Ptr + 4 > Ctx.End) { Err = make_error<StringError>("Missing version number", object_error::parse_failed); return; } - Header.Version = readUint32(Ptr); + Header.Version = readUint32(Ctx); if (Header.Version != wasm::WasmVersion) { Err = make_error<StringError>("Bad version number", object_error::parse_failed); @@ -222,8 +254,8 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) } WasmSection Sec; - while (Ptr < Eof) { - if ((Err = readSection(Sec, Ptr, getPtr(0), Eof))) + while (Ctx.Ptr < Ctx.End) { + if ((Err = readSection(Sec, Ctx))) return; if ((Err = parseSection(Sec))) return; @@ -233,344 +265,476 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) } Error WasmObjectFile::parseSection(WasmSection &Sec) { - const uint8_t* Start = Sec.Content.data(); - const uint8_t* End = Start + Sec.Content.size(); + ReadContext Ctx; + Ctx.Start = Sec.Content.data(); + Ctx.End = Ctx.Start + Sec.Content.size(); + Ctx.Ptr = Ctx.Start; switch (Sec.Type) { case wasm::WASM_SEC_CUSTOM: - return parseCustomSection(Sec, Start, End); + return parseCustomSection(Sec, Ctx); case wasm::WASM_SEC_TYPE: - return parseTypeSection(Start, End); + return parseTypeSection(Ctx); case wasm::WASM_SEC_IMPORT: - return parseImportSection(Start, End); + return parseImportSection(Ctx); case wasm::WASM_SEC_FUNCTION: - return parseFunctionSection(Start, End); + return parseFunctionSection(Ctx); case wasm::WASM_SEC_TABLE: - return parseTableSection(Start, End); + return parseTableSection(Ctx); case wasm::WASM_SEC_MEMORY: - return parseMemorySection(Start, End); + return parseMemorySection(Ctx); case wasm::WASM_SEC_GLOBAL: - return parseGlobalSection(Start, End); + return parseGlobalSection(Ctx); case wasm::WASM_SEC_EXPORT: - return parseExportSection(Start, End); + return parseExportSection(Ctx); case wasm::WASM_SEC_START: - return parseStartSection(Start, End); + return parseStartSection(Ctx); case wasm::WASM_SEC_ELEM: - return parseElemSection(Start, End); + return parseElemSection(Ctx); case wasm::WASM_SEC_CODE: - return parseCodeSection(Start, End); + return parseCodeSection(Ctx); case wasm::WASM_SEC_DATA: - return parseDataSection(Start, End); + return parseDataSection(Ctx); default: return make_error<GenericBinaryError>("Bad section type", object_error::parse_failed); } } -Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { - while (Ptr < End) { - uint8_t Type = readVarint7(Ptr); - uint32_t Size = readVaruint32(Ptr); - const uint8_t *SubSectionEnd = Ptr + Size; +Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { + llvm::DenseSet<uint64_t> Seen; + if (Functions.size() != FunctionTypes.size()) { + return make_error<GenericBinaryError>("Names must come after code section", + object_error::parse_failed); + } + + while (Ctx.Ptr < Ctx.End) { + uint8_t Type = readUint8(Ctx); + uint32_t Size = readVaruint32(Ctx); + const uint8_t *SubSectionEnd = Ctx.Ptr + Size; switch (Type) { case wasm::WASM_NAMES_FUNCTION: { - uint32_t Count = readVaruint32(Ptr); + uint32_t Count = readVaruint32(Ctx); while (Count--) { - uint32_t Index = readVaruint32(Ptr); - StringRef Name = readString(Ptr); - if (!Name.empty()) - Symbols.emplace_back(Name, - WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME, - Sections.size(), Index); + uint32_t Index = readVaruint32(Ctx); + if (!Seen.insert(Index).second) + return make_error<GenericBinaryError>("Function named more than once", + object_error::parse_failed); + StringRef Name = readString(Ctx); + if (!isValidFunctionIndex(Index) || Name.empty()) + return make_error<GenericBinaryError>("Invalid name entry", + object_error::parse_failed); + DebugNames.push_back(wasm::WasmFunctionName{Index, Name}); + if (isDefinedFunctionIndex(Index)) + getDefinedFunction(Index).DebugName = Name; } break; } // Ignore local names for now case wasm::WASM_NAMES_LOCAL: default: - Ptr += Size; + Ctx.Ptr += Size; break; } - if (Ptr != SubSectionEnd) + if (Ctx.Ptr != SubSectionEnd) return make_error<GenericBinaryError>("Name sub-section ended prematurely", object_error::parse_failed); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Name section ended prematurely", object_error::parse_failed); return Error::success(); } -void WasmObjectFile::populateSymbolTable() { - // Add imports to symbol table - size_t GlobalIndex = 0; - size_t FunctionIndex = 0; - for (const wasm::WasmImport& Import : Imports) { - switch (Import.Kind) { - case wasm::WASM_EXTERNAL_GLOBAL: - assert(Import.Global.Type == wasm::WASM_TYPE_I32); - SymbolMap.try_emplace(Import.Field, Symbols.size()); - Symbols.emplace_back(Import.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, - ImportSection, GlobalIndex++); - DEBUG(dbgs() << "Adding import: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); - break; - case wasm::WASM_EXTERNAL_FUNCTION: - SymbolMap.try_emplace(Import.Field, Symbols.size()); - Symbols.emplace_back(Import.Field, - WasmSymbol::SymbolType::FUNCTION_IMPORT, - ImportSection, FunctionIndex++, Import.SigIndex); - DEBUG(dbgs() << "Adding import: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); - break; - default: - break; - } +Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) { + HasLinkingSection = true; + if (Functions.size() != FunctionTypes.size()) { + return make_error<GenericBinaryError>( + "Linking data must come after code section", object_error::parse_failed); } - // Add exports to symbol table - for (const wasm::WasmExport& Export : Exports) { - if (Export.Kind == wasm::WASM_EXTERNAL_FUNCTION || - Export.Kind == wasm::WASM_EXTERNAL_GLOBAL) { - WasmSymbol::SymbolType ExportType = - Export.Kind == wasm::WASM_EXTERNAL_FUNCTION - ? WasmSymbol::SymbolType::FUNCTION_EXPORT - : WasmSymbol::SymbolType::GLOBAL_EXPORT; - auto Pair = SymbolMap.try_emplace(Export.Name, Symbols.size()); - if (Pair.second) { - Symbols.emplace_back(Export.Name, ExportType, - ExportSection, Export.Index); - DEBUG(dbgs() << "Adding export: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); - } else { - uint32_t SymIndex = Pair.first->second; - const WasmSymbol &OldSym = Symbols[SymIndex]; - WasmSymbol NewSym(Export.Name, ExportType, ExportSection, Export.Index); - NewSym.setAltIndex(OldSym.ElementIndex); - Symbols[SymIndex] = NewSym; - - DEBUG(dbgs() << "Replacing existing symbol: " << NewSym - << " sym index:" << SymIndex << "\n"); - } - } + LinkingData.Version = readVaruint32(Ctx); + if (LinkingData.Version != wasm::WasmMetadataVersion) { + return make_error<GenericBinaryError>( + "Unexpected metadata version: " + Twine(LinkingData.Version) + + " (Expected: " + Twine(wasm::WasmMetadataVersion) + ")", + object_error::parse_failed); } -} - -Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, - const uint8_t *End) { - HasLinkingSection = true; - - // Only populate the symbol table with imports and exports if the object - // has a linking section (i.e. its a relocatable object file). Otherwise - // the global might not represent symbols at all. - populateSymbolTable(); - while (Ptr < End) { - uint8_t Type = readVarint7(Ptr); - uint32_t Size = readVaruint32(Ptr); - const uint8_t *SubSectionEnd = Ptr + Size; + const uint8_t *OrigEnd = Ctx.End; + while (Ctx.Ptr < OrigEnd) { + Ctx.End = OrigEnd; + uint8_t Type = readUint8(Ctx); + uint32_t Size = readVaruint32(Ctx); + LLVM_DEBUG(dbgs() << "readSubsection type=" << int(Type) << " size=" << Size + << "\n"); + Ctx.End = Ctx.Ptr + Size; switch (Type) { - case wasm::WASM_SYMBOL_INFO: { - uint32_t Count = readVaruint32(Ptr); - while (Count--) { - StringRef Symbol = readString(Ptr); - DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n"); - uint32_t Flags = readVaruint32(Ptr); - auto iter = SymbolMap.find(Symbol); - if (iter == SymbolMap.end()) { - return make_error<GenericBinaryError>( - "Invalid symbol name in linking section: " + Symbol, - object_error::parse_failed); - } - uint32_t SymIndex = iter->second; - assert(SymIndex < Symbols.size()); - Symbols[SymIndex].Flags = Flags; - DEBUG(dbgs() << "Set symbol flags index:" - << SymIndex << " name:" - << Symbols[SymIndex].Name << " expected:" - << Symbol << " flags: " << Flags << "\n"); - } - break; - } - case wasm::WASM_DATA_SIZE: - LinkingData.DataSize = readVaruint32(Ptr); + case wasm::WASM_SYMBOL_TABLE: + if (Error Err = parseLinkingSectionSymtab(Ctx)) + return Err; break; case wasm::WASM_SEGMENT_INFO: { - uint32_t Count = readVaruint32(Ptr); + uint32_t Count = readVaruint32(Ctx); if (Count > DataSegments.size()) return make_error<GenericBinaryError>("Too many segment names", object_error::parse_failed); for (uint32_t i = 0; i < Count; i++) { - DataSegments[i].Data.Name = readString(Ptr); - DataSegments[i].Data.Alignment = readVaruint32(Ptr); - DataSegments[i].Data.Flags = readVaruint32(Ptr); + DataSegments[i].Data.Name = readString(Ctx); + DataSegments[i].Data.Alignment = readVaruint32(Ctx); + DataSegments[i].Data.Flags = readVaruint32(Ctx); } break; } case wasm::WASM_INIT_FUNCS: { - uint32_t Count = readVaruint32(Ptr); + uint32_t Count = readVaruint32(Ctx); LinkingData.InitFunctions.reserve(Count); for (uint32_t i = 0; i < Count; i++) { wasm::WasmInitFunc Init; - Init.Priority = readVaruint32(Ptr); - Init.FunctionIndex = readVaruint32(Ptr); - if (!isValidFunctionIndex(Init.FunctionIndex)) - return make_error<GenericBinaryError>("Invalid function index: " + - Twine(Init.FunctionIndex), + Init.Priority = readVaruint32(Ctx); + Init.Symbol = readVaruint32(Ctx); + if (!isValidFunctionSymbol(Init.Symbol)) + return make_error<GenericBinaryError>("Invalid function symbol: " + + Twine(Init.Symbol), object_error::parse_failed); LinkingData.InitFunctions.emplace_back(Init); } break; } + case wasm::WASM_COMDAT_INFO: + if (Error Err = parseLinkingSectionComdat(Ctx)) + return Err; + break; default: - Ptr += Size; + Ctx.Ptr += Size; break; } - if (Ptr != SubSectionEnd) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>( "Linking sub-section ended prematurely", object_error::parse_failed); } - if (Ptr != End) + if (Ctx.Ptr != OrigEnd) return make_error<GenericBinaryError>("Linking section ended prematurely", object_error::parse_failed); return Error::success(); } -WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { - for (WasmSection& Section : Sections) { - if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) - return &Section; +Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); + LinkingData.SymbolTable.reserve(Count); + Symbols.reserve(Count); + StringSet<> SymbolNames; + + std::vector<wasm::WasmImport *> ImportedGlobals; + std::vector<wasm::WasmImport *> ImportedFunctions; + ImportedGlobals.reserve(Imports.size()); + ImportedFunctions.reserve(Imports.size()); + for (auto &I : Imports) { + if (I.Kind == wasm::WASM_EXTERNAL_FUNCTION) + ImportedFunctions.emplace_back(&I); + else if (I.Kind == wasm::WASM_EXTERNAL_GLOBAL) + ImportedGlobals.emplace_back(&I); } - return nullptr; -} -WasmSection* WasmObjectFile::findSectionByType(uint32_t Type) { - assert(Type != wasm::WASM_SEC_CUSTOM); - for (WasmSection& Section : Sections) { - if (Section.Type == Type) - return &Section; + while (Count--) { + wasm::WasmSymbolInfo Info; + const wasm::WasmSignature *FunctionType = nullptr; + const wasm::WasmGlobalType *GlobalType = nullptr; + + Info.Kind = readUint8(Ctx); + Info.Flags = readVaruint32(Ctx); + bool IsDefined = (Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0; + + switch (Info.Kind) { + case wasm::WASM_SYMBOL_TYPE_FUNCTION: + Info.ElementIndex = readVaruint32(Ctx); + if (!isValidFunctionIndex(Info.ElementIndex) || + IsDefined != isDefinedFunctionIndex(Info.ElementIndex)) + return make_error<GenericBinaryError>("invalid function symbol index", + object_error::parse_failed); + if (IsDefined) { + Info.Name = readString(Ctx); + unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions; + FunctionType = &Signatures[FunctionTypes[FuncIndex]]; + wasm::WasmFunction &Function = Functions[FuncIndex]; + if (Function.SymbolName.empty()) + Function.SymbolName = Info.Name; + } else { + wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex]; + FunctionType = &Signatures[Import.SigIndex]; + Info.Name = Import.Field; + Info.Module = Import.Module; + } + break; + + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + Info.ElementIndex = readVaruint32(Ctx); + if (!isValidGlobalIndex(Info.ElementIndex) || + IsDefined != isDefinedGlobalIndex(Info.ElementIndex)) + return make_error<GenericBinaryError>("invalid global symbol index", + object_error::parse_failed); + if (!IsDefined && + (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) == + wasm::WASM_SYMBOL_BINDING_WEAK) + return make_error<GenericBinaryError>("undefined weak global symbol", + object_error::parse_failed); + if (IsDefined) { + Info.Name = readString(Ctx); + unsigned GlobalIndex = Info.ElementIndex - NumImportedGlobals; + wasm::WasmGlobal &Global = Globals[GlobalIndex]; + GlobalType = &Global.Type; + if (Global.SymbolName.empty()) + Global.SymbolName = Info.Name; + } else { + wasm::WasmImport &Import = *ImportedGlobals[Info.ElementIndex]; + Info.Name = Import.Field; + GlobalType = &Import.Global; + } + break; + + case wasm::WASM_SYMBOL_TYPE_DATA: + Info.Name = readString(Ctx); + if (IsDefined) { + uint32_t Index = readVaruint32(Ctx); + if (Index >= DataSegments.size()) + return make_error<GenericBinaryError>("invalid data symbol index", + object_error::parse_failed); + uint32_t Offset = readVaruint32(Ctx); + uint32_t Size = readVaruint32(Ctx); + if (Offset + Size > DataSegments[Index].Data.Content.size()) + return make_error<GenericBinaryError>("invalid data symbol offset", + object_error::parse_failed); + Info.DataRef = wasm::WasmDataReference{Index, Offset, Size}; + } + break; + + case wasm::WASM_SYMBOL_TYPE_SECTION: { + if ((Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) != + wasm::WASM_SYMBOL_BINDING_LOCAL) + return make_error<GenericBinaryError>( + "Section symbols must have local binding", + object_error::parse_failed); + Info.ElementIndex = readVaruint32(Ctx); + // Use somewhat unique section name as symbol name. + StringRef SectionName = Sections[Info.ElementIndex].Name; + Info.Name = SectionName; + break; + } + + default: + return make_error<GenericBinaryError>("Invalid symbol type", + object_error::parse_failed); + } + + if ((Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) != + wasm::WASM_SYMBOL_BINDING_LOCAL && + !SymbolNames.insert(Info.Name).second) + return make_error<GenericBinaryError>("Duplicate symbol name " + + Twine(Info.Name), + object_error::parse_failed); + LinkingData.SymbolTable.emplace_back(Info); + Symbols.emplace_back(LinkingData.SymbolTable.back(), FunctionType, + GlobalType); + LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n"); } - return nullptr; -} - -Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, - const uint8_t *End) { - uint8_t SectionCode = readVarint7(Ptr); - WasmSection* Section = nullptr; - if (SectionCode == wasm::WASM_SEC_CUSTOM) { - StringRef Name = readString(Ptr); - Section = findCustomSectionByName(Name); - } else { - Section = findSectionByType(SectionCode); + + return Error::success(); +} + +Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) { + uint32_t ComdatCount = readVaruint32(Ctx); + StringSet<> ComdatSet; + for (unsigned ComdatIndex = 0; ComdatIndex < ComdatCount; ++ComdatIndex) { + StringRef Name = readString(Ctx); + if (Name.empty() || !ComdatSet.insert(Name).second) + return make_error<GenericBinaryError>("Bad/duplicate COMDAT name " + Twine(Name), + object_error::parse_failed); + LinkingData.Comdats.emplace_back(Name); + uint32_t Flags = readVaruint32(Ctx); + if (Flags != 0) + return make_error<GenericBinaryError>("Unsupported COMDAT flags", + object_error::parse_failed); + + uint32_t EntryCount = readVaruint32(Ctx); + while (EntryCount--) { + unsigned Kind = readVaruint32(Ctx); + unsigned Index = readVaruint32(Ctx); + switch (Kind) { + default: + return make_error<GenericBinaryError>("Invalid COMDAT entry type", + object_error::parse_failed); + case wasm::WASM_COMDAT_DATA: + if (Index >= DataSegments.size()) + return make_error<GenericBinaryError>("COMDAT data index out of range", + object_error::parse_failed); + if (DataSegments[Index].Data.Comdat != UINT32_MAX) + return make_error<GenericBinaryError>("Data segment in two COMDATs", + object_error::parse_failed); + DataSegments[Index].Data.Comdat = ComdatIndex; + break; + case wasm::WASM_COMDAT_FUNCTION: + if (!isDefinedFunctionIndex(Index)) + return make_error<GenericBinaryError>("COMDAT function index out of range", + object_error::parse_failed); + if (getDefinedFunction(Index).Comdat != UINT32_MAX) + return make_error<GenericBinaryError>("Function in two COMDATs", + object_error::parse_failed); + getDefinedFunction(Index).Comdat = ComdatIndex; + break; + } + } } - if (!Section) - return make_error<GenericBinaryError>("Invalid section code", + return Error::success(); +} + +Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { + uint32_t SectionIndex = readVaruint32(Ctx); + if (SectionIndex >= Sections.size()) + return make_error<GenericBinaryError>("Invalid section index", object_error::parse_failed); - uint32_t RelocCount = readVaruint32(Ptr); + WasmSection& Section = Sections[SectionIndex]; + uint32_t RelocCount = readVaruint32(Ctx); + uint32_t EndOffset = Section.Content.size(); while (RelocCount--) { - wasm::WasmRelocation Reloc; - memset(&Reloc, 0, sizeof(Reloc)); - Reloc.Type = readVaruint32(Ptr); - Reloc.Offset = readVaruint32(Ptr); - Reloc.Index = readVaruint32(Ptr); + wasm::WasmRelocation Reloc = {}; + Reloc.Type = readVaruint32(Ctx); + Reloc.Offset = readVaruint32(Ctx); + Reloc.Index = readVaruint32(Ctx); switch (Reloc.Type) { case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + if (!isValidFunctionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("Bad relocation function index", + object_error::parse_failed); + break; case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + if (Reloc.Index >= Signatures.size()) + return make_error<GenericBinaryError>("Bad relocation type index", + object_error::parse_failed); + break; case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + if (!isValidGlobalSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("Bad relocation global index", + object_error::parse_failed); break; case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: - Reloc.Addend = readVarint32(Ptr); + if (!isValidDataSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("Bad relocation data index", + object_error::parse_failed); + Reloc.Addend = readVarint32(Ctx); + break; + case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + if (!isValidFunctionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("Bad relocation function index", + object_error::parse_failed); + Reloc.Addend = readVarint32(Ctx); + break; + case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32: + if (!isValidSectionSymbol(Reloc.Index)) + return make_error<GenericBinaryError>("Bad relocation section index", + object_error::parse_failed); + Reloc.Addend = readVarint32(Ctx); break; default: return make_error<GenericBinaryError>("Bad relocation type: " + Twine(Reloc.Type), object_error::parse_failed); } - Section->Relocations.push_back(Reloc); + + // Relocations must fit inside the section, and must appear in order. They + // also shouldn't overlap a function/element boundary, but we don't bother + // to check that. + uint64_t Size = 5; + if (Reloc.Type == wasm::R_WEBASSEMBLY_TABLE_INDEX_I32 || + Reloc.Type == wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32 || + Reloc.Type == wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32 || + Reloc.Type == wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32) + Size = 4; + if (Reloc.Offset + Size > EndOffset) + return make_error<GenericBinaryError>("Bad relocation offset", + object_error::parse_failed); + + Section.Relocations.push_back(Reloc); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Reloc section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseCustomSection(WasmSection &Sec, - const uint8_t *Ptr, const uint8_t *End) { - Sec.Name = readString(Ptr); +Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) { if (Sec.Name == "name") { - if (Error Err = parseNameSection(Ptr, End)) + if (Error Err = parseNameSection(Ctx)) return Err; } else if (Sec.Name == "linking") { - if (Error Err = parseLinkingSection(Ptr, End)) + if (Error Err = parseLinkingSection(Ctx)) return Err; } else if (Sec.Name.startswith("reloc.")) { - if (Error Err = parseRelocSection(Sec.Name, Ptr, End)) + if (Error Err = parseRelocSection(Sec.Name, Ctx)) return Err; } return Error::success(); } -Error WasmObjectFile::parseTypeSection(const uint8_t *Ptr, const uint8_t *End) { - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); Signatures.reserve(Count); while (Count--) { wasm::WasmSignature Sig; Sig.ReturnType = wasm::WASM_TYPE_NORESULT; - int8_t Form = readVarint7(Ptr); + uint8_t Form = readUint8(Ctx); if (Form != wasm::WASM_TYPE_FUNC) { return make_error<GenericBinaryError>("Invalid signature type", object_error::parse_failed); } - uint32_t ParamCount = readVaruint32(Ptr); + uint32_t ParamCount = readVaruint32(Ctx); Sig.ParamTypes.reserve(ParamCount); while (ParamCount--) { - uint32_t ParamType = readVarint7(Ptr); + uint32_t ParamType = readUint8(Ctx); Sig.ParamTypes.push_back(ParamType); } - uint32_t ReturnCount = readVaruint32(Ptr); + uint32_t ReturnCount = readVaruint32(Ctx); if (ReturnCount) { if (ReturnCount != 1) { return make_error<GenericBinaryError>( "Multiple return types not supported", object_error::parse_failed); } - Sig.ReturnType = readVarint7(Ptr); + Sig.ReturnType = readUint8(Ctx); } Signatures.push_back(Sig); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Type section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) { - ImportSection = Sections.size(); - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseImportSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); Imports.reserve(Count); for (uint32_t i = 0; i < Count; i++) { wasm::WasmImport Im; - Im.Module = readString(Ptr); - Im.Field = readString(Ptr); - Im.Kind = readUint8(Ptr); + Im.Module = readString(Ctx); + Im.Field = readString(Ctx); + Im.Kind = readUint8(Ctx); switch (Im.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: NumImportedFunctions++; - Im.SigIndex = readVaruint32(Ptr); + Im.SigIndex = readVaruint32(Ctx); break; case wasm::WASM_EXTERNAL_GLOBAL: NumImportedGlobals++; - Im.Global.Type = readVarint7(Ptr); - Im.Global.Mutable = readVaruint1(Ptr); + Im.Global.Type = readUint8(Ctx); + Im.Global.Mutable = readVaruint1(Ctx); break; case wasm::WASM_EXTERNAL_MEMORY: - Im.Memory = readLimits(Ptr); + Im.Memory = readLimits(Ctx); break; case wasm::WASM_EXTERNAL_TABLE: - Im.Table = readTable(Ptr); + Im.Table = readTable(Ctx); if (Im.Table.ElemType != wasm::WASM_TYPE_ANYFUNC) return make_error<GenericBinaryError>("Invalid table element type", object_error::parse_failed); @@ -581,90 +745,95 @@ Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) } Imports.push_back(Im); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Import section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseFunctionSection(const uint8_t *Ptr, const uint8_t *End) { - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseFunctionSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); FunctionTypes.reserve(Count); + uint32_t NumTypes = Signatures.size(); while (Count--) { - FunctionTypes.push_back(readVaruint32(Ptr)); + uint32_t Type = readVaruint32(Ctx); + if (Type >= NumTypes) + return make_error<GenericBinaryError>("Invalid function type", + object_error::parse_failed); + FunctionTypes.push_back(Type); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Function section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseTableSection(const uint8_t *Ptr, const uint8_t *End) { - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseTableSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); Tables.reserve(Count); while (Count--) { - Tables.push_back(readTable(Ptr)); + Tables.push_back(readTable(Ctx)); if (Tables.back().ElemType != wasm::WASM_TYPE_ANYFUNC) { return make_error<GenericBinaryError>("Invalid table element type", object_error::parse_failed); } } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Table section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseMemorySection(const uint8_t *Ptr, const uint8_t *End) { - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseMemorySection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); Memories.reserve(Count); while (Count--) { - Memories.push_back(readLimits(Ptr)); + Memories.push_back(readLimits(Ctx)); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Memory section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) { - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) { + GlobalSection = Sections.size(); + uint32_t Count = readVaruint32(Ctx); Globals.reserve(Count); while (Count--) { wasm::WasmGlobal Global; - Global.Type = readVarint7(Ptr); - Global.Mutable = readVaruint1(Ptr); - if (Error Err = readInitExpr(Global.InitExpr, Ptr)) + Global.Index = NumImportedGlobals + Globals.size(); + Global.Type.Type = readUint8(Ctx); + Global.Type.Mutable = readVaruint1(Ctx); + if (Error Err = readInitExpr(Global.InitExpr, Ctx)) return Err; Globals.push_back(Global); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Global section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) { - ExportSection = Sections.size(); - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseExportSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); Exports.reserve(Count); for (uint32_t i = 0; i < Count; i++) { wasm::WasmExport Ex; - Ex.Name = readString(Ptr); - Ex.Kind = readUint8(Ptr); - Ex.Index = readVaruint32(Ptr); + Ex.Name = readString(Ctx); + Ex.Kind = readUint8(Ctx); + Ex.Index = readVaruint32(Ctx); switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: - if (Ex.Index >= FunctionTypes.size() + NumImportedFunctions) + if (!isValidFunctionIndex(Ex.Index)) return make_error<GenericBinaryError>("Invalid function export", object_error::parse_failed); break; - case wasm::WASM_EXTERNAL_GLOBAL: { - if (Ex.Index >= Globals.size() + NumImportedGlobals) + case wasm::WASM_EXTERNAL_GLOBAL: + if (!isValidGlobalIndex(Ex.Index)) return make_error<GenericBinaryError>("Invalid global export", object_error::parse_failed); break; - } case wasm::WASM_EXTERNAL_MEMORY: case wasm::WASM_EXTERNAL_TABLE: break; @@ -674,27 +843,65 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) } Exports.push_back(Ex); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Export section ended prematurely", object_error::parse_failed); return Error::success(); } bool WasmObjectFile::isValidFunctionIndex(uint32_t Index) const { - return Index < FunctionTypes.size() + NumImportedFunctions; + return Index < NumImportedFunctions + FunctionTypes.size(); +} + +bool WasmObjectFile::isDefinedFunctionIndex(uint32_t Index) const { + return Index >= NumImportedFunctions && isValidFunctionIndex(Index); +} + +bool WasmObjectFile::isValidGlobalIndex(uint32_t Index) const { + return Index < NumImportedGlobals + Globals.size(); +} + +bool WasmObjectFile::isDefinedGlobalIndex(uint32_t Index) const { + return Index >= NumImportedGlobals && isValidGlobalIndex(Index); } -Error WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) { - StartFunction = readVaruint32(Ptr); +bool WasmObjectFile::isValidFunctionSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeFunction(); +} + +bool WasmObjectFile::isValidGlobalSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeGlobal(); +} + +bool WasmObjectFile::isValidDataSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeData(); +} + +bool WasmObjectFile::isValidSectionSymbol(uint32_t Index) const { + return Index < Symbols.size() && Symbols[Index].isTypeSection(); +} + +wasm::WasmFunction &WasmObjectFile::getDefinedFunction(uint32_t Index) { + assert(isDefinedFunctionIndex(Index)); + return Functions[Index - NumImportedFunctions]; +} + +wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) { + assert(isDefinedGlobalIndex(Index)); + return Globals[Index - NumImportedGlobals]; +} + +Error WasmObjectFile::parseStartSection(ReadContext &Ctx) { + StartFunction = readVaruint32(Ctx); if (!isValidFunctionIndex(StartFunction)) return make_error<GenericBinaryError>("Invalid start function", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) { - const uint8_t *CodeSectionStart = Ptr; - uint32_t FunctionCount = readVaruint32(Ptr); +Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) { + CodeSection = Sections.size(); + uint32_t FunctionCount = readVaruint32(Ctx); if (FunctionCount != FunctionTypes.size()) { return make_error<GenericBinaryError>("Invalid function count", object_error::parse_failed); @@ -702,83 +909,93 @@ Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) { while (FunctionCount--) { wasm::WasmFunction Function; - const uint8_t *FunctionStart = Ptr; - uint32_t Size = readVaruint32(Ptr); - const uint8_t *FunctionEnd = Ptr + Size; + const uint8_t *FunctionStart = Ctx.Ptr; + uint32_t Size = readVaruint32(Ctx); + const uint8_t *FunctionEnd = Ctx.Ptr + Size; - Function.CodeSectionOffset = FunctionStart - CodeSectionStart; + Function.CodeOffset = Ctx.Ptr - FunctionStart; + Function.Index = NumImportedFunctions + Functions.size(); + Function.CodeSectionOffset = FunctionStart - Ctx.Start; Function.Size = FunctionEnd - FunctionStart; - uint32_t NumLocalDecls = readVaruint32(Ptr); + uint32_t NumLocalDecls = readVaruint32(Ctx); Function.Locals.reserve(NumLocalDecls); while (NumLocalDecls--) { wasm::WasmLocalDecl Decl; - Decl.Count = readVaruint32(Ptr); - Decl.Type = readVarint7(Ptr); + Decl.Count = readVaruint32(Ctx); + Decl.Type = readUint8(Ctx); Function.Locals.push_back(Decl); } - uint32_t BodySize = FunctionEnd - Ptr; - Function.Body = ArrayRef<uint8_t>(Ptr, BodySize); - Ptr += BodySize; - assert(Ptr == FunctionEnd); + uint32_t BodySize = FunctionEnd - Ctx.Ptr; + Function.Body = ArrayRef<uint8_t>(Ctx.Ptr, BodySize); + // This will be set later when reading in the linking metadata section. + Function.Comdat = UINT32_MAX; + Ctx.Ptr += BodySize; + assert(Ctx.Ptr == FunctionEnd); Functions.push_back(Function); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Code section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { + uint32_t Count = readVaruint32(Ctx); ElemSegments.reserve(Count); while (Count--) { wasm::WasmElemSegment Segment; - Segment.TableIndex = readVaruint32(Ptr); + Segment.TableIndex = readVaruint32(Ctx); if (Segment.TableIndex != 0) { return make_error<GenericBinaryError>("Invalid TableIndex", object_error::parse_failed); } - if (Error Err = readInitExpr(Segment.Offset, Ptr)) + if (Error Err = readInitExpr(Segment.Offset, Ctx)) return Err; - uint32_t NumElems = readVaruint32(Ptr); + uint32_t NumElems = readVaruint32(Ctx); while (NumElems--) { - Segment.Functions.push_back(readVaruint32(Ptr)); + Segment.Functions.push_back(readVaruint32(Ctx)); } ElemSegments.push_back(Segment); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Elem section ended prematurely", object_error::parse_failed); return Error::success(); } -Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { - const uint8_t *Start = Ptr; - uint32_t Count = readVaruint32(Ptr); +Error WasmObjectFile::parseDataSection(ReadContext &Ctx) { + DataSection = Sections.size(); + uint32_t Count = readVaruint32(Ctx); DataSegments.reserve(Count); while (Count--) { WasmSegment Segment; - Segment.Data.MemoryIndex = readVaruint32(Ptr); - if (Error Err = readInitExpr(Segment.Data.Offset, Ptr)) + Segment.Data.MemoryIndex = readVaruint32(Ctx); + if (Error Err = readInitExpr(Segment.Data.Offset, Ctx)) return Err; - uint32_t Size = readVaruint32(Ptr); - Segment.Data.Content = ArrayRef<uint8_t>(Ptr, Size); + uint32_t Size = readVaruint32(Ctx); + if (Size > (size_t)(Ctx.End - Ctx.Ptr)) + return make_error<GenericBinaryError>("Invalid segment size", + object_error::parse_failed); + Segment.Data.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size); + // The rest of these Data fields are set later, when reading in the linking + // metadata section. Segment.Data.Alignment = 0; Segment.Data.Flags = 0; - Segment.SectionOffset = Ptr - Start; - Ptr += Size; + Segment.Data.Comdat = UINT32_MAX; + Segment.SectionOffset = Ctx.Ptr - Ctx.Start; + Ctx.Ptr += Size; DataSegments.push_back(Segment); } - if (Ptr != End) + if (Ctx.Ptr != Ctx.End) return make_error<GenericBinaryError>("Data section ended prematurely", object_error::parse_failed); return Error::success(); } const uint8_t *WasmObjectFile::getPtr(size_t Offset) const { - return reinterpret_cast<const uint8_t *>(getData().substr(Offset, 1).data()); + return reinterpret_cast<const uint8_t *>(getData().data() + Offset); } const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { @@ -791,32 +1008,17 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { uint32_t Result = SymbolRef::SF_None; const WasmSymbol &Sym = getWasmSymbol(Symb); - DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); - if (Sym.isWeak()) + LLVM_DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); + if (Sym.isBindingWeak()) Result |= SymbolRef::SF_Weak; - if (!Sym.isLocal()) + if (!Sym.isBindingLocal()) Result |= SymbolRef::SF_Global; if (Sym.isHidden()) Result |= SymbolRef::SF_Hidden; - - switch (Sym.Type) { - case WasmSymbol::SymbolType::FUNCTION_IMPORT: - Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable; - break; - case WasmSymbol::SymbolType::FUNCTION_EXPORT: - Result |= SymbolRef::SF_Executable; - break; - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: - Result |= SymbolRef::SF_Executable; - Result |= SymbolRef::SF_FormatSpecific; - break; - case WasmSymbol::SymbolType::GLOBAL_IMPORT: + if (!Sym.isDefined()) Result |= SymbolRef::SF_Undefined; - break; - case WasmSymbol::SymbolType::GLOBAL_EXPORT: - break; - } - + if (Sym.isTypeFunction()) + Result |= SymbolRef::SF_Executable; return Result; } @@ -841,7 +1043,7 @@ const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { } Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { - return getWasmSymbol(Symb).Name; + return getWasmSymbol(Symb).Info.Name; } Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { @@ -849,20 +1051,20 @@ Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { } uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol& Sym) const { - switch (Sym.Type) { - case WasmSymbol::SymbolType::FUNCTION_IMPORT: - case WasmSymbol::SymbolType::GLOBAL_IMPORT: - case WasmSymbol::SymbolType::FUNCTION_EXPORT: - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: - return Sym.ElementIndex; - case WasmSymbol::SymbolType::GLOBAL_EXPORT: { - uint32_t GlobalIndex = Sym.ElementIndex - NumImportedGlobals; - assert(GlobalIndex < Globals.size()); - const wasm::WasmGlobal& Global = Globals[GlobalIndex]; - // WasmSymbols correspond only to I32_CONST globals - assert(Global.InitExpr.Opcode == wasm::WASM_OPCODE_I32_CONST); - return Global.InitExpr.Value.Int32; + switch (Sym.Info.Kind) { + case wasm::WASM_SYMBOL_TYPE_FUNCTION: + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + return Sym.Info.ElementIndex; + case wasm::WASM_SYMBOL_TYPE_DATA: { + // The value of a data symbol is the segment offset, plus the symbol + // offset within the segment. + uint32_t SegmentIndex = Sym.Info.DataRef.Segment; + const wasm::WasmDataSegment &Segment = DataSegments[SegmentIndex].Data; + assert(Segment.Offset.Opcode == wasm::WASM_OPCODE_I32_CONST); + return Segment.Offset.Value.Int32 + Sym.Info.DataRef.Offset; } + case wasm::WASM_SYMBOL_TYPE_SECTION: + return 0; } llvm_unreachable("invalid symbol type"); } @@ -885,14 +1087,15 @@ Expected<SymbolRef::Type> WasmObjectFile::getSymbolType(DataRefImpl Symb) const { const WasmSymbol &Sym = getWasmSymbol(Symb); - switch (Sym.Type) { - case WasmSymbol::SymbolType::FUNCTION_IMPORT: - case WasmSymbol::SymbolType::FUNCTION_EXPORT: - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + switch (Sym.Info.Kind) { + case wasm::WASM_SYMBOL_TYPE_FUNCTION: return SymbolRef::ST_Function; - case WasmSymbol::SymbolType::GLOBAL_IMPORT: - case WasmSymbol::SymbolType::GLOBAL_EXPORT: + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + return SymbolRef::ST_Other; + case wasm::WASM_SYMBOL_TYPE_DATA: return SymbolRef::ST_Data; + case wasm::WASM_SYMBOL_TYPE_SECTION: + return SymbolRef::ST_Debug; } llvm_unreachable("Unknown WasmSymbol::SymbolType"); @@ -901,8 +1104,28 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const { Expected<section_iterator> WasmObjectFile::getSymbolSection(DataRefImpl Symb) const { + const WasmSymbol& Sym = getWasmSymbol(Symb); + if (Sym.isUndefined()) + return section_end(); + DataRefImpl Ref; - Ref.d.a = getWasmSymbol(Symb).Section; + switch (Sym.Info.Kind) { + case wasm::WASM_SYMBOL_TYPE_FUNCTION: + Ref.d.a = CodeSection; + break; + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + Ref.d.a = GlobalSection; + break; + case wasm::WASM_SYMBOL_TYPE_DATA: + Ref.d.a = DataSection; + break; + case wasm::WASM_SYMBOL_TYPE_SECTION: { + Ref.d.a = Sym.Info.ElementIndex; + break; + } + default: + llvm_unreachable("Unknown WasmSymbol::SymbolType"); + } return section_iterator(SectionRef(Ref, this)); } @@ -1004,10 +1227,14 @@ uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const { return Rel.Offset; } -symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { - llvm_unreachable("not yet implemented"); - SymbolRef Ref; - return symbol_iterator(Ref); +symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + if (Rel.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) + return symbol_end(); + DataRefImpl Sym; + Sym.d.a = Rel.Index; + Sym.d.b = 0; + return symbol_iterator(SymbolRef(Sym, this)); } uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const { diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index 271224ec6312..1b7282f13db0 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -334,7 +334,7 @@ private: void writeDirectoryTree(); void writeDirectoryStringTable(); void writeFirstSectionRelocations(); - std::unique_ptr<MemoryBuffer> OutputBuffer; + std::unique_ptr<WritableMemoryBuffer> OutputBuffer; char *BufferStart; uint64_t CurrentOffset = 0; COFF::MachineTypes MachineType; @@ -360,7 +360,7 @@ WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( Data(Parser.getData()), StringTable(Parser.getStringTable()) { performFileLayout(); - OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); + OutputBuffer = WritableMemoryBuffer::getNewMemBuffer(FileSize); } void WindowsResourceCOFFWriter::performFileLayout() { @@ -425,7 +425,7 @@ static std::time_t getTime() { } std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() { - BufferStart = const_cast<char *>(OutputBuffer->getBufferStart()); + BufferStart = OutputBuffer->getBufferStart(); writeCOFFHeader(); writeFirstSectionHeader(); |