diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:12:21 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:12:21 +0000 |
commit | eb1ff93d02b5f17b6b409e83c6d9be585f4a04b3 (patch) | |
tree | 7490b4a8943293f251ad733465936e6ec302b3e9 /wasm | |
parent | bafea25f368c63f0b39789906adfed6e39219e64 (diff) |
Notes
Diffstat (limited to 'wasm')
-rw-r--r-- | wasm/CMakeLists.txt | 26 | ||||
-rw-r--r-- | wasm/Config.h | 51 | ||||
-rw-r--r-- | wasm/Driver.cpp | 321 | ||||
-rw-r--r-- | wasm/InputFiles.cpp | 303 | ||||
-rw-r--r-- | wasm/InputFiles.h | 153 | ||||
-rw-r--r-- | wasm/InputSegment.cpp | 25 | ||||
-rw-r--r-- | wasm/InputSegment.h | 74 | ||||
-rw-r--r-- | wasm/Options.td | 103 | ||||
-rw-r--r-- | wasm/OutputSections.cpp | 333 | ||||
-rw-r--r-- | wasm/OutputSections.h | 138 | ||||
-rw-r--r-- | wasm/OutputSegment.h | 56 | ||||
-rw-r--r-- | wasm/SymbolTable.cpp | 245 | ||||
-rw-r--r-- | wasm/SymbolTable.h | 72 | ||||
-rw-r--r-- | wasm/Symbols.cpp | 114 | ||||
-rw-r--r-- | wasm/Symbols.h | 128 | ||||
-rw-r--r-- | wasm/Writer.cpp | 724 | ||||
-rw-r--r-- | wasm/Writer.h | 21 | ||||
-rw-r--r-- | wasm/WriterUtils.cpp | 215 | ||||
-rw-r--r-- | wasm/WriterUtils.h | 78 |
19 files changed, 3180 insertions, 0 deletions
diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt new file mode 100644 index 0000000000000..19b0d168437ca --- /dev/null +++ b/wasm/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(WasmOptionsTableGen) + +add_lld_library(lldWasm + Driver.cpp + InputFiles.cpp + InputSegment.cpp + OutputSections.cpp + SymbolTable.cpp + Symbols.cpp + Writer.cpp + WriterUtils.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + BinaryFormat + Core + Demangle + Object + Option + Support + + LINK_LIBS + lldCommon + ) diff --git a/wasm/Config.h b/wasm/Config.h new file mode 100644 index 0000000000000..82f49ce175bb4 --- /dev/null +++ b/wasm/Config.h @@ -0,0 +1,51 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_CONFIG_H +#define LLD_WASM_CONFIG_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/Wasm.h" + +#include "Symbols.h" + +using llvm::wasm::WasmGlobal; + +namespace lld { +namespace wasm { + +struct Configuration { + bool AllowUndefined; + bool CheckSignatures; + bool Demangle; + bool EmitRelocs; + bool ImportMemory; + bool Relocatable; + bool StripAll; + bool StripDebug; + uint32_t GlobalBase; + uint32_t InitialMemory; + uint32_t MaxMemory; + uint32_t ZStackSize; + llvm::StringRef Entry; + llvm::StringRef OutputFile; + + llvm::StringSet<> AllowUndefinedSymbols; + std::vector<llvm::StringRef> SearchPaths; + Symbol *StackPointerSymbol = nullptr; +}; + +// The only instance of Configuration struct. 
+extern Configuration *Config; + +} // namespace wasm +} // namespace lld + +#endif diff --git a/wasm/Driver.cpp b/wasm/Driver.cpp new file mode 100644 index 0000000000000..97ec262be3081 --- /dev/null +++ b/wasm/Driver.cpp @@ -0,0 +1,321 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/Driver.h" +#include "Config.h" +#include "SymbolTable.h" +#include "Writer.h" +#include "lld/Common/Args.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Threads.h" +#include "lld/Common/Version.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" + +using namespace llvm; +using namespace llvm::sys; +using namespace llvm::wasm; + +using namespace lld; +using namespace lld::wasm; + +namespace { + +// Parses command line options. 
+class WasmOptTable : public llvm::opt::OptTable { +public: + WasmOptTable(); + llvm::opt::InputArgList parse(ArrayRef<const char *> Argv); +}; + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +class LinkerDriver { +public: + void link(ArrayRef<const char *> ArgsArr); + +private: + void createFiles(llvm::opt::InputArgList &Args); + void addFile(StringRef Path); + void addLibrary(StringRef Name); + std::vector<InputFile *> Files; +}; + +} // anonymous namespace + +Configuration *lld::wasm::Config; + +bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly, + raw_ostream &Error) { + errorHandler().LogName = Args[0]; + errorHandler().ErrorOS = &Error; + errorHandler().ColorDiagnostics = Error.has_colors(); + errorHandler().ErrorLimitExceededMsg = + "too many errors emitted, stopping now (use " + "-error-limit=0 to see all errors)"; + + Config = make<Configuration>(); + Symtab = make<SymbolTable>(); + + LinkerDriver().link(Args); + + // Exit immediately if we don't need to return to the caller. + // This saves time because the overhead of calling destructors + // for all globally-allocated objects is not negligible. + if (CanExitEarly) + exitLld(errorCount() ? 
1 : 0); + + freeArena(); + return !errorCount(); +} + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const opt::OptTable::Info OptInfo[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ + {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \ + X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#include "Options.inc" +#undef OPTION +}; + +// Set color diagnostics according to -color-diagnostics={auto,always,never} +// or -no-color-diagnostics flags. +static void handleColorDiagnostics(opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!Arg) + return; + + if (Arg->getOption().getID() == OPT_color_diagnostics) + errorHandler().ColorDiagnostics = true; + else if (Arg->getOption().getID() == OPT_no_color_diagnostics) + errorHandler().ColorDiagnostics = false; + else { + StringRef S = Arg->getValue(); + if (S == "always") + errorHandler().ColorDiagnostics = true; + if (S == "never") + errorHandler().ColorDiagnostics = false; + if (S != "auto") + error("unknown option: -color-diagnostics=" + S); + } +} + +// Find a file by concatenating given paths. +static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) { + SmallString<128> S; + path::append(S, Path1, Path2); + if (fs::exists(S)) + return S.str().str(); + return None; +} + +// Inject a new undefined symbol into the link. This will cause the link to +// fail unless this symbol can be found. 
+static void addSyntheticUndefinedFunction(StringRef Name, + const WasmSignature *Type) { + log("injecting undefined func: " + Name); + Symtab->addUndefinedFunction(Name, Type); +} + +static void printHelp(const char *Argv0) { + WasmOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false); +} + +WasmOptTable::WasmOptTable() : OptTable(OptInfo) {} + +opt::InputArgList WasmOptTable::parse(ArrayRef<const char *> Argv) { + SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); + + unsigned MissingIndex; + unsigned MissingCount; + opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); + + handleColorDiagnostics(Args); + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + error("unknown argument: " + Arg->getSpelling()); + return Args; +} + +void LinkerDriver::addFile(StringRef Path) { + Optional<MemoryBufferRef> Buffer = readFile(Path); + if (!Buffer.hasValue()) + return; + MemoryBufferRef MBRef = *Buffer; + + if (identify_magic(MBRef.getBuffer()) == file_magic::archive) + Files.push_back(make<ArchiveFile>(MBRef)); + else + Files.push_back(make<ObjFile>(MBRef)); +} + +// Add a given library by searching it from input search paths. 
+void LinkerDriver::addLibrary(StringRef Name) { + for (StringRef Dir : Config->SearchPaths) { + if (Optional<std::string> S = findFile(Dir, "lib" + Name + ".a")) { + addFile(*S); + return; + } + } + + error("unable to find library -l" + Name); +} + +void LinkerDriver::createFiles(opt::InputArgList &Args) { + for (auto *Arg : Args) { + switch (Arg->getOption().getUnaliasedOption().getID()) { + case OPT_l: + addLibrary(Arg->getValue()); + break; + case OPT_INPUT: + addFile(Arg->getValue()); + break; + } + } + + if (Files.empty()) + error("no input files"); +} + +static StringRef getEntry(opt::InputArgList &Args, StringRef Default) { + auto *Arg = Args.getLastArg(OPT_entry, OPT_no_entry); + if (!Arg) + return Default; + if (Arg->getOption().getID() == OPT_no_entry) + return ""; + return Arg->getValue(); +} + +void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { + WasmOptTable Parser; + opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); + + // Handle --help + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return; + } + + // Parse and evaluate -mllvm options. + std::vector<const char *> V; + V.push_back("wasm-ld (LLVM option parsing)"); + for (auto *Arg : Args.filtered(OPT_mllvm)) + V.push_back(Arg->getValue()); + cl::ParseCommandLineOptions(V.size(), V.data()); + + errorHandler().ErrorLimit = args::getInteger(Args, OPT_error_limit, 20); + + if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) { + outs() << getLLDVersion() << "\n"; + return; + } + + Config->AllowUndefined = Args.hasArg(OPT_allow_undefined); + Config->CheckSignatures = + Args.hasFlag(OPT_check_signatures, OPT_no_check_signatures, false); + Config->EmitRelocs = Args.hasArg(OPT_emit_relocs); + Config->Entry = getEntry(Args, Args.hasArg(OPT_relocatable) ? 
"" : "_start"); + Config->ImportMemory = Args.hasArg(OPT_import_memory); + Config->OutputFile = Args.getLastArgValue(OPT_o); + Config->Relocatable = Args.hasArg(OPT_relocatable); + Config->SearchPaths = args::getStrings(Args, OPT_L); + Config->StripAll = Args.hasArg(OPT_strip_all); + Config->StripDebug = Args.hasArg(OPT_strip_debug); + errorHandler().Verbose = Args.hasArg(OPT_verbose); + ThreadsEnabled = Args.hasFlag(OPT_threads, OPT_no_threads, true); + if (Config->Relocatable) + Config->EmitRelocs = true; + + Config->InitialMemory = args::getInteger(Args, OPT_initial_memory, 0); + Config->GlobalBase = args::getInteger(Args, OPT_global_base, 1024); + Config->MaxMemory = args::getInteger(Args, OPT_max_memory, 0); + Config->ZStackSize = + args::getZOptionValue(Args, OPT_z, "stack-size", WasmPageSize); + + if (auto *Arg = Args.getLastArg(OPT_allow_undefined_file)) + if (Optional<MemoryBufferRef> Buf = readFile(Arg->getValue())) + for (StringRef Sym : args::getLines(*Buf)) + Config->AllowUndefinedSymbols.insert(Sym); + + if (Config->OutputFile.empty()) + error("no output file specified"); + + if (!Args.hasArg(OPT_INPUT)) + error("no input files"); + + if (Config->Relocatable && !Config->Entry.empty()) + error("entry point specified for relocatable output file"); + if (Config->Relocatable && Args.hasArg(OPT_undefined)) + error("undefined symbols specified for relocatable output file"); + + if (!Config->Relocatable) { + if (!Config->Entry.empty()) { + static WasmSignature Signature = {{}, WASM_TYPE_NORESULT}; + addSyntheticUndefinedFunction(Config->Entry, &Signature); + } + + // Handle the `--undefined <sym>` options. + for (StringRef S : args::getStrings(Args, OPT_undefined)) + addSyntheticUndefinedFunction(S, nullptr); + + Config->StackPointerSymbol = Symtab->addDefinedGlobal("__stack_pointer"); + } + + createFiles(Args); + if (errorCount()) + return; + + // Add all files to the symbol table. This will add almost all + // symbols that we need to the symbol table. 
+ for (InputFile *F : Files) + Symtab->addFile(F); + + // Make sure we have resolved all symbols. + if (!Config->Relocatable && !Config->AllowUndefined) { + Symtab->reportRemainingUndefines(); + } else { + // When we allow undefined symbols we cannot include those defined in + // -u/--undefined since these undefined symbols have only names and no + // function signature, which means they cannot be written to the final + // output. + for (StringRef S : args::getStrings(Args, OPT_undefined)) { + Symbol *Sym = Symtab->find(S); + if (!Sym->isDefined()) + error("function forced with --undefined not found: " + Sym->getName()); + } + } + if (errorCount()) + return; + + if (!Config->Entry.empty() && !Symtab->find(Config->Entry)->isDefined()) + error("entry point not found: " + Config->Entry); + if (errorCount()) + return; + + // Write the result to the file. + writeResult(); +} diff --git a/wasm/InputFiles.cpp b/wasm/InputFiles.cpp new file mode 100644 index 0000000000000..e7463da39db97 --- /dev/null +++ b/wasm/InputFiles.cpp @@ -0,0 +1,303 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" + +#include "Config.h" +#include "InputSegment.h" +#include "SymbolTable.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "lld" + +using namespace lld; +using namespace lld::wasm; + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::wasm; + +Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { + log("Loading: " + Path); + + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) { + error("cannot open " + Path + ": " + EC.message()); + return None; + } + std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; + MemoryBufferRef MBRef = MB->getMemBufferRef(); + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership + + return MBRef; +} + +void ObjFile::dumpInfo() const { + log("reloc info for: " + getName() + "\n" + + " FunctionIndexOffset : " + Twine(FunctionIndexOffset) + "\n" + + " NumFunctionImports : " + Twine(NumFunctionImports()) + "\n" + + " NumGlobalImports : " + Twine(NumGlobalImports()) + "\n"); +} + +bool ObjFile::isImportedFunction(uint32_t Index) const { + return Index < NumFunctionImports(); +} + +Symbol *ObjFile::getFunctionSymbol(uint32_t Index) const { + return FunctionSymbols[Index]; +} + +Symbol *ObjFile::getTableSymbol(uint32_t Index) const { + return TableSymbols[Index]; +} + +Symbol *ObjFile::getGlobalSymbol(uint32_t Index) const { + return GlobalSymbols[Index]; +} + +uint32_t ObjFile::getRelocatedAddress(uint32_t Index) const { + return getGlobalSymbol(Index)->getVirtualAddress(); +} + +uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const { + Symbol *Sym = getFunctionSymbol(Original); + uint32_t Index = Sym->getOutputIndex(); + DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": " + << 
Original << " -> " << Index << "\n"); + return Index; +} + +uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const { + return TypeMap[Original]; +} + +uint32_t ObjFile::relocateTableIndex(uint32_t Original) const { + Symbol *Sym = getTableSymbol(Original); + uint32_t Index = Sym->getTableIndex(); + DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original + << " -> " << Index << "\n"); + return Index; +} + +uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const { + Symbol *Sym = getGlobalSymbol(Original); + uint32_t Index = Sym->getOutputIndex(); + DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original + << " -> " << Index << "\n"); + return Index; +} + +void ObjFile::parse() { + // Parse a memory buffer as a wasm file. + DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); + std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); + + auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); + if (!Obj) + fatal(toString(this) + ": not a wasm file"); + if (!Obj->isRelocatableObject()) + fatal(toString(this) + ": not a relocatable wasm file"); + + Bin.release(); + WasmObj.reset(Obj); + + // Find the code and data sections. Wasm objects can have at most one code + // and one data section. + for (const SectionRef &Sec : WasmObj->sections()) { + const WasmSection &Section = WasmObj->getWasmSection(Sec); + if (Section.Type == WASM_SEC_CODE) + CodeSection = &Section; + else if (Section.Type == WASM_SEC_DATA) + DataSection = &Section; + } + + initializeSymbols(); +} + +// Return the InputSegment in which a given symbol is defined. 
+InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) { + uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym); + for (InputSegment *Segment : Segments) { + if (Address >= Segment->startVA() && Address < Segment->endVA()) { + DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> " + << Segment->getName() << "\n"); + + return Segment; + } + } + error("symbol not found in any segment: " + WasmSym.Name); + return nullptr; +} + +static void copyRelocationsRange(std::vector<WasmRelocation> &To, + ArrayRef<WasmRelocation> From, size_t Start, + size_t End) { + for (const WasmRelocation &R : From) + if (R.Offset >= Start && R.Offset < End) + To.push_back(R); +} + +void ObjFile::initializeSymbols() { + Symbols.reserve(WasmObj->getNumberOfSymbols()); + + for (const WasmImport &Import : WasmObj->imports()) { + switch (Import.Kind) { + case WASM_EXTERNAL_FUNCTION: + ++FunctionImports; + break; + case WASM_EXTERNAL_GLOBAL: + ++GlobalImports; + break; + } + } + + FunctionSymbols.resize(FunctionImports + WasmObj->functions().size()); + GlobalSymbols.resize(GlobalImports + WasmObj->globals().size()); + + for (const WasmSegment &S : WasmObj->dataSegments()) { + InputSegment *Seg = make<InputSegment>(&S, this); + copyRelocationsRange(Seg->Relocations, DataSection->Relocations, + Seg->getInputSectionOffset(), + Seg->getInputSectionOffset() + Seg->getSize()); + Segments.emplace_back(Seg); + } + + // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols + // in the object + for (const SymbolRef &Sym : WasmObj->symbols()) { + const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); + Symbol *S; + switch (WasmSym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + S = createUndefined(WasmSym); + break; + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + S = createDefined(WasmSym, getSegment(WasmSym)); + break; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + S = 
createDefined(WasmSym); + break; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + // These are for debugging only, no need to create linker symbols for them + continue; + } + + Symbols.push_back(S); + if (WasmSym.isFunction()) { + DEBUG(dbgs() << "Function: " << WasmSym.ElementIndex << " -> " + << toString(*S) << "\n"); + FunctionSymbols[WasmSym.ElementIndex] = S; + } else { + DEBUG(dbgs() << "Global: " << WasmSym.ElementIndex << " -> " + << toString(*S) << "\n"); + GlobalSymbols[WasmSym.ElementIndex] = S; + } + } + + // Populate `TableSymbols` with all symbols that are called indirectly + uint32_t SegmentCount = WasmObj->elements().size(); + if (SegmentCount) { + if (SegmentCount > 1) + fatal(getName() + ": contains more than one element segment"); + const WasmElemSegment &Segment = WasmObj->elements()[0]; + if (Segment.Offset.Opcode != WASM_OPCODE_I32_CONST) + fatal(getName() + ": unsupported element segment"); + if (Segment.TableIndex != 0) + fatal(getName() + ": unsupported table index in elem segment"); + if (Segment.Offset.Value.Int32 != 0) + fatal(getName() + ": unsupported element segment offset"); + TableSymbols.reserve(Segment.Functions.size()); + for (uint64_t FunctionIndex : Segment.Functions) + TableSymbols.push_back(getFunctionSymbol(FunctionIndex)); + } + + DEBUG(dbgs() << "TableSymbols: " << TableSymbols.size() << "\n"); + DEBUG(dbgs() << "Functions : " << FunctionSymbols.size() << "\n"); + DEBUG(dbgs() << "Globals : " << GlobalSymbols.size() << "\n"); +} + +Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { + return Symtab->addUndefined(this, &Sym); +} + +Symbol *ObjFile::createDefined(const WasmSymbol &Sym, + const InputSegment *Segment) { + Symbol *S; + if (Sym.isLocal()) { + S = make<Symbol>(Sym.Name, true); + Symbol::Kind Kind; + if (Sym.Type == WasmSymbol::SymbolType::FUNCTION_EXPORT) + Kind = Symbol::Kind::DefinedFunctionKind; + else if (Sym.Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) + Kind = Symbol::Kind::DefinedGlobalKind; + 
else + llvm_unreachable("invalid local symbol type"); + S->update(Kind, this, &Sym, Segment); + return S; + } + return Symtab->addDefined(this, &Sym, Segment); +} + +void ArchiveFile::parse() { + // Parse a MemoryBufferRef as an archive file. + DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); + File = CHECK(Archive::create(MB), toString(this)); + + // Read the symbol table to construct Lazy symbols. + int Count = 0; + for (const Archive::Symbol &Sym : File->symbols()) { + Symtab->addLazy(this, &Sym); + ++Count; + } + DEBUG(dbgs() << "Read " << Count << " symbols\n"); +} + +void ArchiveFile::addMember(const Archive::Symbol *Sym) { + const Archive::Child &C = + CHECK(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); + + // Don't try to load the same member twice (this can happen when members + // mutually reference each other). + if (!Seen.insert(C.getChildOffset()).second) + return; + + DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); + DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); + + MemoryBufferRef MB = + CHECK(C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + Sym->getName()); + + if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) { + error("unknown file type: " + MB.getBufferIdentifier()); + return; + } + + InputFile *Obj = make<ObjFile>(MB); + Obj->ParentName = ParentName; + Symtab->addFile(Obj); +} + +// Returns a string in the format of "foo.o" or "foo.a(bar.o)". 
+std::string lld::toString(const wasm::InputFile *File) { + if (!File) + return "<internal>"; + + if (File->ParentName.empty()) + return File->getName(); + + return (File->ParentName + "(" + File->getName() + ")").str(); +} diff --git a/wasm/InputFiles.h b/wasm/InputFiles.h new file mode 100644 index 0000000000000..158cc53cafb1f --- /dev/null +++ b/wasm/InputFiles.h @@ -0,0 +1,153 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_INPUT_FILES_H +#define LLD_WASM_INPUT_FILES_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/MemoryBuffer.h" + +#include "WriterUtils.h" + +#include <vector> + +using llvm::object::Archive; +using llvm::object::WasmObjectFile; +using llvm::object::WasmSection; +using llvm::object::WasmSymbol; +using llvm::wasm::WasmImport; + +namespace lld { +namespace wasm { + +class Symbol; +class InputSegment; + +class InputFile { +public: + enum Kind { + ObjectKind, + ArchiveKind, + }; + + virtual ~InputFile() {} + + // Returns the filename. + StringRef getName() const { return MB.getBufferIdentifier(); } + + // Reads a file (the constructor doesn't do that). + virtual void parse() = 0; + + Kind kind() const { return FileKind; } + + // An archive file name if this file is created from an archive. 
+ StringRef ParentName; + +protected: + InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + MemoryBufferRef MB; + +private: + const Kind FileKind; +}; + +// .a file (ar archive) +class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + + void addMember(const Archive::Symbol *Sym); + + void parse() override; + +private: + std::unique_ptr<Archive> File; + llvm::DenseSet<uint64_t> Seen; +}; + +// .o file (wasm object file) +class ObjFile : public InputFile { +public: + explicit ObjFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ObjectKind; } + + void parse() override; + + // Returns the underlying wasm file. + const WasmObjectFile *getWasmObj() const { return WasmObj.get(); } + + void dumpInfo() const; + + uint32_t relocateTypeIndex(uint32_t Original) const; + uint32_t relocateFunctionIndex(uint32_t Original) const; + uint32_t relocateGlobalIndex(uint32_t Original) const; + uint32_t relocateTableIndex(uint32_t Original) const; + uint32_t getRelocatedAddress(uint32_t Index) const; + + // Returns true if the given function index is an imported function, + // as opposed to the locally defined function. 
+ bool isImportedFunction(uint32_t Index) const; + + size_t NumFunctionImports() const { return FunctionImports; } + size_t NumGlobalImports() const { return GlobalImports; } + + int32_t FunctionIndexOffset = 0; + const WasmSection *CodeSection = nullptr; + std::vector<OutputRelocation> CodeRelocations; + int32_t CodeOffset = 0; + const WasmSection *DataSection = nullptr; + + std::vector<uint32_t> TypeMap; + std::vector<InputSegment *> Segments; + + ArrayRef<Symbol *> getSymbols() { return Symbols; } + ArrayRef<Symbol *> getTableSymbols() { return TableSymbols; } + +private: + Symbol *createDefined(const WasmSymbol &Sym, + const InputSegment *Segment = nullptr); + Symbol *createUndefined(const WasmSymbol &Sym); + void initializeSymbols(); + InputSegment *getSegment(const WasmSymbol &WasmSym); + Symbol *getFunctionSymbol(uint32_t FunctionIndex) const; + Symbol *getTableSymbol(uint32_t TableIndex) const; + Symbol *getGlobalSymbol(uint32_t GlobalIndex) const; + + // List of all symbols referenced or defined by this file. + std::vector<Symbol *> Symbols; + + // List of all function symbols indexed by the function index space + std::vector<Symbol *> FunctionSymbols; + + // List of all global symbols indexed by the global index space + std::vector<Symbol *> GlobalSymbols; + + // List of all indirect symbols indexed by table index space. + std::vector<Symbol *> TableSymbols; + + uint32_t GlobalImports = 0; + uint32_t FunctionImports = 0; + std::unique_ptr<WasmObjectFile> WasmObj; +}; + +// Opens a given file. 
+llvm::Optional<MemoryBufferRef> readFile(StringRef Path); + +} // namespace wasm + +std::string toString(const wasm::InputFile *File); + +} // namespace lld + +#endif diff --git a/wasm/InputSegment.cpp b/wasm/InputSegment.cpp new file mode 100644 index 0000000000000..6509143862595 --- /dev/null +++ b/wasm/InputSegment.cpp @@ -0,0 +1,25 @@ +//===- InputSegment.cpp ---------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "InputSegment.h" +#include "OutputSegment.h" +#include "lld/Common/LLVM.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace lld::wasm; + +uint32_t InputSegment::translateVA(uint32_t Address) const { + assert(Address >= startVA() && Address < endVA()); + int32_t Delta = OutputSeg->StartVA + OutputSegmentOffset - startVA(); + DEBUG(dbgs() << "translateVA: " << getName() << " Delta=" << Delta + << " Address=" << Address << "\n"); + return Address + Delta; +} diff --git a/wasm/InputSegment.h b/wasm/InputSegment.h new file mode 100644 index 0000000000000..64124b1ebc2ce --- /dev/null +++ b/wasm/InputSegment.h @@ -0,0 +1,74 @@ +//===- InputSegment.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Represents a WebAssembly data segment which can be included as part of +// an output data segment. Note that WebAssembly, unlike ELF and other +// formats, uses the term "data segment" to refer to the contiguous regions of +// memory that make up the data section. 
See: +// https://webassembly.github.io/spec/syntax/modules.html#syntax-data +// +// For example, by default, clang will produce a separate data section for +// each global variable. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_INPUT_SEGMENT_H +#define LLD_WASM_INPUT_SEGMENT_H + +#include "lld/Common/ErrorHandler.h" +#include "llvm/Object/Wasm.h" + +using llvm::object::WasmSegment; +using llvm::wasm::WasmRelocation; + +namespace lld { +namespace wasm { + +class ObjFile; +class OutputSegment; + +class InputSegment { +public: + InputSegment(const WasmSegment *Seg, const ObjFile *F) + : Segment(Seg), File(F) {} + + // Translate an offset in the input segment to an offset in the output + // segment. + uint32_t translateVA(uint32_t Address) const; + + const OutputSegment *getOutputSegment() const { return OutputSeg; } + + uint32_t getOutputSegmentOffset() const { return OutputSegmentOffset; } + + uint32_t getInputSectionOffset() const { return Segment->SectionOffset; } + + void setOutputSegment(const OutputSegment *Segment, uint32_t Offset) { + OutputSeg = Segment; + OutputSegmentOffset = Offset; + } + + uint32_t getSize() const { return Segment->Data.Content.size(); } + uint32_t getAlignment() const { return Segment->Data.Alignment; } + uint32_t startVA() const { return Segment->Data.Offset.Value.Int32; } + uint32_t endVA() const { return startVA() + getSize(); } + StringRef getName() const { return Segment->Data.Name; } + + const WasmSegment *Segment; + const ObjFile *File; + std::vector<WasmRelocation> Relocations; + +protected: + const OutputSegment *OutputSeg = nullptr; + uint32_t OutputSegmentOffset = 0; +}; + +} // namespace wasm +} // namespace lld + +#endif // LLD_WASM_INPUT_SEGMENT_H diff --git a/wasm/Options.td b/wasm/Options.td new file mode 100644 index 0000000000000..df0c6d7080725 --- /dev/null +++ b/wasm/Options.td @@ -0,0 +1,103 @@ +include "llvm/Option/OptParser.td" + +// For options whose 
names are multiple letters, either one dash or +// two can precede the option name except those that start with 'o'. +class F<string name>: Flag<["--", "-"], name>; +class J<string name>: Joined<["--", "-"], name>; +class S<string name>: Separate<["--", "-"], name>; + +multiclass Eq<string name> { + def "": Separate<["--", "-"], name>; + def _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>; +} + +def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">, + HelpText<"Add a directory to the library search path">; + +def color_diagnostics: F<"color-diagnostics">, + HelpText<"Use colors in diagnostics">; + +def color_diagnostics_eq: J<"color-diagnostics=">, + HelpText<"Use colors in diagnostics">; + +// The follow flags are shared with the ELF linker +def help: F<"help">, HelpText<"Print option help">; + +def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">, + HelpText<"Root name of library to use">; + +def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">; + +def no_threads: F<"no-threads">, + HelpText<"Do not run the linker multi-threaded">; + +def no_color_diagnostics: F<"no-color-diagnostics">, + HelpText<"Do not use colors in diagnostics">; + +def no_check_signatures: F<"no-check-signatures">, HelpText<"Don't check function signatures">; + +def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, + HelpText<"Path to file to write output">; + +def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; + +def check_signatures: F<"check-signatures">, HelpText<"Check function signatures">; + +def v: Flag<["-"], "v">, HelpText<"Display the version number">; + +def version: F<"version">, HelpText<"Display the version number and exit">; + +def verbose: F<"verbose">, HelpText<"Verbose mode">; + +def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; + +def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">; + +def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; + +def 
strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; + +defm undefined: Eq<"undefined">, + HelpText<"Force undefined symbol during linking">; + +def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">, + HelpText<"Linker option extensions">; + +def entry: S<"entry">, MetaVarName<"<entry>">, + HelpText<"Name of entry point symbol">; + +def no_entry: F<"no-entry">, + HelpText<"Do not output any entry point">; + +def error_limit: J<"error-limit=">, + HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; + +// The follow flags are unique to wasm + +def global_base: J<"global-base=">, + HelpText<"Where to start to place global data">; + +def initial_memory: J<"initial-memory=">, + HelpText<"Initial size of the linear memory">; + +def max_memory: J<"max-memory=">, + HelpText<"Maximum size of the linear memory">; + +def import_memory: F<"import-memory">, + HelpText<"Import memory from the environment">; + +def allow_undefined: F<"allow-undefined">, + HelpText<"Allow undefined symbols in linked binary">; + +def allow_undefined_file: J<"allow-undefined-file=">, + HelpText<"Allow symbols listed in <file> to be undefined in linked binary">; + +def allow_undefined_file_s: Separate<["-"], "allow-undefined-file">, Alias<allow_undefined_file>; + +// Aliases +def alias_initial_memory_i: Flag<["-"], "i">, Alias<initial_memory>; +def alias_max_memory_m: Flag<["-"], "m">, Alias<max_memory>; +def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>; +def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; +def alias_entry_entry: J<"entry=">, Alias<entry>; +def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>; diff --git a/wasm/OutputSections.cpp b/wasm/OutputSections.cpp new file mode 100644 index 0000000000000..e5253640b9dbe --- /dev/null +++ b/wasm/OutputSections.cpp @@ -0,0 +1,333 @@ +//===- OutputSections.cpp -------------------------------------------------===// +// +// The LLVM Linker +// +// This file 
is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "OutputSections.h" + +#include "Config.h" +#include "InputFiles.h" +#include "OutputSegment.h" +#include "SymbolTable.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Threads.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/LEB128.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace llvm::wasm; +using namespace lld; +using namespace lld::wasm; + +enum class RelocEncoding { + Uleb128, + Sleb128, + I32, +}; + +static StringRef sectionTypeToString(uint32_t SectionType) { + switch (SectionType) { + case WASM_SEC_CUSTOM: + return "CUSTOM"; + case WASM_SEC_TYPE: + return "TYPE"; + case WASM_SEC_IMPORT: + return "IMPORT"; + case WASM_SEC_FUNCTION: + return "FUNCTION"; + case WASM_SEC_TABLE: + return "TABLE"; + case WASM_SEC_MEMORY: + return "MEMORY"; + case WASM_SEC_GLOBAL: + return "GLOBAL"; + case WASM_SEC_EXPORT: + return "EXPORT"; + case WASM_SEC_START: + return "START"; + case WASM_SEC_ELEM: + return "ELEM"; + case WASM_SEC_CODE: + return "CODE"; + case WASM_SEC_DATA: + return "DATA"; + default: + fatal("invalid section type"); + } +} + +std::string lld::toString(OutputSection *Section) { + std::string rtn = sectionTypeToString(Section->Type); + if (!Section->Name.empty()) + rtn += "(" + Section->Name + ")"; + return rtn; +} + +static void applyRelocation(uint8_t *Buf, const OutputRelocation &Reloc) { + DEBUG(dbgs() << "write reloc: type=" << Reloc.Reloc.Type + << " index=" << Reloc.Reloc.Index << " value=" << Reloc.Value + << " offset=" << Reloc.Reloc.Offset << "\n"); + Buf += Reloc.Reloc.Offset; + int64_t ExistingValue; + switch (Reloc.Reloc.Type) { + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + ExistingValue = decodeULEB128(Buf); + if (ExistingValue != 
Reloc.Reloc.Index) { + DEBUG(dbgs() << "existing value: " << decodeULEB128(Buf) << "\n"); + assert(decodeULEB128(Buf) == Reloc.Reloc.Index); + } + LLVM_FALLTHROUGH; + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + encodeULEB128(Reloc.Value, Buf, 5); + break; + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + ExistingValue = decodeSLEB128(Buf); + if (ExistingValue != Reloc.Reloc.Index) { + DEBUG(dbgs() << "existing value: " << decodeSLEB128(Buf) << "\n"); + assert(decodeSLEB128(Buf) == Reloc.Reloc.Index); + } + LLVM_FALLTHROUGH; + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + encodeSLEB128(static_cast<int32_t>(Reloc.Value), Buf, 5); + break; + case R_WEBASSEMBLY_TABLE_INDEX_I32: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + support::endian::write32<support::little>(Buf, Reloc.Value); + break; + default: + llvm_unreachable("unknown relocation type"); + } +} + +static void applyRelocations(uint8_t *Buf, + ArrayRef<OutputRelocation> Relocs) { + log("applyRelocations: count=" + Twine(Relocs.size())); + for (const OutputRelocation &Reloc : Relocs) { + applyRelocation(Buf, Reloc); + } +} + +// Relocations contain an index into the function, global or table index +// space of the input file. This function takes a relocation and returns the +// relocated index (i.e. translates from the input index space to the output +// index space). 
+static uint32_t calcNewIndex(const ObjFile &File, const WasmRelocation &Reloc) { + switch (Reloc.Type) { + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + return File.relocateTypeIndex(Reloc.Index); + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + return File.relocateFunctionIndex(Reloc.Index); + case R_WEBASSEMBLY_TABLE_INDEX_I32: + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + return File.relocateTableIndex(Reloc.Index); + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + return File.relocateGlobalIndex(Reloc.Index); + default: + llvm_unreachable("unknown relocation type"); + } +} + +// Take a vector of relocations from an input file and create output +// relocations based on them. Calculates the updated index and offset for +// each relocation as well as the value to write out in the final binary. +static void calcRelocations(const ObjFile &File, + ArrayRef<WasmRelocation> Relocs, + std::vector<OutputRelocation> &OutputRelocs, + int32_t OutputOffset) { + log("calcRelocations: " + File.getName() + " offset=" + Twine(OutputOffset)); + for (const WasmRelocation &Reloc : Relocs) { + OutputRelocation NewReloc; + NewReloc.Reloc = Reloc; + NewReloc.Reloc.Offset += OutputOffset; + DEBUG(dbgs() << "reloc: type=" << Reloc.Type << " index=" << Reloc.Index + << " offset=" << Reloc.Offset + << " newOffset=" << NewReloc.Reloc.Offset << "\n"); + + if (Config->EmitRelocs) + NewReloc.NewIndex = calcNewIndex(File, Reloc); + else + NewReloc.NewIndex = UINT32_MAX; + + switch (Reloc.Type) { + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + NewReloc.Value = File.getRelocatedAddress(Reloc.Index); + if (NewReloc.Value != UINT32_MAX) + NewReloc.Value += Reloc.Addend; + break; + default: + NewReloc.Value = calcNewIndex(File, Reloc); + break; + } + + OutputRelocs.emplace_back(NewReloc); + } +} + +void OutputSection::createHeader(size_t 
BodySize) { + raw_string_ostream OS(Header); + debugWrite(OS.tell(), + "section type [" + Twine(sectionTypeToString(Type)) + "]"); + writeUleb128(OS, Type, nullptr); + writeUleb128(OS, BodySize, "section size"); + OS.flush(); + log("createHeader: " + toString(this) + " body=" + Twine(BodySize) + + " total=" + Twine(getSize())); +} + +CodeSection::CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs) + : OutputSection(WASM_SEC_CODE), InputObjects(Objs) { + raw_string_ostream OS(CodeSectionHeader); + writeUleb128(OS, NumFunctions, "function count"); + OS.flush(); + BodySize = CodeSectionHeader.size(); + + for (ObjFile *File : InputObjects) { + if (!File->CodeSection) + continue; + + File->CodeOffset = BodySize; + ArrayRef<uint8_t> Content = File->CodeSection->Content; + unsigned HeaderSize = 0; + decodeULEB128(Content.data(), &HeaderSize); + + calcRelocations(*File, File->CodeSection->Relocations, + File->CodeRelocations, BodySize - HeaderSize); + + size_t PayloadSize = Content.size() - HeaderSize; + BodySize += PayloadSize; + } + + createHeader(BodySize); +} + +void CodeSection::writeTo(uint8_t *Buf) { + log("writing " + toString(this)); + log(" size=" + Twine(getSize())); + Buf += Offset; + + // Write section header + memcpy(Buf, Header.data(), Header.size()); + Buf += Header.size(); + + uint8_t *ContentsStart = Buf; + + // Write code section headers + memcpy(Buf, CodeSectionHeader.data(), CodeSectionHeader.size()); + Buf += CodeSectionHeader.size(); + + // Write code section bodies + parallelForEach(InputObjects, [ContentsStart](ObjFile *File) { + if (!File->CodeSection) + return; + + ArrayRef<uint8_t> Content(File->CodeSection->Content); + + // Payload doesn't include the initial header (function count) + unsigned HeaderSize = 0; + decodeULEB128(Content.data(), &HeaderSize); + + size_t PayloadSize = Content.size() - HeaderSize; + memcpy(ContentsStart + File->CodeOffset, Content.data() + HeaderSize, + PayloadSize); + + log("applying relocations for: " + 
File->getName()); + if (File->CodeRelocations.size()) + applyRelocations(ContentsStart, File->CodeRelocations); + }); +} + +uint32_t CodeSection::numRelocations() const { + uint32_t Count = 0; + for (ObjFile *File : InputObjects) + Count += File->CodeRelocations.size(); + return Count; +} + +void CodeSection::writeRelocations(raw_ostream &OS) const { + for (ObjFile *File : InputObjects) + for (const OutputRelocation &Reloc : File->CodeRelocations) + writeReloc(OS, Reloc); +} + +DataSection::DataSection(ArrayRef<OutputSegment *> Segments) + : OutputSection(WASM_SEC_DATA), Segments(Segments) { + raw_string_ostream OS(DataSectionHeader); + + writeUleb128(OS, Segments.size(), "data segment count"); + OS.flush(); + BodySize = DataSectionHeader.size(); + + for (OutputSegment *Segment : Segments) { + raw_string_ostream OS(Segment->Header); + writeUleb128(OS, 0, "memory index"); + writeUleb128(OS, WASM_OPCODE_I32_CONST, "opcode:i32const"); + writeSleb128(OS, Segment->StartVA, "memory offset"); + writeUleb128(OS, WASM_OPCODE_END, "opcode:end"); + writeUleb128(OS, Segment->Size, "segment size"); + OS.flush(); + Segment->setSectionOffset(BodySize); + BodySize += Segment->Header.size(); + log("Data segment: size=" + Twine(Segment->Size)); + for (const InputSegment *InputSeg : Segment->InputSegments) { + uint32_t InputOffset = InputSeg->getInputSectionOffset(); + uint32_t OutputOffset = Segment->getSectionOffset() + + Segment->Header.size() + + InputSeg->getOutputSegmentOffset(); + calcRelocations(*InputSeg->File, InputSeg->Relocations, Relocations, + OutputOffset - InputOffset); + } + BodySize += Segment->Size; + } + + createHeader(BodySize); +} + +void DataSection::writeTo(uint8_t *Buf) { + log("writing " + toString(this) + " size=" + Twine(getSize()) + + " body=" + Twine(BodySize)); + Buf += Offset; + + // Write section header + memcpy(Buf, Header.data(), Header.size()); + Buf += Header.size(); + + uint8_t *ContentsStart = Buf; + + // Write data section headers + memcpy(Buf, 
DataSectionHeader.data(), DataSectionHeader.size()); + + parallelForEach(Segments, [ContentsStart](const OutputSegment *Segment) { + // Write data segment header + uint8_t *SegStart = ContentsStart + Segment->getSectionOffset(); + memcpy(SegStart, Segment->Header.data(), Segment->Header.size()); + + // Write segment data payload + for (const InputSegment *Input : Segment->InputSegments) { + ArrayRef<uint8_t> Content(Input->Segment->Data.Content); + memcpy(SegStart + Segment->Header.size() + + Input->getOutputSegmentOffset(), + Content.data(), Content.size()); + } + }); + + applyRelocations(ContentsStart, Relocations); +} + +void DataSection::writeRelocations(raw_ostream &OS) const { + for (const OutputRelocation &Reloc : Relocations) + writeReloc(OS, Reloc); +} diff --git a/wasm/OutputSections.h b/wasm/OutputSections.h new file mode 100644 index 0000000000000..926101710cdf3 --- /dev/null +++ b/wasm/OutputSections.h @@ -0,0 +1,138 @@ +//===- OutputSections.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_OUTPUT_SECTIONS_H +#define LLD_WASM_OUTPUT_SECTIONS_H + +#include "InputSegment.h" +#include "WriterUtils.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" + +using llvm::raw_ostream; +using llvm::raw_string_ostream; + +namespace lld { + +namespace wasm { +class OutputSection; +} +std::string toString(wasm::OutputSection *Section); + +namespace wasm { + +class OutputSegment; +class ObjFile; + +class OutputSection { +public: + OutputSection(uint32_t Type, std::string Name = "") + : Type(Type), Name(Name) {} + + virtual ~OutputSection() = default; + + void setOffset(size_t NewOffset) { + log("setOffset: " + toString(this) + " -> " + Twine(NewOffset)); + Offset = NewOffset; + } + + void createHeader(size_t BodySize); + virtual size_t getSize() const = 0; + virtual void writeTo(uint8_t *Buf) = 0; + virtual void finalizeContents() {} + + std::string Header; + uint32_t Type; + std::string Name; + + virtual uint32_t numRelocations() const { return 0; } + virtual void writeRelocations(raw_ostream &OS) const {} + +protected: + size_t Offset = 0; +}; + +class SyntheticSection : public OutputSection { +public: + SyntheticSection(uint32_t Type, std::string Name = "") + : OutputSection(Type, Name), BodyOutputStream(Body) { + if (!Name.empty()) + writeStr(BodyOutputStream, Name); + } + + void writeTo(uint8_t *Buf) override { + assert(Offset); + log("writing " + toString(this)); + memcpy(Buf + Offset, Header.data(), Header.size()); + memcpy(Buf + Offset + Header.size(), Body.data(), Body.size()); + } + + size_t getSize() const override { return Header.size() + Body.size(); } + + void finalizeContents() override { + BodyOutputStream.flush(); + createHeader(Body.size()); + } + + raw_ostream &getStream() { return BodyOutputStream; } + + std::string Body; + +protected: + raw_string_ostream BodyOutputStream; +}; + +// Some synthetic sections (e.g. 
"name" and "linking") have subsections. +// Just like the synthetic sections themselves these need to be created before +// they can be written out (since they are preceded by their length). This +// class is used to create subsections and then write them into the stream +// of the parent section. +class SubSection : public SyntheticSection { +public: + explicit SubSection(uint32_t Type) : SyntheticSection(Type) {} + + void writeToStream(raw_ostream &OS) { + writeBytes(OS, Header.data(), Header.size()); + writeBytes(OS, Body.data(), Body.size()); + } +}; + +class CodeSection : public OutputSection { +public: + explicit CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs); + size_t getSize() const override { return Header.size() + BodySize; } + void writeTo(uint8_t *Buf) override; + uint32_t numRelocations() const override; + void writeRelocations(raw_ostream &OS) const override; + +protected: + ArrayRef<ObjFile *> InputObjects; + std::string CodeSectionHeader; + size_t BodySize = 0; +}; + +class DataSection : public OutputSection { +public: + explicit DataSection(ArrayRef<OutputSegment *> Segments); + size_t getSize() const override { return Header.size() + BodySize; } + void writeTo(uint8_t *Buf) override; + uint32_t numRelocations() const override { return Relocations.size(); } + void writeRelocations(raw_ostream &OS) const override; + +protected: + std::vector<OutputRelocation> Relocations; + ArrayRef<OutputSegment *> Segments; + std::string DataSectionHeader; + size_t BodySize = 0; +}; + +} // namespace wasm +} // namespace lld + +#endif // LLD_WASM_OUTPUT_SECTIONS_H diff --git a/wasm/OutputSegment.h b/wasm/OutputSegment.h new file mode 100644 index 0000000000000..1375aefae92ff --- /dev/null +++ b/wasm/OutputSegment.h @@ -0,0 +1,56 @@ +//===- OutputSegment.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_OUTPUT_SEGMENT_H +#define LLD_WASM_OUTPUT_SEGMENT_H + +#include "InputSegment.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/Object/Wasm.h" + +namespace lld { +namespace wasm { + +class InputSegment; + +class OutputSegment { +public: + OutputSegment(StringRef N) : Name(N) {} + + void addInputSegment(InputSegment *Segment) { + Alignment = std::max(Alignment, Segment->getAlignment()); + InputSegments.push_back(Segment); + Size = llvm::alignTo(Size, Segment->getAlignment()); + Segment->setOutputSegment(this, Size); + Size += Segment->getSize(); + } + + uint32_t getSectionOffset() const { return SectionOffset; } + + void setSectionOffset(uint32_t Offset) { SectionOffset = Offset; } + + StringRef Name; + uint32_t Alignment = 0; + uint32_t StartVA = 0; + std::vector<const InputSegment *> InputSegments; + + // Sum of the size of the all the input segments + uint32_t Size = 0; + + // Segment header + std::string Header; + +private: + uint32_t SectionOffset = 0; +}; + +} // namespace wasm +} // namespace lld + +#endif // LLD_WASM_OUTPUT_SEGMENT_H diff --git a/wasm/SymbolTable.cpp b/wasm/SymbolTable.cpp new file mode 100644 index 0000000000000..d9a6fa1f04f50 --- /dev/null +++ b/wasm/SymbolTable.cpp @@ -0,0 +1,245 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SymbolTable.h" + +#include "Config.h" +#include "WriterUtils.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" + +#include <unordered_set> + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace lld; +using namespace lld::wasm; + +SymbolTable *lld::wasm::Symtab; + +void SymbolTable::addFile(InputFile *File) { + log("Processing: " + toString(File)); + File->parse(); + + if (auto *F = dyn_cast<ObjFile>(File)) + ObjectFiles.push_back(F); +} + +void SymbolTable::reportRemainingUndefines() { + std::unordered_set<Symbol *> Undefs; + for (Symbol *Sym : SymVector) { + if (Sym->isUndefined() && !Sym->isWeak() && + Config->AllowUndefinedSymbols.count(Sym->getName()) == 0) { + Undefs.insert(Sym); + } + } + + if (Undefs.empty()) + return; + + for (ObjFile *File : ObjectFiles) + for (Symbol *Sym : File->getSymbols()) + if (Undefs.count(Sym)) + error(toString(File) + ": undefined symbol: " + toString(*Sym)); + + for (Symbol *Sym : Undefs) + if (!Sym->getFile()) + error("undefined symbol: " + toString(*Sym)); +} + +Symbol *SymbolTable::find(StringRef Name) { + auto It = SymMap.find(CachedHashStringRef(Name)); + if (It == SymMap.end()) + return nullptr; + return It->second; +} + +std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { + Symbol *&Sym = SymMap[CachedHashStringRef(Name)]; + if (Sym) + return {Sym, false}; + Sym = make<Symbol>(Name, false); + SymVector.emplace_back(Sym); + return {Sym, true}; +} + +void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { + error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " + + toString(Existing->getFile()) + "\n>>> defined in " + + toString(NewFile)); +} + +// Get the signature for a given function symbol, either by looking +// it up in function sections (for defined functions), of the imports section +// (for imported functions). 
+static const WasmSignature *getFunctionSig(const ObjFile &Obj, + const WasmSymbol &Sym) { + DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n"); + const WasmObjectFile *WasmObj = Obj.getWasmObj(); + uint32_t FunctionType; + if (Obj.isImportedFunction(Sym.ElementIndex)) { + const WasmImport &Import = WasmObj->imports()[Sym.ImportIndex]; + FunctionType = Import.SigIndex; + } else { + uint32_t FuntionIndex = Sym.ElementIndex - Obj.NumFunctionImports(); + FunctionType = WasmObj->functionTypes()[FuntionIndex]; + } + return &WasmObj->types()[FunctionType]; +} + +// Check the type of new symbol matches that of the symbol is replacing. +// For functions this can also involve verifying that the signatures match. +static void checkSymbolTypes(const Symbol &Existing, const InputFile &F, + const WasmSymbol &New, + const WasmSignature *NewSig) { + if (Existing.isLazy()) + return; + + bool NewIsFunction = New.Type == WasmSymbol::SymbolType::FUNCTION_EXPORT || + New.Type == WasmSymbol::SymbolType::FUNCTION_IMPORT; + + // First check the symbol types match (i.e. either both are function + // symbols or both are data symbols). + if (Existing.isFunction() != NewIsFunction) { + error("symbol type mismatch: " + New.Name + "\n>>> defined as " + + (Existing.isFunction() ? "Function" : "Global") + " in " + + toString(Existing.getFile()) + "\n>>> defined as " + + (NewIsFunction ? "Function" : "Global") + " in " + F.getName()); + return; + } + + // For function symbols, optionally check the function signature matches too. + if (!NewIsFunction || !Config->CheckSignatures) + return; + // Skip the signature check if the existing function has no signature (e.g. + // if it is an undefined symbol generated by --undefined command line flag). 
+ if (!Existing.hasFunctionType()) + return; + + DEBUG(dbgs() << "checkSymbolTypes: " << New.Name << "\n"); + assert(NewSig); + + const WasmSignature &OldSig = Existing.getFunctionType(); + if (*NewSig == OldSig) + return; + + error("function signature mismatch: " + New.Name + "\n>>> defined as " + + toString(OldSig) + " in " + toString(Existing.getFile()) + + "\n>>> defined as " + toString(*NewSig) + " in " + F.getName()); +} + +Symbol *SymbolTable::addDefinedGlobal(StringRef Name) { + DEBUG(dbgs() << "addDefinedGlobal: " << Name << "\n"); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) + S->update(Symbol::DefinedGlobalKind); + else if (!S->isGlobal()) + error("symbol type mismatch: " + Name); + return S; +} + +Symbol *SymbolTable::addDefined(InputFile *F, const WasmSymbol *Sym, + const InputSegment *Segment) { + DEBUG(dbgs() << "addDefined: " << Sym->Name << "\n"); + Symbol *S; + bool WasInserted; + Symbol::Kind Kind = Symbol::DefinedFunctionKind; + const WasmSignature *NewSig = nullptr; + if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) + Kind = Symbol::DefinedGlobalKind; + else + NewSig = getFunctionSig(*cast<ObjFile>(F), *Sym); + + std::tie(S, WasInserted) = insert(Sym->Name); + if (WasInserted) { + S->update(Kind, F, Sym, Segment, NewSig); + } else if (S->isLazy()) { + // The existing symbol is lazy. Replace it without checking types since + // lazy symbols don't have any type information. + DEBUG(dbgs() << "replacing existing lazy symbol: " << Sym->Name << "\n"); + S->update(Kind, F, Sym, Segment, NewSig); + } else if (!S->isDefined()) { + // The existing symbol table entry is undefined. 
The new symbol replaces + // it, after checking the type matches + DEBUG(dbgs() << "resolving existing undefined symbol: " << Sym->Name + << "\n"); + checkSymbolTypes(*S, *F, *Sym, NewSig); + S->update(Kind, F, Sym, Segment, NewSig); + } else if (Sym->isWeak()) { + // the new symbol is weak we can ignore it + DEBUG(dbgs() << "existing symbol takes precedence\n"); + } else if (S->isWeak()) { + // the new symbol is not weak and the existing symbol is, so we replace + // it + DEBUG(dbgs() << "replacing existing weak symbol\n"); + checkSymbolTypes(*S, *F, *Sym, NewSig); + S->update(Kind, F, Sym, Segment, NewSig); + } else { + // neither symbol is week. They conflict. + reportDuplicate(S, F); + } + return S; +} + +Symbol *SymbolTable::addUndefinedFunction(StringRef Name, + const WasmSignature *Type) { + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + S->update(Symbol::UndefinedFunctionKind, nullptr, nullptr, nullptr, Type); + } else if (!S->isFunction()) { + error("symbol type mismatch: " + Name); + } + return S; +} + +Symbol *SymbolTable::addUndefined(InputFile *F, const WasmSymbol *Sym) { + DEBUG(dbgs() << "addUndefined: " << Sym->Name << "\n"); + Symbol *S; + bool WasInserted; + Symbol::Kind Kind = Symbol::UndefinedFunctionKind; + const WasmSignature *NewSig = nullptr; + if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_IMPORT) + Kind = Symbol::UndefinedGlobalKind; + else + NewSig = getFunctionSig(*cast<ObjFile>(F), *Sym); + std::tie(S, WasInserted) = insert(Sym->Name); + if (WasInserted) { + S->update(Kind, F, Sym, nullptr, NewSig); + } else if (S->isLazy()) { + DEBUG(dbgs() << "resolved by existing lazy\n"); + auto *AF = cast<ArchiveFile>(S->getFile()); + AF->addMember(&S->getArchiveSymbol()); + } else if (S->isDefined()) { + DEBUG(dbgs() << "resolved by existing\n"); + checkSymbolTypes(*S, *F, *Sym, NewSig); + } + return S; +} + +void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol *Sym) { + DEBUG(dbgs() << 
"addLazy: " << Sym->getName() << "\n"); + StringRef Name = Sym->getName(); + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { + S->update(Symbol::LazyKind, F); + S->setArchiveSymbol(*Sym); + } else if (S->isUndefined()) { + // There is an existing undefined symbol. The can load from the + // archive. + DEBUG(dbgs() << "replacing existing undefined\n"); + F->addMember(Sym); + } +} diff --git a/wasm/SymbolTable.h b/wasm/SymbolTable.h new file mode 100644 index 0000000000000..e1e7da120b936 --- /dev/null +++ b/wasm/SymbolTable.h @@ -0,0 +1,72 @@ +//===- SymbolTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_SYMBOL_TABLE_H +#define LLD_WASM_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "Symbols.h" + +#include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/raw_ostream.h" + +using llvm::object::WasmSymbol; +using llvm::wasm::WasmSignature; + +namespace lld { +namespace wasm { + +class InputSegment; + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or lazy symbols (the last one is symbols in archive +// files whose archive members are not yet loaded). +// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. Or, if there's a conflict between a lazy and a +// undefined, it'll read an archive member to read a real definition +// to replace the lazy symbol. The logic is implemented in the +// add*() functions, which are called by input files as they are parsed. +// There is one add* function per symbol type. 
+class SymbolTable { +public: + void addFile(InputFile *File); + + std::vector<ObjFile *> ObjectFiles; + std::vector<Symbol *> SyntheticSymbols; + + void reportDuplicate(Symbol *Existing, InputFile *NewFile); + void reportRemainingUndefines(); + + ArrayRef<Symbol *> getSymbols() const { return SymVector; } + Symbol *find(StringRef Name); + + Symbol *addDefined(InputFile *F, const WasmSymbol *Sym, + const InputSegment *Segment = nullptr); + Symbol *addUndefined(InputFile *F, const WasmSymbol *Sym); + Symbol *addUndefinedFunction(StringRef Name, const WasmSignature *Type); + Symbol *addDefinedGlobal(StringRef Name); + void addLazy(ArchiveFile *F, const Archive::Symbol *Sym); + +private: + std::pair<Symbol *, bool> insert(StringRef Name); + + llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap; + std::vector<Symbol *> SymVector; +}; + +extern SymbolTable *Symtab; + +} // namespace wasm +} // namespace lld + +#endif diff --git a/wasm/Symbols.cpp b/wasm/Symbols.cpp new file mode 100644 index 0000000000000..6bf5459c26633 --- /dev/null +++ b/wasm/Symbols.cpp @@ -0,0 +1,114 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Symbols.h" + +#include "Config.h" +#include "InputFiles.h" +#include "InputSegment.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Strings.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace lld; +using namespace lld::wasm; + +uint32_t Symbol::getGlobalIndex() const { + assert(!Sym->isFunction()); + return Sym->ElementIndex; +} + +uint32_t Symbol::getFunctionIndex() const { + assert(Sym->isFunction()); + return Sym->ElementIndex; +} + +const WasmSignature &Symbol::getFunctionType() const { + assert(FunctionType != nullptr); + return *FunctionType; +} + +uint32_t Symbol::getVirtualAddress() const { + assert(isGlobal()); + DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n"); + if (isUndefined()) + return UINT32_MAX; + if (VirtualAddress.hasValue()) + return VirtualAddress.getValue(); + + assert(Sym != nullptr); + ObjFile *Obj = cast<ObjFile>(File); + const WasmGlobal &Global = + Obj->getWasmObj()->globals()[getGlobalIndex() - Obj->NumGlobalImports()]; + assert(Global.Type == llvm::wasm::WASM_TYPE_I32); + assert(Segment); + return Segment->translateVA(Global.InitExpr.Value.Int32); +} + +uint32_t Symbol::getOutputIndex() const { + if (isUndefined() && isWeak()) + return 0; + return OutputIndex.getValue(); +} + +void Symbol::setVirtualAddress(uint32_t Value) { + DEBUG(dbgs() << "setVirtualAddress " << Name << " -> " << Value << "\n"); + assert(!VirtualAddress.hasValue()); + VirtualAddress = Value; +} + +void Symbol::setOutputIndex(uint32_t Index) { + DEBUG(dbgs() << "setOutputIndex " << Name << " -> " << Index << "\n"); + assert(!OutputIndex.hasValue()); + OutputIndex = Index; +} + +void Symbol::setTableIndex(uint32_t Index) { + DEBUG(dbgs() << "setTableIndex " << Name << " -> " << Index << "\n"); + assert(!TableIndex.hasValue()); + TableIndex = Index; +} + +void Symbol::update(Kind K, InputFile *F, const WasmSymbol *WasmSym, + 
const InputSegment *Seg, const WasmSignature *Sig) { + SymbolKind = K; + File = F; + Sym = WasmSym; + Segment = Seg; + FunctionType = Sig; +} + +bool Symbol::isWeak() const { return Sym && Sym->isWeak(); } + +bool Symbol::isHidden() const { return Sym && Sym->isHidden(); } + +std::string lld::toString(const wasm::Symbol &Sym) { + if (Config->Demangle) + if (Optional<std::string> S = demangleItanium(Sym.getName())) + return "`" + *S + "'"; + return Sym.getName(); +} + +std::string lld::toString(wasm::Symbol::Kind Kind) { + switch (Kind) { + case wasm::Symbol::DefinedFunctionKind: + return "DefinedFunction"; + case wasm::Symbol::DefinedGlobalKind: + return "DefinedGlobal"; + case wasm::Symbol::UndefinedFunctionKind: + return "UndefinedFunction"; + case wasm::Symbol::UndefinedGlobalKind: + return "UndefinedGlobal"; + case wasm::Symbol::LazyKind: + return "LazyKind"; + } + llvm_unreachable("Invalid symbol kind!"); +} diff --git a/wasm/Symbols.h b/wasm/Symbols.h new file mode 100644 index 0000000000000..8194bcaca383a --- /dev/null +++ b/wasm/Symbols.h @@ -0,0 +1,128 @@ +//===- Symbols.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_SYMBOLS_H +#define LLD_WASM_SYMBOLS_H + +#include "lld/Common/LLVM.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Wasm.h" + +using llvm::object::Archive; +using llvm::object::WasmSymbol; +using llvm::wasm::WasmExport; +using llvm::wasm::WasmImport; +using llvm::wasm::WasmSignature; + +namespace lld { +namespace wasm { + +class InputFile; +class InputSegment; + +class Symbol { +public: + enum Kind { + DefinedFunctionKind, + DefinedGlobalKind, + + LazyKind, + UndefinedFunctionKind, + UndefinedGlobalKind, + + LastDefinedKind = DefinedGlobalKind, + InvalidKind, + }; + + Symbol(StringRef Name, bool IsLocal) + : WrittenToSymtab(0), WrittenToNameSec(0), IsLocal(IsLocal), Name(Name) {} + + Kind getKind() const { return SymbolKind; } + + bool isLazy() const { return SymbolKind == LazyKind; } + bool isDefined() const { return SymbolKind <= LastDefinedKind; } + bool isUndefined() const { + return SymbolKind == UndefinedGlobalKind || + SymbolKind == UndefinedFunctionKind; + } + bool isFunction() const { + return SymbolKind == DefinedFunctionKind || + SymbolKind == UndefinedFunctionKind; + } + bool isGlobal() const { return !isFunction(); } + bool isLocal() const { return IsLocal; } + bool isWeak() const; + bool isHidden() const; + + // Returns the symbol name. + StringRef getName() const { return Name; } + + // Returns the file from which this symbol was created. + InputFile *getFile() const { return File; } + + uint32_t getGlobalIndex() const; + uint32_t getFunctionIndex() const; + + bool hasFunctionType() const { return FunctionType != nullptr; } + const WasmSignature &getFunctionType() const; + uint32_t getOutputIndex() const; + uint32_t getTableIndex() const { return TableIndex.getValue(); } + + // Returns the virtual address of a defined global. + // Only works for globals, not functions. 
+ uint32_t getVirtualAddress() const; + + // Set the output index of the symbol (in the function or global index + // space of the output object. + void setOutputIndex(uint32_t Index); + + // Returns true if a table index has been set for this symbol + bool hasTableIndex() const { return TableIndex.hasValue(); } + + // Set the table index of the symbol + void setTableIndex(uint32_t Index); + + void setVirtualAddress(uint32_t VA); + + void update(Kind K, InputFile *F = nullptr, const WasmSymbol *Sym = nullptr, + const InputSegment *Segment = nullptr, + const WasmSignature *Sig = nullptr); + + void setArchiveSymbol(const Archive::Symbol &Sym) { ArchiveSymbol = Sym; } + const Archive::Symbol &getArchiveSymbol() { return ArchiveSymbol; } + + // This bit is used by Writer::writeNameSection() to prevent + // symbols from being written to the symbol table more than once. + unsigned WrittenToSymtab : 1; + unsigned WrittenToNameSec : 1; + +protected: + unsigned IsLocal : 1; + + StringRef Name; + Archive::Symbol ArchiveSymbol = {nullptr, 0, 0}; + Kind SymbolKind = InvalidKind; + InputFile *File = nullptr; + const WasmSymbol *Sym = nullptr; + const InputSegment *Segment = nullptr; + llvm::Optional<uint32_t> OutputIndex; + llvm::Optional<uint32_t> TableIndex; + llvm::Optional<uint32_t> VirtualAddress; + const WasmSignature *FunctionType; +}; + +} // namespace wasm + +// Returns a symbol name for an error message. +std::string toString(const wasm::Symbol &Sym); +std::string toString(wasm::Symbol::Kind Kind); + +} // namespace lld + +#endif diff --git a/wasm/Writer.cpp b/wasm/Writer.cpp new file mode 100644 index 0000000000000..61ac54a3e4b31 --- /dev/null +++ b/wasm/Writer.cpp @@ -0,0 +1,724 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Writer.h" + +#include "Config.h" +#include "OutputSections.h" +#include "OutputSegment.h" +#include "SymbolTable.h" +#include "WriterUtils.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Threads.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/LEB128.h" + +#include <cstdarg> + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace llvm::wasm; +using namespace lld; +using namespace lld::wasm; + +static constexpr int kStackAlignment = 16; + +namespace { + +// Traits for using WasmSignature in a DenseMap. +struct WasmSignatureDenseMapInfo { + static WasmSignature getEmptyKey() { + WasmSignature Sig; + Sig.ReturnType = 1; + return Sig; + } + static WasmSignature getTombstoneKey() { + WasmSignature Sig; + Sig.ReturnType = 2; + return Sig; + } + static unsigned getHashValue(const WasmSignature &Sig) { + uintptr_t Value = 0; + Value += DenseMapInfo<int32_t>::getHashValue(Sig.ReturnType); + for (int32_t Param : Sig.ParamTypes) + Value += DenseMapInfo<int32_t>::getHashValue(Param); + return Value; + } + static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) { + return LHS == RHS; + } +}; + +// The writer writes a SymbolTable result to a file. 
+class Writer { +public: + void run(); + +private: + void openFile(); + + uint32_t getTypeIndex(const WasmSignature &Sig); + void assignSymbolIndexes(); + void calculateImports(); + void calculateOffsets(); + void calculateTypes(); + void createOutputSegments(); + void layoutMemory(); + void createHeader(); + void createSections(); + SyntheticSection *createSyntheticSection(uint32_t Type, + std::string Name = ""); + + // Builtin sections + void createTypeSection(); + void createFunctionSection(); + void createTableSection(); + void createGlobalSection(); + void createExportSection(); + void createImportSection(); + void createMemorySection(); + void createElemSection(); + void createStartSection(); + void createCodeSection(); + void createDataSection(); + + // Custom sections + void createRelocSections(); + void createLinkingSection(); + void createNameSection(); + + void writeHeader(); + void writeSections(); + + uint64_t FileSize = 0; + uint32_t DataSize = 0; + uint32_t NumFunctions = 0; + uint32_t NumMemoryPages = 0; + uint32_t InitialTableOffset = 0; + + std::vector<const WasmSignature *> Types; + DenseMap<WasmSignature, int32_t, WasmSignatureDenseMapInfo> TypeIndices; + std::vector<const Symbol *> FunctionImports; + std::vector<const Symbol *> GlobalImports; + std::vector<const Symbol *> DefinedGlobals; + std::vector<const Symbol *> IndirectFunctions; + + // Elements that are used to construct the final output + std::string Header; + std::vector<OutputSection *> OutputSections; + + std::unique_ptr<FileOutputBuffer> Buffer; + + std::vector<OutputSegment *> Segments; + llvm::SmallDenseMap<StringRef, OutputSegment *> SegmentMap; +}; + +} // anonymous namespace + +static void debugPrint(const char *fmt, ...) 
{ + if (!errorHandler().Verbose) + return; + fprintf(stderr, "lld: "); + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void Writer::createImportSection() { + uint32_t NumImports = FunctionImports.size() + GlobalImports.size(); + if (Config->ImportMemory) + ++NumImports; + + if (NumImports == 0) + return; + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_IMPORT); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, NumImports, "import count"); + + for (const Symbol *Sym : FunctionImports) { + WasmImport Import; + Import.Module = "env"; + Import.Field = Sym->getName(); + Import.Kind = WASM_EXTERNAL_FUNCTION; + assert(TypeIndices.count(Sym->getFunctionType()) > 0); + Import.SigIndex = TypeIndices.lookup(Sym->getFunctionType()); + writeImport(OS, Import); + } + + if (Config->ImportMemory) { + WasmImport Import; + Import.Module = "env"; + Import.Field = "memory"; + Import.Kind = WASM_EXTERNAL_MEMORY; + Import.Memory.Flags = 0; + Import.Memory.Initial = NumMemoryPages; + writeImport(OS, Import); + } + + for (const Symbol *Sym : GlobalImports) { + WasmImport Import; + Import.Module = "env"; + Import.Field = Sym->getName(); + Import.Kind = WASM_EXTERNAL_GLOBAL; + Import.Global.Mutable = false; + Import.Global.Type = WASM_TYPE_I32; // Sym->getGlobalType(); + writeImport(OS, Import); + } +} + +void Writer::createTypeSection() { + SyntheticSection *Section = createSyntheticSection(WASM_SEC_TYPE); + raw_ostream &OS = Section->getStream(); + writeUleb128(OS, Types.size(), "type count"); + for (const WasmSignature *Sig : Types) { + writeSig(OS, *Sig); + } +} + +void Writer::createFunctionSection() { + if (!NumFunctions) + return; + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_FUNCTION); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, NumFunctions, "function count"); + for (ObjFile *File : Symtab->ObjectFiles) { + for (uint32_t Sig : File->getWasmObj()->functionTypes()) { + 
writeUleb128(OS, File->relocateTypeIndex(Sig), "sig index"); + } + } +} + +void Writer::createMemorySection() { + if (Config->ImportMemory) + return; + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_MEMORY); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, 1, "memory count"); + writeUleb128(OS, 0, "memory limits flags"); + writeUleb128(OS, NumMemoryPages, "initial pages"); +} + +void Writer::createGlobalSection() { + if (DefinedGlobals.empty()) + return; + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_GLOBAL); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, DefinedGlobals.size(), "global count"); + for (const Symbol *Sym : DefinedGlobals) { + WasmGlobal Global; + Global.Type = WASM_TYPE_I32; + Global.Mutable = Sym == Config->StackPointerSymbol; + Global.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + Global.InitExpr.Value.Int32 = Sym->getVirtualAddress(); + writeGlobal(OS, Global); + } +} + +void Writer::createTableSection() { + // Always output a table section, even if there are no indirect calls. + // There are two reasons for this: + // 1. For executables it is useful to have an empty table slot at 0 + // which can be filled with a null function call handler. + // 2. If we don't do this, any program that contains a call_indirect but + // no address-taken function will fail at validation time since it is + // a validation error to include a call_indirect instruction if there + // is not table. 
+ uint32_t TableSize = InitialTableOffset + IndirectFunctions.size(); + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_TABLE); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, 1, "table count"); + writeSleb128(OS, WASM_TYPE_ANYFUNC, "table type"); + writeUleb128(OS, WASM_LIMITS_FLAG_HAS_MAX, "table flags"); + writeUleb128(OS, TableSize, "table initial size"); + writeUleb128(OS, TableSize, "table max size"); +} + +void Writer::createExportSection() { + bool ExportMemory = !Config->Relocatable && !Config->ImportMemory; + Symbol *EntrySym = Symtab->find(Config->Entry); + bool ExportEntry = !Config->Relocatable && EntrySym && EntrySym->isDefined(); + bool ExportHidden = Config->EmitRelocs; + + uint32_t NumExports = ExportMemory ? 1 : 0; + + std::vector<const Symbol *> SymbolExports; + if (ExportEntry) + SymbolExports.emplace_back(EntrySym); + + for (const Symbol *Sym : Symtab->getSymbols()) { + if (Sym->isUndefined() || Sym->isGlobal()) + continue; + if (Sym->isHidden() && !ExportHidden) + continue; + if (ExportEntry && Sym == EntrySym) + continue; + SymbolExports.emplace_back(Sym); + } + + for (const Symbol *Sym : DefinedGlobals) { + // Can't export the SP right now because it mutable and mutable globals + // connot be exported. 
+ if (Sym == Config->StackPointerSymbol) + continue; + SymbolExports.emplace_back(Sym); + } + + NumExports += SymbolExports.size(); + if (!NumExports) + return; + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_EXPORT); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, NumExports, "export count"); + + if (ExportMemory) { + WasmExport MemoryExport; + MemoryExport.Name = "memory"; + MemoryExport.Kind = WASM_EXTERNAL_MEMORY; + MemoryExport.Index = 0; + writeExport(OS, MemoryExport); + } + + for (const Symbol *Sym : SymbolExports) { + log("Export: " + Sym->getName()); + WasmExport Export; + Export.Name = Sym->getName(); + Export.Index = Sym->getOutputIndex(); + if (Sym->isFunction()) + Export.Kind = WASM_EXTERNAL_FUNCTION; + else + Export.Kind = WASM_EXTERNAL_GLOBAL; + writeExport(OS, Export); + } +} + +void Writer::createStartSection() {} + +void Writer::createElemSection() { + if (IndirectFunctions.empty()) + return; + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_ELEM); + raw_ostream &OS = Section->getStream(); + + writeUleb128(OS, 1, "segment count"); + writeUleb128(OS, 0, "table index"); + WasmInitExpr InitExpr; + InitExpr.Opcode = WASM_OPCODE_I32_CONST; + InitExpr.Value.Int32 = InitialTableOffset; + writeInitExpr(OS, InitExpr); + writeUleb128(OS, IndirectFunctions.size(), "elem count"); + + uint32_t TableIndex = InitialTableOffset; + for (const Symbol *Sym : IndirectFunctions) { + assert(Sym->getTableIndex() == TableIndex); + writeUleb128(OS, Sym->getOutputIndex(), "function index"); + ++TableIndex; + } +} + +void Writer::createCodeSection() { + if (!NumFunctions) + return; + + log("createCodeSection"); + + auto Section = make<CodeSection>(NumFunctions, Symtab->ObjectFiles); + OutputSections.push_back(Section); +} + +void Writer::createDataSection() { + if (!Segments.size()) + return; + + log("createDataSection"); + auto Section = make<DataSection>(Segments); + OutputSections.push_back(Section); +} + +// Create 
reloctions sections in the final output. +// These are only created when relocatable output is requested. +void Writer::createRelocSections() { + log("createRelocSections"); + // Don't use iterator here since we are adding to OutputSection + size_t OrigSize = OutputSections.size(); + for (size_t i = 0; i < OrigSize; i++) { + OutputSection *S = OutputSections[i]; + const char *name; + uint32_t Count = S->numRelocations(); + if (!Count) + continue; + + if (S->Type == WASM_SEC_DATA) + name = "reloc.DATA"; + else if (S->Type == WASM_SEC_CODE) + name = "reloc.CODE"; + else + llvm_unreachable("relocations only support for code and data"); + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, name); + raw_ostream &OS = Section->getStream(); + writeUleb128(OS, S->Type, "reloc section"); + writeUleb128(OS, Count, "reloc count"); + S->writeRelocations(OS); + } +} + +// Create the custom "linking" section containing linker metadata. +// This is only created when relocatable output is requested. +void Writer::createLinkingSection() { + SyntheticSection *Section = + createSyntheticSection(WASM_SEC_CUSTOM, "linking"); + raw_ostream &OS = Section->getStream(); + + SubSection DataSizeSubSection(WASM_DATA_SIZE); + writeUleb128(DataSizeSubSection.getStream(), DataSize, "data size"); + DataSizeSubSection.finalizeContents(); + DataSizeSubSection.writeToStream(OS); + + if (Segments.size() && Config->Relocatable) { + SubSection SubSection(WASM_SEGMENT_INFO); + writeUleb128(SubSection.getStream(), Segments.size(), "num data segments"); + for (const OutputSegment *S : Segments) { + writeStr(SubSection.getStream(), S->Name, "segment name"); + writeUleb128(SubSection.getStream(), S->Alignment, "alignment"); + writeUleb128(SubSection.getStream(), 0, "flags"); + } + SubSection.finalizeContents(); + SubSection.writeToStream(OS); + } +} + +// Create the custom "name" section containing debug symbol names. 
+void Writer::createNameSection() { + // Create an array of all function sorted by function index space + std::vector<const Symbol *> Names; + + for (ObjFile *File : Symtab->ObjectFiles) { + Names.reserve(Names.size() + File->getSymbols().size()); + for (Symbol *S : File->getSymbols()) { + if (!S->isFunction() || S->isWeak() || S->WrittenToNameSec) + continue; + S->WrittenToNameSec = true; + Names.emplace_back(S); + } + } + + SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, "name"); + + std::sort(Names.begin(), Names.end(), [](const Symbol *A, const Symbol *B) { + return A->getOutputIndex() < B->getOutputIndex(); + }); + + SubSection FunctionSubsection(WASM_NAMES_FUNCTION); + raw_ostream &OS = FunctionSubsection.getStream(); + writeUleb128(OS, Names.size(), "name count"); + + // We have to iterate through the inputs twice so that all the imports + // appear first before any of the local function names. + for (const Symbol *S : Names) { + writeUleb128(OS, S->getOutputIndex(), "func index"); + writeStr(OS, S->getName(), "symbol name"); + } + + FunctionSubsection.finalizeContents(); + FunctionSubsection.writeToStream(Section->getStream()); +} + +void Writer::writeHeader() { + memcpy(Buffer->getBufferStart(), Header.data(), Header.size()); +} + +void Writer::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + parallelForEach(OutputSections, [Buf](OutputSection *S) { S->writeTo(Buf); }); +} + +// Fix the memory layout of the output binary. This assigns memory offsets +// to each of the input data sections as well as the explicit stack region. 
+void Writer::layoutMemory() { + uint32_t MemoryPtr = 0; + if (!Config->Relocatable) { + MemoryPtr = Config->GlobalBase; + debugPrint("mem: global base = %d\n", Config->GlobalBase); + } + + createOutputSegments(); + + // Static data comes first + for (OutputSegment *Seg : Segments) { + MemoryPtr = alignTo(MemoryPtr, Seg->Alignment); + Seg->StartVA = MemoryPtr; + debugPrint("mem: %-10s offset=%-8d size=%-4d align=%d\n", + Seg->Name.str().c_str(), MemoryPtr, Seg->Size, Seg->Alignment); + MemoryPtr += Seg->Size; + } + + DataSize = MemoryPtr; + if (!Config->Relocatable) + DataSize -= Config->GlobalBase; + debugPrint("mem: static data = %d\n", DataSize); + + // Stack comes after static data + if (!Config->Relocatable) { + MemoryPtr = alignTo(MemoryPtr, kStackAlignment); + if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment)) + error("stack size must be " + Twine(kStackAlignment) + "-byte aligned"); + debugPrint("mem: stack size = %d\n", Config->ZStackSize); + debugPrint("mem: stack base = %d\n", MemoryPtr); + MemoryPtr += Config->ZStackSize; + Config->StackPointerSymbol->setVirtualAddress(MemoryPtr); + debugPrint("mem: stack top = %d\n", MemoryPtr); + } + + uint32_t MemSize = alignTo(MemoryPtr, WasmPageSize); + NumMemoryPages = MemSize / WasmPageSize; + debugPrint("mem: total pages = %d\n", NumMemoryPages); +} + +SyntheticSection *Writer::createSyntheticSection(uint32_t Type, + std::string Name) { + auto Sec = make<SyntheticSection>(Type, Name); + log("createSection: " + toString(Sec)); + OutputSections.push_back(Sec); + return Sec; +} + +void Writer::createSections() { + // Known sections + createTypeSection(); + createImportSection(); + createFunctionSection(); + createTableSection(); + createMemorySection(); + createGlobalSection(); + createExportSection(); + createStartSection(); + createElemSection(); + createCodeSection(); + createDataSection(); + + // Custom sections + if (Config->EmitRelocs) + createRelocSections(); + createLinkingSection(); + 
if (!Config->StripDebug && !Config->StripAll) + createNameSection(); + + for (OutputSection *S : OutputSections) { + S->setOffset(FileSize); + S->finalizeContents(); + FileSize += S->getSize(); + } +} + +void Writer::calculateOffsets() { + for (ObjFile *File : Symtab->ObjectFiles) { + const WasmObjectFile *WasmFile = File->getWasmObj(); + + // Function Index + File->FunctionIndexOffset = + FunctionImports.size() - File->NumFunctionImports() + NumFunctions; + NumFunctions += WasmFile->functions().size(); + + // Memory + if (WasmFile->memories().size() > 1) + fatal(File->getName() + ": contains more than one memory"); + } +} + +void Writer::calculateImports() { + for (Symbol *Sym : Symtab->getSymbols()) { + if (!Sym->isUndefined() || Sym->isWeak()) + continue; + + if (Sym->isFunction()) { + Sym->setOutputIndex(FunctionImports.size()); + FunctionImports.push_back(Sym); + } else { + Sym->setOutputIndex(GlobalImports.size()); + GlobalImports.push_back(Sym); + } + } +} + +uint32_t Writer::getTypeIndex(const WasmSignature &Sig) { + auto Pair = TypeIndices.insert(std::make_pair(Sig, Types.size())); + if (Pair.second) + Types.push_back(&Sig); + return Pair.first->second; +} + +void Writer::calculateTypes() { + for (ObjFile *File : Symtab->ObjectFiles) { + File->TypeMap.reserve(File->getWasmObj()->types().size()); + for (const WasmSignature &Sig : File->getWasmObj()->types()) + File->TypeMap.push_back(getTypeIndex(Sig)); + } +} + +void Writer::assignSymbolIndexes() { + uint32_t GlobalIndex = GlobalImports.size(); + + if (Config->StackPointerSymbol) { + DefinedGlobals.emplace_back(Config->StackPointerSymbol); + Config->StackPointerSymbol->setOutputIndex(GlobalIndex++); + } + + if (Config->EmitRelocs) + DefinedGlobals.reserve(Symtab->getSymbols().size()); + + uint32_t TableIndex = InitialTableOffset; + + for (ObjFile *File : Symtab->ObjectFiles) { + DEBUG(dbgs() << "assignSymbolIndexes: " << File->getName() << "\n"); + + for (Symbol *Sym : File->getSymbols()) { + // Assign 
indexes for symbols defined with this file. + if (!Sym->isDefined() || File != Sym->getFile()) + continue; + if (Sym->isFunction()) { + auto *Obj = cast<ObjFile>(Sym->getFile()); + Sym->setOutputIndex(Obj->FunctionIndexOffset + + Sym->getFunctionIndex()); + } else if (Config->EmitRelocs) { + DefinedGlobals.emplace_back(Sym); + Sym->setOutputIndex(GlobalIndex++); + } + } + + for (Symbol *Sym : File->getTableSymbols()) { + if (!Sym->hasTableIndex()) { + Sym->setTableIndex(TableIndex++); + IndirectFunctions.emplace_back(Sym); + } + } + } +} + +static StringRef getOutputDataSegmentName(StringRef Name) { + if (Config->Relocatable) + return Name; + + for (StringRef V : + {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", + ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", + ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) { + StringRef Prefix = V.drop_back(); + if (Name.startswith(V) || Name == Prefix) + return Prefix; + } + + return Name; +} + +void Writer::createOutputSegments() { + for (ObjFile *File : Symtab->ObjectFiles) { + for (InputSegment *Segment : File->Segments) { + StringRef Name = getOutputDataSegmentName(Segment->getName()); + OutputSegment *&S = SegmentMap[Name]; + if (S == nullptr) { + DEBUG(dbgs() << "new segment: " << Name << "\n"); + S = make<OutputSegment>(Name); + Segments.push_back(S); + } + S->addInputSegment(Segment); + DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + } + } +} + +void Writer::run() { + if (!Config->Relocatable) + InitialTableOffset = 1; + + log("-- calculateTypes"); + calculateTypes(); + log("-- calculateImports"); + calculateImports(); + log("-- calculateOffsets"); + calculateOffsets(); + + if (errorHandler().Verbose) { + log("Defined Functions: " + Twine(NumFunctions)); + log("Defined Globals : " + Twine(DefinedGlobals.size())); + log("Function Imports : " + Twine(FunctionImports.size())); + log("Global Imports : " + Twine(GlobalImports.size())); + 
log("Total Imports : " + + Twine(FunctionImports.size() + GlobalImports.size())); + for (ObjFile *File : Symtab->ObjectFiles) + File->dumpInfo(); + } + + log("-- assignSymbolIndexes"); + assignSymbolIndexes(); + log("-- layoutMemory"); + layoutMemory(); + + createHeader(); + log("-- createSections"); + createSections(); + + log("-- openFile"); + openFile(); + if (errorCount()) + return; + + writeHeader(); + + log("-- writeSections"); + writeSections(); + if (errorCount()) + return; + + if (Error E = Buffer->commit()) + fatal("failed to write the output file: " + toString(std::move(E))); +} + +// Open a result file. +void Writer::openFile() { + log("writing: " + Config->OutputFile); + ::remove(Config->OutputFile.str().c_str()); + + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Config->OutputFile, FileSize, + FileOutputBuffer::F_executable); + + if (!BufferOrErr) + error("failed to open " + Config->OutputFile + ": " + + toString(BufferOrErr.takeError())); + else + Buffer = std::move(*BufferOrErr); +} + +void Writer::createHeader() { + raw_string_ostream OS(Header); + writeBytes(OS, WasmMagic, sizeof(WasmMagic), "wasm magic"); + writeU32(OS, WasmVersion, "wasm version"); + OS.flush(); + FileSize += Header.size(); +} + +void lld::wasm::writeResult() { Writer().run(); } diff --git a/wasm/Writer.h b/wasm/Writer.h new file mode 100644 index 0000000000000..a931ba9c29a89 --- /dev/null +++ b/wasm/Writer.h @@ -0,0 +1,21 @@ +//===- Writer.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_WRITER_H +#define LLD_WASM_WRITER_H + +namespace lld { +namespace wasm { + +void writeResult(); + +} // namespace wasm +} // namespace lld + +#endif diff --git a/wasm/WriterUtils.cpp b/wasm/WriterUtils.cpp new file mode 100644 index 0000000000000..5bdf0d2e3f657 --- /dev/null +++ b/wasm/WriterUtils.cpp @@ -0,0 +1,215 @@ +//===- WriterUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "WriterUtils.h" + +#include "lld/Common/ErrorHandler.h" + +#include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/LEB128.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace llvm::wasm; +using namespace lld::wasm; + +static const char *valueTypeToString(int32_t Type) { + switch (Type) { + case WASM_TYPE_I32: + return "i32"; + case WASM_TYPE_I64: + return "i64"; + case WASM_TYPE_F32: + return "f32"; + case WASM_TYPE_F64: + return "f64"; + default: + llvm_unreachable("invalid value type"); + } +} + +namespace lld { + +void wasm::debugWrite(uint64_t offset, Twine msg) { + DEBUG(dbgs() << format(" | %08" PRIx64 ": ", offset) << msg << "\n"); +} + +void wasm::writeUleb128(raw_ostream &OS, uint32_t Number, const char *msg) { + if (msg) + debugWrite(OS.tell(), msg + formatv(" [{0:x}]", Number)); + encodeULEB128(Number, OS); +} + +void wasm::writeSleb128(raw_ostream &OS, int32_t Number, const char *msg) { + if (msg) + debugWrite(OS.tell(), msg + formatv(" [{0:x}]", Number)); + encodeSLEB128(Number, OS); +} + +void wasm::writeBytes(raw_ostream &OS, const char *bytes, size_t count, + const char *msg) { + if (msg) + debugWrite(OS.tell(), 
msg + formatv(" [data[{0}]]", count)); + OS.write(bytes, count); +} + +void wasm::writeStr(raw_ostream &OS, const StringRef String, const char *msg) { + if (msg) + debugWrite(OS.tell(), + msg + formatv(" [str[{0}]: {1}]", String.size(), String)); + writeUleb128(OS, String.size(), nullptr); + writeBytes(OS, String.data(), String.size()); +} + +void wasm::writeU8(raw_ostream &OS, uint8_t byte, const char *msg) { + OS << byte; +} + +void wasm::writeU32(raw_ostream &OS, uint32_t Number, const char *msg) { + debugWrite(OS.tell(), msg + formatv("[{0:x}]", Number)); + support::endian::Writer<support::little>(OS).write(Number); +} + +void wasm::writeValueType(raw_ostream &OS, int32_t Type, const char *msg) { + debugWrite(OS.tell(), msg + formatv("[type: {0}]", valueTypeToString(Type))); + writeSleb128(OS, Type, nullptr); +} + +void wasm::writeSig(raw_ostream &OS, const WasmSignature &Sig) { + writeSleb128(OS, WASM_TYPE_FUNC, "signature type"); + writeUleb128(OS, Sig.ParamTypes.size(), "param count"); + for (int32_t ParamType : Sig.ParamTypes) { + writeValueType(OS, ParamType, "param type"); + } + if (Sig.ReturnType == WASM_TYPE_NORESULT) { + writeUleb128(OS, 0, "result count"); + } else { + writeUleb128(OS, 1, "result count"); + writeValueType(OS, Sig.ReturnType, "result type"); + } +} + +void wasm::writeInitExpr(raw_ostream &OS, const WasmInitExpr &InitExpr) { + writeU8(OS, InitExpr.Opcode, "opcode"); + switch (InitExpr.Opcode) { + case WASM_OPCODE_I32_CONST: + writeSleb128(OS, InitExpr.Value.Int32, "literal (i32)"); + break; + case WASM_OPCODE_I64_CONST: + writeSleb128(OS, InitExpr.Value.Int64, "literal (i64)"); + break; + case WASM_OPCODE_GET_GLOBAL: + writeUleb128(OS, InitExpr.Value.Global, "literal (global index)"); + break; + default: + fatal("unknown opcode in init expr: " + Twine(InitExpr.Opcode)); + } + writeU8(OS, WASM_OPCODE_END, "opcode:end"); +} + +void wasm::writeLimits(raw_ostream &OS, const WasmLimits &Limits) { + writeUleb128(OS, Limits.Flags, "limits 
flags"); + writeUleb128(OS, Limits.Initial, "limits initial"); + if (Limits.Flags & WASM_LIMITS_FLAG_HAS_MAX) + writeUleb128(OS, Limits.Maximum, "limits max"); +} + +void wasm::writeGlobal(raw_ostream &OS, const WasmGlobal &Global) { + writeValueType(OS, Global.Type, "global type"); + writeUleb128(OS, Global.Mutable, "global mutable"); + writeInitExpr(OS, Global.InitExpr); +} + +void wasm::writeImport(raw_ostream &OS, const WasmImport &Import) { + writeStr(OS, Import.Module, "import module name"); + writeStr(OS, Import.Field, "import field name"); + writeU8(OS, Import.Kind, "import kind"); + switch (Import.Kind) { + case WASM_EXTERNAL_FUNCTION: + writeUleb128(OS, Import.SigIndex, "import sig index"); + break; + case WASM_EXTERNAL_GLOBAL: + writeValueType(OS, Import.Global.Type, "import global type"); + writeUleb128(OS, Import.Global.Mutable, "import global mutable"); + break; + case WASM_EXTERNAL_MEMORY: + writeLimits(OS, Import.Memory); + break; + default: + fatal("unsupported import type: " + Twine(Import.Kind)); + } +} + +void wasm::writeExport(raw_ostream &OS, const WasmExport &Export) { + writeStr(OS, Export.Name, "export name"); + writeU8(OS, Export.Kind, "export kind"); + switch (Export.Kind) { + case WASM_EXTERNAL_FUNCTION: + writeUleb128(OS, Export.Index, "function index"); + break; + case WASM_EXTERNAL_GLOBAL: + writeUleb128(OS, Export.Index, "global index"); + break; + case WASM_EXTERNAL_MEMORY: + writeUleb128(OS, Export.Index, "memory index"); + break; + default: + fatal("unsupported export type: " + Twine(Export.Kind)); + } +} + +void wasm::writeReloc(raw_ostream &OS, const OutputRelocation &Reloc) { + writeUleb128(OS, Reloc.Reloc.Type, "reloc type"); + writeUleb128(OS, Reloc.Reloc.Offset, "reloc offset"); + writeUleb128(OS, Reloc.NewIndex, "reloc index"); + + switch (Reloc.Reloc.Type) { + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + writeUleb128(OS, Reloc.Reloc.Addend, "reloc 
addend"); + break; + default: + break; + } +} + +} // namespace lld + +std::string lld::toString(ValType Type) { + switch (Type) { + case ValType::I32: + return "I32"; + case ValType::I64: + return "I64"; + case ValType::F32: + return "F32"; + case ValType::F64: + return "F64"; + } + llvm_unreachable("Invalid wasm::ValType"); +} + +std::string lld::toString(const WasmSignature &Sig) { + SmallString<128> S("("); + for (uint32_t Type : Sig.ParamTypes) { + if (S.size() != 1) + S += ", "; + S += toString(static_cast<ValType>(Type)); + } + S += ") -> "; + if (Sig.ReturnType == WASM_TYPE_NORESULT) + S += "void"; + else + S += toString(static_cast<ValType>(Sig.ReturnType)); + return S.str(); +} diff --git a/wasm/WriterUtils.h b/wasm/WriterUtils.h new file mode 100644 index 0000000000000..c1ed90793f78b --- /dev/null +++ b/wasm/WriterUtils.h @@ -0,0 +1,78 @@ +//===- WriterUtils.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_WRITERUTILS_H +#define LLD_WASM_WRITERUTILS_H + +#include "llvm/ADT/Twine.h" +#include "llvm/Object/Wasm.h" +#include "llvm/Support/raw_ostream.h" + +using llvm::raw_ostream; + +// Needed for WasmSignatureDenseMapInfo +inline bool operator==(const llvm::wasm::WasmSignature &LHS, + const llvm::wasm::WasmSignature &RHS) { + return LHS.ReturnType == RHS.ReturnType && LHS.ParamTypes == RHS.ParamTypes; +} + +inline bool operator!=(const llvm::wasm::WasmSignature &LHS, + const llvm::wasm::WasmSignature &RHS) { + return !(LHS == RHS); +} + +namespace lld { +namespace wasm { + +struct OutputRelocation { + llvm::wasm::WasmRelocation Reloc; + uint32_t NewIndex; + uint32_t Value; +}; + +void debugWrite(uint64_t offset, llvm::Twine msg); + +void writeUleb128(raw_ostream &OS, uint32_t Number, const char *msg); + +void writeSleb128(raw_ostream &OS, int32_t Number, const char *msg); + +void writeBytes(raw_ostream &OS, const char *bytes, size_t count, + const char *msg = nullptr); + +void writeStr(raw_ostream &OS, const llvm::StringRef String, + const char *msg = nullptr); + +void writeU8(raw_ostream &OS, uint8_t byte, const char *msg); + +void writeU32(raw_ostream &OS, uint32_t Number, const char *msg); + +void writeValueType(raw_ostream &OS, int32_t Type, const char *msg); + +void writeSig(raw_ostream &OS, const llvm::wasm::WasmSignature &Sig); + +void writeInitExpr(raw_ostream &OS, const llvm::wasm::WasmInitExpr &InitExpr); + +void writeLimits(raw_ostream &OS, const llvm::wasm::WasmLimits &Limits); + +void writeGlobal(raw_ostream &OS, const llvm::wasm::WasmGlobal &Global); + +void writeImport(raw_ostream &OS, const llvm::wasm::WasmImport &Import); + +void writeExport(raw_ostream &OS, const llvm::wasm::WasmExport &Export); + +void writeReloc(raw_ostream &OS, const OutputRelocation &Reloc); + +} // namespace wasm + +std::string toString(const llvm::wasm::ValType Type); 
+std::string toString(const llvm::wasm::WasmSignature &Sig); + +} // namespace lld + +#endif // LLD_WASM_WRITERUTILS_H |