diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:08:33 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:08:33 +0000 |
commit | 20d35e67e67f106f617c939725101223211659f0 (patch) | |
tree | 64eb963cbf5ba58765e0a6b64a440965d66a7a4d /wasm | |
parent | ae1a339de31cf4065777531959a11e55a2e5fa00 (diff) |
Notes
Diffstat (limited to 'wasm')
-rw-r--r-- | wasm/CMakeLists.txt | 16 | ||||
-rw-r--r-- | wasm/Config.h | 24 | ||||
-rw-r--r-- | wasm/Driver.cpp | 374 | ||||
-rw-r--r-- | wasm/InputChunks.cpp | 295 | ||||
-rw-r--r-- | wasm/InputChunks.h | 236 | ||||
-rw-r--r-- | wasm/InputFiles.cpp | 462 | ||||
-rw-r--r-- | wasm/InputFiles.h | 95 | ||||
-rw-r--r-- | wasm/InputGlobal.h | 59 | ||||
-rw-r--r-- | wasm/InputSegment.cpp | 25 | ||||
-rw-r--r-- | wasm/InputSegment.h | 76 | ||||
-rw-r--r-- | wasm/LTO.cpp | 155 | ||||
-rw-r--r-- | wasm/LTO.h | 57 | ||||
-rw-r--r-- | wasm/MarkLive.cpp | 118 | ||||
-rw-r--r-- | wasm/MarkLive.h | 21 | ||||
-rw-r--r-- | wasm/Options.td | 119 | ||||
-rw-r--r-- | wasm/OutputSections.cpp | 287 | ||||
-rw-r--r-- | wasm/OutputSections.h | 50 | ||||
-rw-r--r-- | wasm/OutputSegment.h | 28 | ||||
-rw-r--r-- | wasm/SymbolTable.cpp | 390 | ||||
-rw-r--r-- | wasm/SymbolTable.h | 43 | ||||
-rw-r--r-- | wasm/Symbols.cpp | 240 | ||||
-rw-r--r-- | wasm/Symbols.h | 337 | ||||
-rw-r--r-- | wasm/Writer.cpp | 954 | ||||
-rw-r--r-- | wasm/WriterUtils.cpp | 113 | ||||
-rw-r--r-- | wasm/WriterUtils.h | 46 |
25 files changed, 3308 insertions, 1312 deletions
diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 19b0d168437c..1a9e09b38429 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -2,10 +2,16 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(WasmOptionsTableGen) +if(NOT LLD_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + add_lld_library(lldWasm Driver.cpp + InputChunks.cpp InputFiles.cpp - InputSegment.cpp + LTO.cpp + MarkLive.cpp OutputSections.cpp SymbolTable.cpp Symbols.cpp @@ -17,10 +23,16 @@ add_lld_library(lldWasm BinaryFormat Core Demangle + LTO + MC Object Option Support LINK_LIBS lldCommon - ) + + DEPENDS + WasmOptionsTableGen + ${tablegen_deps} + )
\ No newline at end of file diff --git a/wasm/Config.h b/wasm/Config.h index 82f49ce175bb..76a780567072 100644 --- a/wasm/Config.h +++ b/wasm/Config.h @@ -13,33 +13,43 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/Wasm.h" - -#include "Symbols.h" - -using llvm::wasm::WasmGlobal; +#include "llvm/Support/CachePruning.h" namespace lld { namespace wasm { struct Configuration { bool AllowUndefined; - bool CheckSignatures; + bool CompressRelocTargets; bool Demangle; - bool EmitRelocs; + bool DisableVerify; + bool ExportAll; + bool ExportTable; + bool GcSections; bool ImportMemory; + bool ImportTable; + bool MergeDataSegments; + bool PrintGcSections; bool Relocatable; + bool SaveTemps; bool StripAll; bool StripDebug; + bool StackFirst; uint32_t GlobalBase; uint32_t InitialMemory; uint32_t MaxMemory; uint32_t ZStackSize; + unsigned LTOPartitions; + unsigned LTOO; + unsigned Optimize; + unsigned ThinLTOJobs; llvm::StringRef Entry; llvm::StringRef OutputFile; + llvm::StringRef ThinLTOCacheDir; llvm::StringSet<> AllowUndefinedSymbols; std::vector<llvm::StringRef> SearchPaths; - Symbol *StackPointerSymbol = nullptr; + llvm::CachePruningPolicy ThinLTOCachePolicy; }; // The only instance of Configuration struct. 
diff --git a/wasm/Driver.cpp b/wasm/Driver.cpp index 97ec262be308..329b5ae80a9c 100644 --- a/wasm/Driver.cpp +++ b/wasm/Driver.cpp @@ -9,11 +9,15 @@ #include "lld/Common/Driver.h" #include "Config.h" +#include "InputChunks.h" +#include "InputGlobal.h" +#include "MarkLive.h" #include "SymbolTable.h" #include "Writer.h" #include "lld/Common/Args.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/Twine.h" @@ -22,6 +26,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/TargetSelect.h" + +#define DEBUG_TYPE "lld" using namespace llvm; using namespace llvm::sys; @@ -30,14 +37,9 @@ using namespace llvm::wasm; using namespace lld; using namespace lld::wasm; -namespace { +Configuration *lld::wasm::Config; -// Parses command line options. -class WasmOptTable : public llvm::opt::OptTable { -public: - WasmOptTable(); - llvm::opt::InputArgList parse(ArrayRef<const char *> Argv); -}; +namespace { // Create enum with OPT_xxx values for each option in Options.td enum { @@ -47,24 +49,36 @@ enum { #undef OPTION }; +// This function is called on startup. We need this for LTO since +// LTO calls LLVM functions to compile bitcode files to native code. +// Technically this can be delayed until we read bitcode files, but +// we don't bother to do lazily because the initialization is fast. +static void initLLVM() { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); +} + class LinkerDriver { public: void link(ArrayRef<const char *> ArgsArr); private: - void createFiles(llvm::opt::InputArgList &Args); + void createFiles(opt::InputArgList &Args); void addFile(StringRef Path); void addLibrary(StringRef Name); + + // True if we are in --whole-archive and --no-whole-archive. 
+ bool InWholeArchive = false; + std::vector<InputFile *> Files; }; - } // anonymous namespace -Configuration *lld::wasm::Config; - bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Error) { - errorHandler().LogName = Args[0]; + errorHandler().LogName = sys::path::filename(Args[0]); errorHandler().ErrorOS = &Error; errorHandler().ColorDiagnostics = Error.has_colors(); errorHandler().ErrorLimitExceededMsg = @@ -74,6 +88,7 @@ bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly, Config = make<Configuration>(); Symtab = make<SymbolTable>(); + initLLVM(); LinkerDriver().link(Args); // Exit immediately if we don't need to return to the caller. @@ -86,8 +101,6 @@ bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly, return !errorCount(); } -// Create OptTable - // Create prefix string literals used in Options.td #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; #include "Options.inc" @@ -102,6 +115,14 @@ static const opt::OptTable::Info OptInfo[] = { #undef OPTION }; +namespace { +class WasmOptTable : public llvm::opt::OptTable { +public: + WasmOptTable() : OptTable(OptInfo) {} + opt::InputArgList parse(ArrayRef<const char *> Argv); +}; +} // namespace + // Set color diagnostics according to -color-diagnostics={auto,always,never} // or -no-color-diagnostics flags. 
static void handleColorDiagnostics(opt::InputArgList &Args) { @@ -109,19 +130,18 @@ static void handleColorDiagnostics(opt::InputArgList &Args) { OPT_no_color_diagnostics); if (!Arg) return; - - if (Arg->getOption().getID() == OPT_color_diagnostics) + if (Arg->getOption().getID() == OPT_color_diagnostics) { errorHandler().ColorDiagnostics = true; - else if (Arg->getOption().getID() == OPT_no_color_diagnostics) + } else if (Arg->getOption().getID() == OPT_no_color_diagnostics) { errorHandler().ColorDiagnostics = false; - else { + } else { StringRef S = Arg->getValue(); if (S == "always") errorHandler().ColorDiagnostics = true; - if (S == "never") + else if (S == "never") errorHandler().ColorDiagnostics = false; - if (S != "auto") - error("unknown option: -color-diagnostics=" + S); + else if (S != "auto") + error("unknown option: --color-diagnostics=" + S); } } @@ -134,25 +154,15 @@ static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) { return None; } -// Inject a new undefined symbol into the link. This will cause the link to -// fail unless this symbol can be found. 
-static void addSyntheticUndefinedFunction(StringRef Name, - const WasmSignature *Type) { - log("injecting undefined func: " + Name); - Symtab->addUndefinedFunction(Name, Type); -} - -static void printHelp(const char *Argv0) { - WasmOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false); -} - -WasmOptTable::WasmOptTable() : OptTable(OptInfo) {} - opt::InputArgList WasmOptTable::parse(ArrayRef<const char *> Argv) { SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); unsigned MissingIndex; unsigned MissingCount; + + // Expand response files (arguments in the form of @<filename>) + cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Vec); + opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount); handleColorDiagnostics(Args); @@ -161,16 +171,80 @@ opt::InputArgList WasmOptTable::parse(ArrayRef<const char *> Argv) { return Args; } +// Currently we allow a ".imports" to live alongside a library. This can +// be used to specify a list of symbols which can be undefined at link +// time (imported from the environment). For example libc.a includes an +// import file that lists the syscall functions it relies on at runtime. +// In the long run this information would be better stored as a symbol +// attribute/flag in the object file itself. +// See: https://github.com/WebAssembly/tool-conventions/issues/35 +static void readImportFile(StringRef Filename) { + if (Optional<MemoryBufferRef> Buf = readFile(Filename)) + for (StringRef Sym : args::getLines(*Buf)) + Config->AllowUndefinedSymbols.insert(Sym); +} + +// Returns slices of MB by parsing MB as an archive file. +// Each slice consists of a member file in the archive. 
+std::vector<MemoryBufferRef> static getArchiveMembers( + MemoryBufferRef MB) { + std::unique_ptr<Archive> File = + CHECK(Archive::create(MB), + MB.getBufferIdentifier() + ": failed to parse archive"); + + std::vector<MemoryBufferRef> V; + Error Err = Error::success(); + for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { + Archive::Child C = + CHECK(COrErr, MB.getBufferIdentifier() + + ": could not get the child of the archive"); + MemoryBufferRef MBRef = + CHECK(C.getMemoryBufferRef(), + MB.getBufferIdentifier() + + ": could not get the buffer for a child of the archive"); + V.push_back(MBRef); + } + if (Err) + fatal(MB.getBufferIdentifier() + ": Archive::children failed: " + + toString(std::move(Err))); + + // Take ownership of memory buffers created for members of thin archives. + for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers()) + make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); + + return V; +} + void LinkerDriver::addFile(StringRef Path) { Optional<MemoryBufferRef> Buffer = readFile(Path); if (!Buffer.hasValue()) return; MemoryBufferRef MBRef = *Buffer; - if (identify_magic(MBRef.getBuffer()) == file_magic::archive) + switch (identify_magic(MBRef.getBuffer())) { + case file_magic::archive: { + // Handle -whole-archive. + if (InWholeArchive) { + for (MemoryBufferRef &M : getArchiveMembers(MBRef)) + Files.push_back(createObjectFile(M)); + return; + } + + SmallString<128> ImportFile = Path; + path::replace_extension(ImportFile, ".imports"); + if (fs::exists(ImportFile)) + readImportFile(ImportFile.str()); + Files.push_back(make<ArchiveFile>(MBRef)); - else - Files.push_back(make<ObjFile>(MBRef)); + return; + } + case file_magic::bitcode: + case file_magic::wasm_object: + Files.push_back(createObjectFile(MBRef)); + break; + default: + error("unknown file type: " + MBRef.getBufferIdentifier()); + } } // Add a given library by searching it from input search paths. 
@@ -194,11 +268,14 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { case OPT_INPUT: addFile(Arg->getValue()); break; + case OPT_whole_archive: + InWholeArchive = true; + break; + case OPT_no_whole_archive: + InWholeArchive = false; + break; } } - - if (Files.empty()) - error("no input files"); } static StringRef getEntry(opt::InputArgList &Args, StringRef Default) { @@ -210,13 +287,71 @@ static StringRef getEntry(opt::InputArgList &Args, StringRef Default) { return Arg->getValue(); } +static const uint8_t UnreachableFn[] = { + 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, + 0x00 /* opcode unreachable */, 0x0b /* opcode end */ +}; + +// For weak undefined functions, there may be "call" instructions that reference +// the symbol. In this case, we need to synthesise a dummy/stub function that +// will abort at runtime, so that relocations can still provide an operand to +// the call instruction that passes Wasm validation. +static void handleWeakUndefines() { + for (Symbol *Sym : Symtab->getSymbols()) { + if (!Sym->isUndefined() || !Sym->isWeak()) + continue; + auto *FuncSym = dyn_cast<FunctionSymbol>(Sym); + if (!FuncSym) + continue; + + // It is possible for undefined functions not to have a signature (eg. if + // added via "--undefined"), but weak undefined ones do have a signature. + assert(FuncSym->FunctionType); + const WasmSignature &Sig = *FuncSym->FunctionType; + + // Add a synthetic dummy for weak undefined functions. These dummies will + // be GC'd if not used as the target of any "call" instructions. + Optional<std::string> SymName = demangleItanium(Sym->getName()); + StringRef DebugName = + Saver.save("undefined function " + + (SymName ? 
StringRef(*SymName) : Sym->getName())); + SyntheticFunction *Func = + make<SyntheticFunction>(Sig, Sym->getName(), DebugName); + Func->setBody(UnreachableFn); + // Ensure it compares equal to the null pointer, and so that table relocs + // don't pull in the stub body (only call-operand relocs should do that). + Func->setTableIndex(0); + Symtab->SyntheticFunctions.emplace_back(Func); + // Hide our dummy to prevent export. + uint32_t Flags = WASM_SYMBOL_VISIBILITY_HIDDEN; + replaceSymbol<DefinedFunction>(Sym, Sym->getName(), Flags, nullptr, Func); + } +} + +// Force Sym to be entered in the output. Used for -u or equivalent. +static Symbol *addUndefined(StringRef Name) { + Symbol *S = Symtab->addUndefinedFunction(Name, 0, nullptr, nullptr); + + // Since symbol S may not be used inside the program, LTO may + // eliminate it. Mark the symbol as "used" to prevent it. + S->IsUsedInRegularObj = true; + + return S; +} + void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { WasmOptTable Parser; opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); // Handle --help if (Args.hasArg(OPT_help)) { - printHelp(ArgsArr[0]); + Parser.PrintHelp(outs(), ArgsArr[0], "LLVM Linker", false); + return; + } + + // Handle --version + if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) { + outs() << getLLDVersion() << "\n"; return; } @@ -229,26 +364,40 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { errorHandler().ErrorLimit = args::getInteger(Args, OPT_error_limit, 20); - if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) { - outs() << getLLDVersion() << "\n"; - return; - } - Config->AllowUndefined = Args.hasArg(OPT_allow_undefined); - Config->CheckSignatures = - Args.hasFlag(OPT_check_signatures, OPT_no_check_signatures, false); - Config->EmitRelocs = Args.hasArg(OPT_emit_relocs); + Config->Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, true); + Config->DisableVerify = Args.hasArg(OPT_disable_verify); Config->Entry = getEntry(Args, Args.hasArg(OPT_relocatable) 
? "" : "_start"); + Config->ExportAll = Args.hasArg(OPT_export_all); + Config->ExportTable = Args.hasArg(OPT_export_table); + errorHandler().FatalWarnings = + Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); Config->ImportMemory = Args.hasArg(OPT_import_memory); + Config->ImportTable = Args.hasArg(OPT_import_table); + Config->LTOO = args::getInteger(Args, OPT_lto_O, 2); + Config->LTOPartitions = args::getInteger(Args, OPT_lto_partitions, 1); + Config->Optimize = args::getInteger(Args, OPT_O, 0); Config->OutputFile = Args.getLastArgValue(OPT_o); Config->Relocatable = Args.hasArg(OPT_relocatable); + Config->GcSections = + Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, !Config->Relocatable); + Config->MergeDataSegments = + Args.hasFlag(OPT_merge_data_segments, OPT_no_merge_data_segments, + !Config->Relocatable); + Config->PrintGcSections = + Args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false); + Config->SaveTemps = Args.hasArg(OPT_save_temps); Config->SearchPaths = args::getStrings(Args, OPT_L); Config->StripAll = Args.hasArg(OPT_strip_all); Config->StripDebug = Args.hasArg(OPT_strip_debug); + Config->StackFirst = Args.hasArg(OPT_stack_first); + Config->ThinLTOCacheDir = Args.getLastArgValue(OPT_thinlto_cache_dir); + Config->ThinLTOCachePolicy = CHECK( + parseCachePruningPolicy(Args.getLastArgValue(OPT_thinlto_cache_policy)), + "--thinlto-cache-policy: invalid cache policy"); + Config->ThinLTOJobs = args::getInteger(Args, OPT_thinlto_jobs, -1u); errorHandler().Verbose = Args.hasArg(OPT_verbose); ThreadsEnabled = Args.hasFlag(OPT_threads, OPT_no_threads, true); - if (Config->Relocatable) - Config->EmitRelocs = true; Config->InitialMemory = args::getInteger(Args, OPT_initial_memory, 0); Config->GlobalBase = args::getInteger(Args, OPT_global_base, 1024); @@ -256,33 +405,72 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { Config->ZStackSize = args::getZOptionValue(Args, OPT_z, "stack-size", WasmPageSize); + 
Config->CompressRelocTargets = Config->Optimize > 0 && !Config->Relocatable; + + if (Config->LTOO > 3) + error("invalid optimization level for LTO: " + Twine(Config->LTOO)); + if (Config->LTOPartitions == 0) + error("--lto-partitions: number of threads must be > 0"); + if (Config->ThinLTOJobs == 0) + error("--thinlto-jobs: number of threads must be > 0"); + if (auto *Arg = Args.getLastArg(OPT_allow_undefined_file)) - if (Optional<MemoryBufferRef> Buf = readFile(Arg->getValue())) - for (StringRef Sym : args::getLines(*Buf)) - Config->AllowUndefinedSymbols.insert(Sym); + readImportFile(Arg->getValue()); + + if (!Args.hasArg(OPT_INPUT)) { + error("no input files"); + return; + } if (Config->OutputFile.empty()) error("no output file specified"); - if (!Args.hasArg(OPT_INPUT)) - error("no input files"); + if (Config->ImportTable && Config->ExportTable) + error("--import-table and --export-table may not be used together"); - if (Config->Relocatable && !Config->Entry.empty()) - error("entry point specified for relocatable output file"); - if (Config->Relocatable && Args.hasArg(OPT_undefined)) - error("undefined symbols specified for relocatable output file"); + if (Config->Relocatable) { + if (!Config->Entry.empty()) + error("entry point specified for relocatable output file"); + if (Config->GcSections) + error("-r and --gc-sections may not be used together"); + if (Args.hasArg(OPT_undefined)) + error("-r -and --undefined may not be used together"); + } + Symbol *EntrySym = nullptr; if (!Config->Relocatable) { - if (!Config->Entry.empty()) { - static WasmSignature Signature = {{}, WASM_TYPE_NORESULT}; - addSyntheticUndefinedFunction(Config->Entry, &Signature); - } + llvm::wasm::WasmGlobal Global; + Global.Type = {WASM_TYPE_I32, true}; + Global.InitExpr.Value.Int32 = 0; + Global.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + Global.SymbolName = "__stack_pointer"; + InputGlobal *StackPointer = make<InputGlobal>(Global, nullptr); + StackPointer->Live = true; + + static 
WasmSignature NullSignature = {{}, WASM_TYPE_NORESULT}; + + // Add synthetic symbols before any others + WasmSym::CallCtors = Symtab->addSyntheticFunction( + "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN, + make<SyntheticFunction>(NullSignature, "__wasm_call_ctors")); + // TODO(sbc): Remove WASM_SYMBOL_VISIBILITY_HIDDEN when the mutable global + // spec proposal is implemented in all major browsers. + // See: https://github.com/WebAssembly/mutable-global + WasmSym::StackPointer = Symtab->addSyntheticGlobal( + "__stack_pointer", WASM_SYMBOL_VISIBILITY_HIDDEN, StackPointer); + WasmSym::HeapBase = Symtab->addSyntheticDataSymbol("__heap_base", 0); + WasmSym::DsoHandle = Symtab->addSyntheticDataSymbol( + "__dso_handle", WASM_SYMBOL_VISIBILITY_HIDDEN); + WasmSym::DataEnd = Symtab->addSyntheticDataSymbol("__data_end", 0); + + // For now, since we don't actually use the start function as the + // wasm start symbol, we don't need to care about its signature. + if (!Config->Entry.empty()) + EntrySym = addUndefined(Config->Entry); // Handle the `--undefined <sym>` options. - for (StringRef S : args::getStrings(Args, OPT_undefined)) - addSyntheticUndefinedFunction(S, nullptr); - - Config->StackPointerSymbol = Symtab->addDefinedGlobal("__stack_pointer"); + for (auto *Arg : Args.filtered(OPT_undefined)) + addUndefined(Arg->getValue()); } createFiles(Args); @@ -293,29 +481,59 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // symbols that we need to the symbol table. for (InputFile *F : Files) Symtab->addFile(F); + if (errorCount()) + return; + + // Add synthetic dummies for weak undefined functions. + if (!Config->Relocatable) + handleWeakUndefines(); + + // Handle --export. 
+ for (auto *Arg : Args.filtered(OPT_export)) { + StringRef Name = Arg->getValue(); + Symbol *Sym = Symtab->find(Name); + if (Sym && Sym->isDefined()) + Sym->ForceExport = true; + else if (!Config->AllowUndefined) + error("symbol exported via --export not found: " + Name); + } + + // Do link-time optimization if given files are LLVM bitcode files. + // This compiles bitcode files into real object files. + Symtab->addCombinedLTOObject(); + if (errorCount()) + return; // Make sure we have resolved all symbols. if (!Config->Relocatable && !Config->AllowUndefined) { Symtab->reportRemainingUndefines(); } else { - // When we allow undefined symbols we cannot include those defined in - // -u/--undefined since these undefined symbols have only names and no - // function signature, which means they cannot be written to the final - // output. - for (StringRef S : args::getStrings(Args, OPT_undefined)) { - Symbol *Sym = Symtab->find(S); + // Even when using --allow-undefined we still want to report the absence of + // our initial set of undefined symbols (i.e. the entry point and symbols + // specified via --undefined). + // Part of the reason for this is that these functions don't have signatures, + // which means they cannot be written as wasm function imports. 
+ for (auto *Arg : Args.filtered(OPT_undefined)) { + Symbol *Sym = Symtab->find(Arg->getValue()); if (!Sym->isDefined()) - error("function forced with --undefined not found: " + Sym->getName()); + error("symbol forced with --undefined not found: " + Sym->getName()); } + if (EntrySym && !EntrySym->isDefined()) + error("entry symbol not defined (pass --no-entry to supress): " + + EntrySym->getName()); } if (errorCount()) return; - if (!Config->Entry.empty() && !Symtab->find(Config->Entry)->isDefined()) - error("entry point not found: " + Config->Entry); + if (EntrySym) + EntrySym->setHidden(false); + if (errorCount()) return; + // Do size optimizations: garbage collection + markLive(); + // Write the result to the file. writeResult(); } diff --git a/wasm/InputChunks.cpp b/wasm/InputChunks.cpp new file mode 100644 index 000000000000..fcefac7d99b8 --- /dev/null +++ b/wasm/InputChunks.cpp @@ -0,0 +1,295 @@ +//===- InputChunks.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "InputChunks.h" +#include "Config.h" +#include "OutputSegment.h" +#include "WriterUtils.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/LLVM.h" +#include "llvm/Support/LEB128.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace llvm::wasm; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::wasm; + +static StringRef ReloctTypeToString(uint8_t RelocType) { + switch (RelocType) { +#define WASM_RELOC(NAME, REL) case REL: return #NAME; +#include "llvm/BinaryFormat/WasmRelocs.def" +#undef WASM_RELOC + } + llvm_unreachable("unknown reloc type"); +} + +std::string lld::toString(const InputChunk *C) { + return (toString(C->File) + ":(" + C->getName() + ")").str(); +} + +StringRef InputChunk::getComdatName() const { + uint32_t Index = getComdat(); + if (Index == UINT32_MAX) + return StringRef(); + return File->getWasmObj()->linkingData().Comdats[Index]; +} + +void InputChunk::copyRelocations(const WasmSection &Section) { + if (Section.Relocations.empty()) + return; + size_t Start = getInputSectionOffset(); + size_t Size = getInputSize(); + for (const WasmRelocation &R : Section.Relocations) + if (R.Offset >= Start && R.Offset < Start + Size) + Relocations.push_back(R); +} + +void InputChunk::verifyRelocTargets() const { + for (const WasmRelocation &Rel : Relocations) { + uint32_t ExistingValue; + unsigned BytesRead = 0; + uint32_t Offset = Rel.Offset - getInputSectionOffset(); + const uint8_t *Loc = data().data() + Offset; + switch (Rel.Type) { + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + ExistingValue = decodeULEB128(Loc, &BytesRead); + break; + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + ExistingValue = static_cast<uint32_t>(decodeSLEB128(Loc, &BytesRead)); + break; 
+ case R_WEBASSEMBLY_TABLE_INDEX_I32: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + ExistingValue = static_cast<uint32_t>(read32le(Loc)); + break; + default: + llvm_unreachable("unknown relocation type"); + } + + if (BytesRead && BytesRead != 5) + warn("expected LEB at relocation site be 5-byte padded"); + uint32_t ExpectedValue = File->calcExpectedValue(Rel); + if (ExpectedValue != ExistingValue) + warn("unexpected existing value for " + ReloctTypeToString(Rel.Type) + + ": existing=" + Twine(ExistingValue) + + " expected=" + Twine(ExpectedValue)); + } +} + +// Copy this input chunk to an mmap'ed output file and apply relocations. +void InputChunk::writeTo(uint8_t *Buf) const { + // Copy contents + memcpy(Buf + OutputOffset, data().data(), data().size()); + + // Apply relocations + if (Relocations.empty()) + return; + +#ifndef NDEBUG + verifyRelocTargets(); +#endif + + LLVM_DEBUG(dbgs() << "applying relocations: " << getName() + << " count=" << Relocations.size() << "\n"); + int32_t Off = OutputOffset - getInputSectionOffset(); + + for (const WasmRelocation &Rel : Relocations) { + uint8_t *Loc = Buf + Rel.Offset + Off; + uint32_t Value = File->calcNewValue(Rel); + LLVM_DEBUG(dbgs() << "apply reloc: type=" << ReloctTypeToString(Rel.Type) + << " addend=" << Rel.Addend << " index=" << Rel.Index + << " value=" << Value << " offset=" << Rel.Offset + << "\n"); + + switch (Rel.Type) { + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + encodeULEB128(Value, Loc, 5); + break; + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + encodeSLEB128(static_cast<int32_t>(Value), Loc, 5); + break; + case R_WEBASSEMBLY_TABLE_INDEX_I32: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + write32le(Loc, Value); 
+ break; + default: + llvm_unreachable("unknown relocation type"); + } + } +} + +// Copy relocation entries to a given output stream. +// This function is used only when a user passes "-r". For a regular link, +// we consume relocations instead of copying them to an output file. +void InputChunk::writeRelocations(raw_ostream &OS) const { + if (Relocations.empty()) + return; + + int32_t Off = OutputOffset - getInputSectionOffset(); + LLVM_DEBUG(dbgs() << "writeRelocations: " << File->getName() + << " offset=" << Twine(Off) << "\n"); + + for (const WasmRelocation &Rel : Relocations) { + writeUleb128(OS, Rel.Type, "reloc type"); + writeUleb128(OS, Rel.Offset + Off, "reloc offset"); + writeUleb128(OS, File->calcNewIndex(Rel), "reloc index"); + + switch (Rel.Type) { + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + writeSleb128(OS, File->calcNewAddend(Rel), "reloc addend"); + break; + } + } +} + +void InputFunction::setFunctionIndex(uint32_t Index) { + LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << getName() + << " -> " << Index << "\n"); + assert(!hasFunctionIndex()); + FunctionIndex = Index; +} + +void InputFunction::setTableIndex(uint32_t Index) { + LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << getName() << " -> " + << Index << "\n"); + assert(!hasTableIndex()); + TableIndex = Index; +} + +// Write a relocation value without padding and return the number of bytes +// written. 
+static unsigned writeCompressedReloc(uint8_t *Buf, const WasmRelocation &Rel, + uint32_t Value) { + switch (Rel.Type) { + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + return encodeULEB128(Value, Buf); + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + return encodeSLEB128(static_cast<int32_t>(Value), Buf); + default: + llvm_unreachable("unexpected relocation type"); + } +} + +static unsigned getRelocWidthPadded(const WasmRelocation &Rel) { + switch (Rel.Type) { + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + return 5; + default: + llvm_unreachable("unexpected relocation type"); + } +} + +static unsigned getRelocWidth(const WasmRelocation &Rel, uint32_t Value) { + uint8_t Buf[5]; + return writeCompressedReloc(Buf, Rel, Value); +} + +// Relocations of type LEB and SLEB in the code section are padded to 5 bytes +// so that a fast linker can blindly overwrite them without needing to worry +// about the number of bytes needed to encode the values. +// However, for optimal output the code section can be compressed to remove +// the padding then outputting non-relocatable files. +// In this case we need to perform a size calculation based on the value at each +// relocation. At best we end up saving 4 bytes for each relocation entry. +// +// This function only computes the final output size. It must be called +// before getSize() is used to calculate of layout of the code section. 
+void InputFunction::calculateSize() { + if (!File || !Config->CompressRelocTargets) + return; + + LLVM_DEBUG(dbgs() << "calculateSize: " << getName() << "\n"); + + const uint8_t *SecStart = File->CodeSection->Content.data(); + const uint8_t *FuncStart = SecStart + getInputSectionOffset(); + uint32_t FunctionSizeLength; + decodeULEB128(FuncStart, &FunctionSizeLength); + + uint32_t Start = getInputSectionOffset(); + uint32_t End = Start + Function->Size; + + uint32_t LastRelocEnd = Start + FunctionSizeLength; + for (WasmRelocation &Rel : Relocations) { + LLVM_DEBUG(dbgs() << " region: " << (Rel.Offset - LastRelocEnd) << "\n"); + CompressedFuncSize += Rel.Offset - LastRelocEnd; + CompressedFuncSize += getRelocWidth(Rel, File->calcNewValue(Rel)); + LastRelocEnd = Rel.Offset + getRelocWidthPadded(Rel); + } + LLVM_DEBUG(dbgs() << " final region: " << (End - LastRelocEnd) << "\n"); + CompressedFuncSize += End - LastRelocEnd; + + // Now we know how long the resulting function is we can add the encoding + // of its length + uint8_t Buf[5]; + CompressedSize = CompressedFuncSize + encodeULEB128(CompressedFuncSize, Buf); + + LLVM_DEBUG(dbgs() << " calculateSize orig: " << Function->Size << "\n"); + LLVM_DEBUG(dbgs() << " calculateSize new: " << CompressedSize << "\n"); +} + +// Override the default writeTo method so that we can (optionally) write the +// compressed version of the function. 
+void InputFunction::writeTo(uint8_t *Buf) const { + if (!File || !Config->CompressRelocTargets) + return InputChunk::writeTo(Buf); + + Buf += OutputOffset; + uint8_t *Orig = Buf; (void)Orig; + + const uint8_t *SecStart = File->CodeSection->Content.data(); + const uint8_t *FuncStart = SecStart + getInputSectionOffset(); + const uint8_t *End = FuncStart + Function->Size; + uint32_t Count; + decodeULEB128(FuncStart, &Count); + FuncStart += Count; + + LLVM_DEBUG(dbgs() << "write func: " << getName() << "\n"); + Buf += encodeULEB128(CompressedFuncSize, Buf); + const uint8_t *LastRelocEnd = FuncStart; + for (const WasmRelocation &Rel : Relocations) { + unsigned ChunkSize = (SecStart + Rel.Offset) - LastRelocEnd; + LLVM_DEBUG(dbgs() << " write chunk: " << ChunkSize << "\n"); + memcpy(Buf, LastRelocEnd, ChunkSize); + Buf += ChunkSize; + Buf += writeCompressedReloc(Buf, Rel, File->calcNewValue(Rel)); + LastRelocEnd = SecStart + Rel.Offset + getRelocWidthPadded(Rel); + } + + unsigned ChunkSize = End - LastRelocEnd; + LLVM_DEBUG(dbgs() << " write final chunk: " << ChunkSize << "\n"); + memcpy(Buf, LastRelocEnd, ChunkSize); + LLVM_DEBUG(dbgs() << " total: " << (Buf + ChunkSize - Orig) << "\n"); +} diff --git a/wasm/InputChunks.h b/wasm/InputChunks.h new file mode 100644 index 000000000000..526e29870b21 --- /dev/null +++ b/wasm/InputChunks.h @@ -0,0 +1,236 @@ +//===- InputChunks.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// An InputChunks represents an indivisible opaque region of a input wasm file. +// i.e. a single wasm data segment or a single wasm function. +// +// They are written directly to the mmap'd output file after which relocations +// are applied. 
Because each Chunk is independent they can be written in +// parallel. +// +// Chunks are also unit on which garbage collection (--gc-sections) operates. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_INPUT_CHUNKS_H +#define LLD_WASM_INPUT_CHUNKS_H + +#include "Config.h" +#include "InputFiles.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/Object/Wasm.h" + +using llvm::object::WasmSection; +using llvm::object::WasmSegment; +using llvm::wasm::WasmFunction; +using llvm::wasm::WasmRelocation; +using llvm::wasm::WasmSignature; + +namespace llvm { +class raw_ostream; +} + +namespace lld { +namespace wasm { + +class ObjFile; +class OutputSegment; + +class InputChunk { +public: + enum Kind { DataSegment, Function, SyntheticFunction, Section }; + + Kind kind() const { return SectionKind; } + + virtual uint32_t getSize() const { return data().size(); } + + void copyRelocations(const WasmSection &Section); + + virtual void writeTo(uint8_t *SectionStart) const; + + ArrayRef<WasmRelocation> getRelocations() const { return Relocations; } + + virtual StringRef getName() const = 0; + virtual StringRef getDebugName() const = 0; + virtual uint32_t getComdat() const = 0; + StringRef getComdatName() const; + + size_t NumRelocations() const { return Relocations.size(); } + void writeRelocations(llvm::raw_ostream &OS) const; + + ObjFile *File; + int32_t OutputOffset = 0; + + // Signals that the section is part of the output. The garbage collector, + // and COMDAT handling can set a sections' Live bit. + // If GC is disabled, all sections start out as live by default. 
+ unsigned Live : 1; + +protected: + InputChunk(ObjFile *F, Kind K) + : File(F), Live(!Config->GcSections), SectionKind(K) {} + virtual ~InputChunk() = default; + virtual ArrayRef<uint8_t> data() const = 0; + virtual uint32_t getInputSectionOffset() const = 0; + virtual uint32_t getInputSize() const { return getSize(); }; + + // Verifies the existing data at relocation targets matches our expectations. + // This is performed only in debug builds as an extra sanity check. + void verifyRelocTargets() const; + + std::vector<WasmRelocation> Relocations; + Kind SectionKind; +}; + +// Represents a WebAssembly data segment which can be included as part of +// an output data segment. Note that WebAssembly, unlike ELF and other +// formats, uses the term "data segment" to refer to the contiguous regions of +// memory that make up the data section. See: +// https://webassembly.github.io/spec/syntax/modules.html#syntax-data +// +// For example, by default, clang will produce a separate data section for +// each global variable. +class InputSegment : public InputChunk { +public: + InputSegment(const WasmSegment &Seg, ObjFile *F) + : InputChunk(F, InputChunk::DataSegment), Segment(Seg) {} + + static bool classof(const InputChunk *C) { return C->kind() == DataSegment; } + + uint32_t getAlignment() const { return Segment.Data.Alignment; } + StringRef getName() const override { return Segment.Data.Name; } + StringRef getDebugName() const override { return StringRef(); } + uint32_t getComdat() const override { return Segment.Data.Comdat; } + + const OutputSegment *OutputSeg = nullptr; + int32_t OutputSegmentOffset = 0; + +protected: + ArrayRef<uint8_t> data() const override { return Segment.Data.Content; } + uint32_t getInputSectionOffset() const override { + return Segment.SectionOffset; + } + + const WasmSegment &Segment; +}; + +// Represents a single wasm function within an input file. These are +// combined to create the final output CODE section.
+class InputFunction : public InputChunk { +public: + InputFunction(const WasmSignature &S, const WasmFunction *Func, ObjFile *F) + : InputChunk(F, InputChunk::Function), Signature(S), Function(Func) {} + + static bool classof(const InputChunk *C) { + return C->kind() == InputChunk::Function || + C->kind() == InputChunk::SyntheticFunction; + } + + void writeTo(uint8_t *SectionStart) const override; + StringRef getName() const override { return Function->SymbolName; } + StringRef getDebugName() const override { return Function->DebugName; } + uint32_t getComdat() const override { return Function->Comdat; } + uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); } + uint32_t getFunctionCodeOffset() const { return Function->CodeOffset; } + uint32_t getSize() const override { + if (Config->CompressRelocTargets && File) { + assert(CompressedSize); + return CompressedSize; + } + return data().size(); + } + uint32_t getFunctionIndex() const { return FunctionIndex.getValue(); } + bool hasFunctionIndex() const { return FunctionIndex.hasValue(); } + void setFunctionIndex(uint32_t Index); + uint32_t getTableIndex() const { return TableIndex.getValue(); } + bool hasTableIndex() const { return TableIndex.hasValue(); } + void setTableIndex(uint32_t Index); + + // The size of a given input function can depend on the values of the + // LEB relocations within it. This method is called after + // all the symbol values have been calculated but before getSize() is ever + // called.
+ void calculateSize(); + + const WasmSignature &Signature; + +protected: + ArrayRef<uint8_t> data() const override { + assert(!Config->CompressRelocTargets); + return File->CodeSection->Content.slice(getInputSectionOffset(), + Function->Size); + } + + uint32_t getInputSize() const override { return Function->Size; } + + uint32_t getInputSectionOffset() const override { + return Function->CodeSectionOffset; + } + + const WasmFunction *Function; + llvm::Optional<uint32_t> FunctionIndex; + llvm::Optional<uint32_t> TableIndex; + uint32_t CompressedFuncSize = 0; + uint32_t CompressedSize = 0; +}; + +class SyntheticFunction : public InputFunction { +public: + SyntheticFunction(const WasmSignature &S, StringRef Name, + StringRef DebugName = {}) + : InputFunction(S, nullptr, nullptr), Name(Name), DebugName(DebugName) { + SectionKind = InputChunk::SyntheticFunction; + } + + static bool classof(const InputChunk *C) { + return C->kind() == InputChunk::SyntheticFunction; + } + + StringRef getName() const override { return Name; } + StringRef getDebugName() const override { return DebugName; } + uint32_t getComdat() const override { return UINT32_MAX; } + + void setBody(ArrayRef<uint8_t> Body_) { Body = Body_; } + +protected: + ArrayRef<uint8_t> data() const override { return Body; } + + StringRef Name; + StringRef DebugName; + ArrayRef<uint8_t> Body; +}; + +// Represents a single Wasm Section within an input file. +class InputSection : public InputChunk { +public: + InputSection(const WasmSection &S, ObjFile *F) + : InputChunk(F, InputChunk::Section), Section(S) { + assert(Section.Type == llvm::wasm::WASM_SEC_CUSTOM); + } + + StringRef getName() const override { return Section.Name; } + StringRef getDebugName() const override { return StringRef(); } + uint32_t getComdat() const override { return UINT32_MAX; } + +protected: + ArrayRef<uint8_t> data() const override { return Section.Content; } + + // Offset within the input section. 
This is only zero since this chunk + // type represents an entire input section, not part of one. + uint32_t getInputSectionOffset() const override { return 0; } + + const WasmSection &Section; +}; + +} // namespace wasm + +std::string toString(const wasm::InputChunk *); +} // namespace lld + +#endif // LLD_WASM_INPUT_CHUNKS_H diff --git a/wasm/InputFiles.cpp b/wasm/InputFiles.cpp index 1a1a6812c48e..53a24c3cffd4 100644 --- a/wasm/InputFiles.cpp +++ b/wasm/InputFiles.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" - #include "Config.h" -#include "InputSegment.h" +#include "InputChunks.h" +#include "InputGlobal.h" #include "SymbolTable.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" @@ -42,64 +42,126 @@ Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) { return MBRef; } -void ObjFile::dumpInfo() const { - log("reloc info for: " + getName() + "\n" + - " FunctionIndexOffset : " + Twine(FunctionIndexOffset) + "\n" + - " NumFunctionImports : " + Twine(NumFunctionImports()) + "\n" + - " NumGlobalImports : " + Twine(NumGlobalImports()) + "\n"); -} +InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) { + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::wasm_object) + return make<ObjFile>(MB); -bool ObjFile::isImportedFunction(uint32_t Index) const { - return Index < NumFunctionImports(); -} + if (Magic == file_magic::bitcode) + return make<BitcodeFile>(MB); -Symbol *ObjFile::getFunctionSymbol(uint32_t Index) const { - return FunctionSymbols[Index]; + fatal("unknown file type: " + MB.getBufferIdentifier()); } -Symbol *ObjFile::getTableSymbol(uint32_t Index) const { - return TableSymbols[Index]; -} - -Symbol *ObjFile::getGlobalSymbol(uint32_t Index) const { - return GlobalSymbols[Index]; -} - -uint32_t ObjFile::getRelocatedAddress(uint32_t Index) const { - return getGlobalSymbol(Index)->getVirtualAddress(); +void 
ObjFile::dumpInfo() const { + log("info for: " + getName() + + "\n Symbols : " + Twine(Symbols.size()) + + "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) + + "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals())); } -uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const { - Symbol *Sym = getFunctionSymbol(Original); - uint32_t Index = Sym->getOutputIndex(); - DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": " - << Original << " -> " << Index << "\n"); - return Index; +// Relocations contain either symbol or type indices. This function takes a +// relocation and returns relocated index (i.e. translates from the input +// sybmol/type space to the output symbol/type space). +uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const { + if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { + assert(TypeIsUsed[Reloc.Index]); + return TypeMap[Reloc.Index]; + } + return Symbols[Reloc.Index]->getOutputSymbolIndex(); } -uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const { - return TypeMap[Original]; +// Relocations can contain addend for combined sections. This function takes a +// relocation and returns updated addend by offset in the output section. 
+uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const { + switch (Reloc.Type) { + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + return Reloc.Addend; + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; + default: + llvm_unreachable("unexpected relocation type"); + } } -uint32_t ObjFile::relocateTableIndex(uint32_t Original) const { - Symbol *Sym = getTableSymbol(Original); - uint32_t Index = Sym->getTableIndex(); - DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original - << " -> " << Index << "\n"); - return Index; +// Calculate the value we expect to find at the relocation location. +// This is used as a sanity check before applying a relocation to a given +// location. It is useful for catching bugs in the compiler and linker. +uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const { + switch (Reloc.Type) { + case R_WEBASSEMBLY_TABLE_INDEX_I32: + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: { + const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; + return TableEntries[Sym.Info.ElementIndex]; + } + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: { + const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; + if (Sym.isUndefined()) + return 0; + const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment]; + return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset + + Reloc.Addend; + } + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { + return Sym->Function->getFunctionInputOffset() + + Sym->Function->getFunctionCodeOffset() + Reloc.Addend; + } + return 0; + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + return Reloc.Addend; + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + return Reloc.Index; + case 
R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { + const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index]; + return Sym.Info.ElementIndex; + } + default: + llvm_unreachable("unknown relocation type"); + } } -uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const { - Symbol *Sym = getGlobalSymbol(Original); - uint32_t Index = Sym->getOutputIndex(); - DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original - << " -> " << Index << "\n"); - return Index; +// Translate from the relocation's index into the final linked output value. +uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const { + switch (Reloc.Type) { + case R_WEBASSEMBLY_TABLE_INDEX_I32: + case R_WEBASSEMBLY_TABLE_INDEX_SLEB: + return getFunctionSymbol(Reloc.Index)->getTableIndex(); + case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: + case R_WEBASSEMBLY_MEMORY_ADDR_I32: + case R_WEBASSEMBLY_MEMORY_ADDR_LEB: + if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index))) + if (Sym->isLive()) + return Sym->getVirtualAddress() + Reloc.Addend; + return 0; + case R_WEBASSEMBLY_TYPE_INDEX_LEB: + return TypeMap[Reloc.Index]; + case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + return getFunctionSymbol(Reloc.Index)->getFunctionIndex(); + case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + return getGlobalSymbol(Reloc.Index)->getGlobalIndex(); + case R_WEBASSEMBLY_FUNCTION_OFFSET_I32: + if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) { + return Sym->Function->OutputOffset + + Sym->Function->getFunctionCodeOffset() + Reloc.Addend; + } + return 0; + case R_WEBASSEMBLY_SECTION_OFFSET_I32: + return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend; + default: + llvm_unreachable("unknown relocation type"); + } } void ObjFile::parse() { // Parse a memory buffer as a wasm file. 
- DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); + LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this)); auto *Obj = dyn_cast<WasmObjectFile>(Bin.get()); @@ -111,156 +173,175 @@ void ObjFile::parse() { Bin.release(); WasmObj.reset(Obj); + // Build up a map of function indices to table indices for use when + // verifying the existing table index relocations + uint32_t TotalFunctions = + WasmObj->getNumImportedFunctions() + WasmObj->functions().size(); + TableEntries.resize(TotalFunctions); + for (const WasmElemSegment &Seg : WasmObj->elements()) { + if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST) + fatal(toString(this) + ": invalid table elements"); + uint32_t Offset = Seg.Offset.Value.Int32; + for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) { + + uint32_t FunctionIndex = Seg.Functions[Index]; + TableEntries[FunctionIndex] = Offset + Index; + } + } + // Find the code and data sections. Wasm objects can have at most one code // and one data section. + uint32_t SectionIndex = 0; for (const SectionRef &Sec : WasmObj->sections()) { const WasmSection &Section = WasmObj->getWasmSection(Sec); - if (Section.Type == WASM_SEC_CODE) + if (Section.Type == WASM_SEC_CODE) { CodeSection = &Section; - else if (Section.Type == WASM_SEC_DATA) + } else if (Section.Type == WASM_SEC_DATA) { DataSection = &Section; + } else if (Section.Type == WASM_SEC_CUSTOM) { + CustomSections.emplace_back(make<InputSection>(Section, this)); + CustomSections.back()->copyRelocations(Section); + CustomSectionsByIndex[SectionIndex] = CustomSections.back(); + } + SectionIndex++; } - initializeSymbols(); -} + TypeMap.resize(getWasmObj()->types().size()); + TypeIsUsed.resize(getWasmObj()->types().size(), false); -// Return the InputSegment in which a given symbol is defined. 
-InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) { - uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym); - for (InputSegment *Segment : Segments) { - if (Address >= Segment->startVA() && Address < Segment->endVA()) { - DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> " - << Segment->getName() << "\n"); + ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats; + UsedComdats.resize(Comdats.size()); + for (unsigned I = 0; I < Comdats.size(); ++I) + UsedComdats[I] = Symtab->addComdat(Comdats[I]); - return Segment; - } + // Populate `Segments`. + for (const WasmSegment &S : WasmObj->dataSegments()) { + InputSegment *Seg = make<InputSegment>(S, this); + Seg->copyRelocations(*DataSection); + Segments.emplace_back(Seg); + } + + // Populate `Functions`. + ArrayRef<WasmFunction> Funcs = WasmObj->functions(); + ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes(); + ArrayRef<WasmSignature> Types = WasmObj->types(); + Functions.reserve(Funcs.size()); + + for (size_t I = 0, E = Funcs.size(); I != E; ++I) { + InputFunction *F = + make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this); + F->copyRelocations(*CodeSection); + Functions.emplace_back(F); + } + + // Populate `Globals`. + for (const WasmGlobal &G : WasmObj->globals()) + Globals.emplace_back(make<InputGlobal>(G, this)); + + // Populate `Symbols` based on the WasmSymbols in the object. 
+ Symbols.reserve(WasmObj->getNumberOfSymbols()); + for (const SymbolRef &Sym : WasmObj->symbols()) { + const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); + if (Symbol *Sym = createDefined(WasmSym)) + Symbols.push_back(Sym); + else + Symbols.push_back(createUndefined(WasmSym)); } - error("symbol not found in any segment: " + WasmSym.Name); - return nullptr; } -static void copyRelocationsRange(std::vector<WasmRelocation> &To, - ArrayRef<WasmRelocation> From, size_t Start, - size_t End) { - for (const WasmRelocation &R : From) - if (R.Offset >= Start && R.Offset < End) - To.push_back(R); +bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const { + uint32_t C = Chunk->getComdat(); + if (C == UINT32_MAX) + return false; + return !UsedComdats[C]; } -void ObjFile::initializeSymbols() { - Symbols.reserve(WasmObj->getNumberOfSymbols()); +FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const { + return cast<FunctionSymbol>(Symbols[Index]); +} - for (const WasmImport &Import : WasmObj->imports()) { - switch (Import.Kind) { - case WASM_EXTERNAL_FUNCTION: - ++FunctionImports; - break; - case WASM_EXTERNAL_GLOBAL: - ++GlobalImports; - break; - } - } +GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const { + return cast<GlobalSymbol>(Symbols[Index]); +} - FunctionSymbols.resize(FunctionImports + WasmObj->functions().size()); - GlobalSymbols.resize(GlobalImports + WasmObj->globals().size()); +SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const { + return cast<SectionSymbol>(Symbols[Index]); +} - for (const WasmSegment &S : WasmObj->dataSegments()) { - InputSegment *Seg = make<InputSegment>(&S, this); - copyRelocationsRange(Seg->Relocations, DataSection->Relocations, - Seg->getInputSectionOffset(), - Seg->getInputSectionOffset() + Seg->getSize()); - Segments.emplace_back(Seg); - } +DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const { + return cast<DataSymbol>(Symbols[Index]); +} - // Populate `FunctionSymbols` and 
`GlobalSymbols` based on the WasmSymbols - // in the object - for (const SymbolRef &Sym : WasmObj->symbols()) { - const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl()); - Symbol *S; - switch (WasmSym.Type) { - case WasmSymbol::SymbolType::FUNCTION_IMPORT: - case WasmSymbol::SymbolType::GLOBAL_IMPORT: - S = createUndefined(WasmSym); - break; - case WasmSymbol::SymbolType::GLOBAL_EXPORT: - S = createDefined(WasmSym, getSegment(WasmSym)); - break; - case WasmSymbol::SymbolType::FUNCTION_EXPORT: - S = createDefined(WasmSym); - break; - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: - // These are for debugging only, no need to create linker symbols for them - continue; - } +Symbol *ObjFile::createDefined(const WasmSymbol &Sym) { + if (!Sym.isDefined()) + return nullptr; - Symbols.push_back(S); - if (WasmSym.isFunction()) { - DEBUG(dbgs() << "Function: " << WasmSym.ElementIndex << " -> " - << toString(*S) << "\n"); - FunctionSymbols[WasmSym.ElementIndex] = S; - if (WasmSym.HasAltIndex) - FunctionSymbols[WasmSym.AltIndex] = S; - } else { - DEBUG(dbgs() << "Global: " << WasmSym.ElementIndex << " -> " - << toString(*S) << "\n"); - GlobalSymbols[WasmSym.ElementIndex] = S; - if (WasmSym.HasAltIndex) - GlobalSymbols[WasmSym.AltIndex] = S; + StringRef Name = Sym.Info.Name; + uint32_t Flags = Sym.Info.Flags; + + switch (Sym.Info.Kind) { + case WASM_SYMBOL_TYPE_FUNCTION: { + InputFunction *Func = + Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()]; + if (isExcludedByComdat(Func)) { + Func->Live = false; + return nullptr; } - } - DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I) - assert(FunctionSymbols[I] != nullptr); - for (size_t I = 0; I < GlobalSymbols.size(); ++I) - assert(GlobalSymbols[I] != nullptr);); - - // Populate `TableSymbols` with all symbols that are called indirectly - uint32_t SegmentCount = WasmObj->elements().size(); - if (SegmentCount) { - if (SegmentCount > 1) - fatal(getName() + ": contains more than one 
element segment"); - const WasmElemSegment &Segment = WasmObj->elements()[0]; - if (Segment.Offset.Opcode != WASM_OPCODE_I32_CONST) - fatal(getName() + ": unsupported element segment"); - if (Segment.TableIndex != 0) - fatal(getName() + ": unsupported table index in elem segment"); - if (Segment.Offset.Value.Int32 != 0) - fatal(getName() + ": unsupported element segment offset"); - TableSymbols.reserve(Segment.Functions.size()); - for (uint64_t FunctionIndex : Segment.Functions) - TableSymbols.push_back(getFunctionSymbol(FunctionIndex)); + if (Sym.isBindingLocal()) + return make<DefinedFunction>(Name, Flags, this, Func); + return Symtab->addDefinedFunction(Name, Flags, this, Func); } + case WASM_SYMBOL_TYPE_DATA: { + InputSegment *Seg = Segments[Sym.Info.DataRef.Segment]; + if (isExcludedByComdat(Seg)) { + Seg->Live = false; + return nullptr; + } - DEBUG(dbgs() << "TableSymbols: " << TableSymbols.size() << "\n"); - DEBUG(dbgs() << "Functions : " << FunctionSymbols.size() << "\n"); - DEBUG(dbgs() << "Globals : " << GlobalSymbols.size() << "\n"); -} + uint32_t Offset = Sym.Info.DataRef.Offset; + uint32_t Size = Sym.Info.DataRef.Size; -Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { - return Symtab->addUndefined(this, &Sym); + if (Sym.isBindingLocal()) + return make<DefinedData>(Name, Flags, this, Seg, Offset, Size); + return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size); + } + case WASM_SYMBOL_TYPE_GLOBAL: { + InputGlobal *Global = + Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()]; + if (Sym.isBindingLocal()) + return make<DefinedGlobal>(Name, Flags, this, Global); + return Symtab->addDefinedGlobal(Name, Flags, this, Global); + } + case WASM_SYMBOL_TYPE_SECTION: { + InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex]; + assert(Sym.isBindingLocal()); + return make<SectionSymbol>(Name, Flags, Section, this); + } + } + llvm_unreachable("unknown symbol kind"); } -Symbol *ObjFile::createDefined(const WasmSymbol 
&Sym, - const InputSegment *Segment) { - Symbol *S; - if (Sym.isLocal()) { - S = make<Symbol>(Sym.Name, true); - Symbol::Kind Kind; - if (Sym.Type == WasmSymbol::SymbolType::FUNCTION_EXPORT) - Kind = Symbol::Kind::DefinedFunctionKind; - else if (Sym.Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) - Kind = Symbol::Kind::DefinedGlobalKind; - else - llvm_unreachable("invalid local symbol type"); - S->update(Kind, this, &Sym, Segment); - return S; +Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) { + StringRef Name = Sym.Info.Name; + uint32_t Flags = Sym.Info.Flags; + + switch (Sym.Info.Kind) { + case WASM_SYMBOL_TYPE_FUNCTION: + return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType); + case WASM_SYMBOL_TYPE_DATA: + return Symtab->addUndefinedData(Name, Flags, this); + case WASM_SYMBOL_TYPE_GLOBAL: + return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType); + case WASM_SYMBOL_TYPE_SECTION: + llvm_unreachable("section symbols cannot be undefined"); } - return Symtab->addDefined(this, &Sym, Segment); + llvm_unreachable("unknown symbol kind"); } void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. - DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); + LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); File = CHECK(Archive::create(MB), toString(this)); // Read the symbol table to construct Lazy symbols. 
@@ -269,7 +350,7 @@ void ArchiveFile::parse() { Symtab->addLazy(this, &Sym); ++Count; } - DEBUG(dbgs() << "Read " << Count << " symbols\n"); + LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n"); } void ArchiveFile::addMember(const Archive::Symbol *Sym) { @@ -282,22 +363,59 @@ void ArchiveFile::addMember(const Archive::Symbol *Sym) { if (!Seen.insert(C.getChildOffset()).second) return; - DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); - DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); + LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n"); + LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); MemoryBufferRef MB = CHECK(C.getMemoryBufferRef(), "could not get the buffer for the member defining symbol " + Sym->getName()); - if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) { - error("unknown file type: " + MB.getBufferIdentifier()); + InputFile *Obj = createObjectFile(MB); + Obj->ArchiveName = getName(); + Symtab->addFile(Obj); +} + +static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { + switch (GvVisibility) { + case GlobalValue::DefaultVisibility: + return WASM_SYMBOL_VISIBILITY_DEFAULT; + case GlobalValue::HiddenVisibility: + case GlobalValue::ProtectedVisibility: + return WASM_SYMBOL_VISIBILITY_HIDDEN; + } + llvm_unreachable("unknown visibility"); +} + +static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym, + BitcodeFile &F) { + StringRef Name = Saver.save(ObjSym.getName()); + + uint32_t Flags = ObjSym.isWeak() ? 
WASM_SYMBOL_BINDING_WEAK : 0; + Flags |= mapVisibility(ObjSym.getVisibility()); + + if (ObjSym.isUndefined()) { + if (ObjSym.isExecutable()) + return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr); + return Symtab->addUndefinedData(Name, Flags, &F); + } + + if (ObjSym.isExecutable()) + return Symtab->addDefinedFunction(Name, Flags, &F, nullptr); + return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0); +} + +void BitcodeFile::parse() { + Obj = check(lto::InputFile::create(MemoryBufferRef( + MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier())))); + Triple T(Obj->getTargetTriple()); + if (T.getArch() != Triple::wasm32) { + error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32"); return; } - InputFile *Obj = make<ObjFile>(MB); - Obj->ParentName = ParentName; - Symtab->addFile(Obj); + for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) + Symbols.push_back(createBitcodeSymbol(ObjSym, *this)); } // Returns a string in the format of "foo.o" or "foo.a(bar.o)". 
@@ -305,8 +423,8 @@ std::string lld::toString(const wasm::InputFile *File) { if (!File) return "<internal>"; - if (File->ParentName.empty()) + if (File->ArchiveName.empty()) return File->getName(); - return (File->ParentName + "(" + File->getName() + ")").str(); + return (File->ArchiveName + "(" + File->getName() + ")").str(); } diff --git a/wasm/InputFiles.h b/wasm/InputFiles.h index 158cc53cafb1..ec77446e6308 100644 --- a/wasm/InputFiles.h +++ b/wasm/InputFiles.h @@ -10,34 +10,46 @@ #ifndef LLD_WASM_INPUT_FILES_H #define LLD_WASM_INPUT_FILES_H +#include "Symbols.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/MemoryBuffer.h" - -#include "WriterUtils.h" - #include <vector> using llvm::object::Archive; using llvm::object::WasmObjectFile; using llvm::object::WasmSection; using llvm::object::WasmSymbol; +using llvm::wasm::WasmGlobal; using llvm::wasm::WasmImport; +using llvm::wasm::WasmRelocation; +using llvm::wasm::WasmSignature; + +namespace llvm { +namespace lto { +class InputFile; +} +} // namespace llvm namespace lld { namespace wasm { -class Symbol; +class InputChunk; +class InputFunction; class InputSegment; +class InputGlobal; +class InputSection; class InputFile { public: enum Kind { ObjectKind, ArchiveKind, + BitcodeKind, }; virtual ~InputFile() {} @@ -51,12 +63,17 @@ public: Kind kind() const { return FileKind; } // An archive file name if this file is created from an archive. - StringRef ParentName; + StringRef ArchiveName; + + ArrayRef<Symbol *> getSymbols() const { return Symbols; } protected: InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} MemoryBufferRef MB; + // List of all symbols referenced or defined by this file. 
+ std::vector<Symbol *> Symbols; + private: const Kind FileKind; }; @@ -89,58 +106,54 @@ public: void dumpInfo() const; - uint32_t relocateTypeIndex(uint32_t Original) const; - uint32_t relocateFunctionIndex(uint32_t Original) const; - uint32_t relocateGlobalIndex(uint32_t Original) const; - uint32_t relocateTableIndex(uint32_t Original) const; - uint32_t getRelocatedAddress(uint32_t Index) const; - - // Returns true if the given function index is an imported function, - // as opposed to the locally defined function. - bool isImportedFunction(uint32_t Index) const; - - size_t NumFunctionImports() const { return FunctionImports; } - size_t NumGlobalImports() const { return GlobalImports; } + uint32_t calcNewIndex(const WasmRelocation &Reloc) const; + uint32_t calcNewValue(const WasmRelocation &Reloc) const; + uint32_t calcNewAddend(const WasmRelocation &Reloc) const; + uint32_t calcExpectedValue(const WasmRelocation &Reloc) const; - int32_t FunctionIndexOffset = 0; const WasmSection *CodeSection = nullptr; - std::vector<OutputRelocation> CodeRelocations; - int32_t CodeOffset = 0; const WasmSection *DataSection = nullptr; + // Maps input type indices to output type indices std::vector<uint32_t> TypeMap; + std::vector<bool> TypeIsUsed; + // Maps function indices to table indices + std::vector<uint32_t> TableEntries; + std::vector<bool> UsedComdats; std::vector<InputSegment *> Segments; + std::vector<InputFunction *> Functions; + std::vector<InputGlobal *> Globals; + std::vector<InputSection *> CustomSections; + llvm::DenseMap<uint32_t, InputSection *> CustomSectionsByIndex; - ArrayRef<Symbol *> getSymbols() { return Symbols; } - ArrayRef<Symbol *> getTableSymbols() { return TableSymbols; } + Symbol *getSymbol(uint32_t Index) const { return Symbols[Index]; } + FunctionSymbol *getFunctionSymbol(uint32_t Index) const; + DataSymbol *getDataSymbol(uint32_t Index) const; + GlobalSymbol *getGlobalSymbol(uint32_t Index) const; + SectionSymbol *getSectionSymbol(uint32_t Index) 
const; private: - Symbol *createDefined(const WasmSymbol &Sym, - const InputSegment *Segment = nullptr); + Symbol *createDefined(const WasmSymbol &Sym); Symbol *createUndefined(const WasmSymbol &Sym); - void initializeSymbols(); - InputSegment *getSegment(const WasmSymbol &WasmSym); - Symbol *getFunctionSymbol(uint32_t FunctionIndex) const; - Symbol *getTableSymbol(uint32_t TableIndex) const; - Symbol *getGlobalSymbol(uint32_t GlobalIndex) const; - // List of all symbols referenced or defined by this file. - std::vector<Symbol *> Symbols; - - // List of all function symbols indexed by the function index space - std::vector<Symbol *> FunctionSymbols; + bool isExcludedByComdat(InputChunk *Chunk) const; - // List of all global symbols indexed by the global index space - std::vector<Symbol *> GlobalSymbols; + std::unique_ptr<WasmObjectFile> WasmObj; +}; - // List of all indirect symbols indexed by table index space. - std::vector<Symbol *> TableSymbols; +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } - uint32_t GlobalImports = 0; - uint32_t FunctionImports = 0; - std::unique_ptr<WasmObjectFile> WasmObj; + void parse() override; + std::unique_ptr<llvm::lto::InputFile> Obj; }; +// Will report a fatal() error if the input buffer is not a valid bitcode +// or was object file. +InputFile *createObjectFile(MemoryBufferRef MB); + // Opens a given file. llvm::Optional<MemoryBufferRef> readFile(StringRef Path); diff --git a/wasm/InputGlobal.h b/wasm/InputGlobal.h new file mode 100644 index 000000000000..37d0ab903706 --- /dev/null +++ b/wasm/InputGlobal.h @@ -0,0 +1,59 @@ +//===- InputGlobal.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_INPUT_GLOBAL_H +#define LLD_WASM_INPUT_GLOBAL_H + +#include "Config.h" +#include "InputFiles.h" +#include "WriterUtils.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/Object/Wasm.h" + +using llvm::wasm::WasmGlobal; +using llvm::wasm::WasmInitExpr; + +namespace lld { +namespace wasm { + +// Represents a single Wasm Global Variable within an input file. These are +// combined to form the final GLOBALS section. +class InputGlobal { +public: + InputGlobal(const WasmGlobal &G, ObjFile *F) + : File(F), Global(G), Live(!Config->GcSections) {} + + StringRef getName() const { return Global.SymbolName; } + const WasmGlobalType &getType() const { return Global.Type; } + + uint32_t getGlobalIndex() const { return GlobalIndex.getValue(); } + bool hasGlobalIndex() const { return GlobalIndex.hasValue(); } + void setGlobalIndex(uint32_t Index) { + assert(!hasGlobalIndex()); + GlobalIndex = Index; + } + + ObjFile *File; + WasmGlobal Global; + + bool Live = false; + +protected: + llvm::Optional<uint32_t> GlobalIndex; +}; + +} // namespace wasm + +inline std::string toString(const wasm::InputGlobal *G) { + return (toString(G->File) + ":(" + G->getName() + ")").str(); +} + +} // namespace lld + +#endif // LLD_WASM_INPUT_GLOBAL_H diff --git a/wasm/InputSegment.cpp b/wasm/InputSegment.cpp deleted file mode 100644 index 650914386259..000000000000 --- a/wasm/InputSegment.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===- InputSegment.cpp ---------------------------------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "InputSegment.h" -#include "OutputSegment.h" -#include "lld/Common/LLVM.h" - -#define DEBUG_TYPE "lld" - -using namespace llvm; -using namespace lld::wasm; - -uint32_t InputSegment::translateVA(uint32_t Address) const { - assert(Address >= startVA() && Address < endVA()); - int32_t Delta = OutputSeg->StartVA + OutputSegmentOffset - startVA(); - DEBUG(dbgs() << "translateVA: " << getName() << " Delta=" << Delta - << " Address=" << Address << "\n"); - return Address + Delta; -} diff --git a/wasm/InputSegment.h b/wasm/InputSegment.h deleted file mode 100644 index f70a3ded895e..000000000000 --- a/wasm/InputSegment.h +++ /dev/null @@ -1,76 +0,0 @@ -//===- InputSegment.h -------------------------------------------*- C++ -*-===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Represents a WebAssembly data segment which can be included as part of -// an output data segments. Note that in WebAssembly, unlike ELF and other -// formats, used the term "data segment" to refer to the continous regions of -// memory that make on the data section. See: -// https://webassembly.github.io/spec/syntax/modules.html#syntax-data -// -// For example, by default, clang will produce a separate data section for -// each global variable. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLD_WASM_INPUT_SEGMENT_H -#define LLD_WASM_INPUT_SEGMENT_H - -#include "WriterUtils.h" -#include "lld/Common/ErrorHandler.h" -#include "llvm/Object/Wasm.h" - -using llvm::object::WasmSegment; -using llvm::wasm::WasmRelocation; - -namespace lld { -namespace wasm { - -class ObjFile; -class OutputSegment; - -class InputSegment { -public: - InputSegment(const WasmSegment *Seg, const ObjFile *F) - : Segment(Seg), File(F) {} - - // Translate an offset in the input segment to an offset in the output - // segment. - uint32_t translateVA(uint32_t Address) const; - - const OutputSegment *getOutputSegment() const { return OutputSeg; } - - uint32_t getOutputSegmentOffset() const { return OutputSegmentOffset; } - - uint32_t getInputSectionOffset() const { return Segment->SectionOffset; } - - void setOutputSegment(const OutputSegment *Segment, uint32_t Offset) { - OutputSeg = Segment; - OutputSegmentOffset = Offset; - } - - uint32_t getSize() const { return Segment->Data.Content.size(); } - uint32_t getAlignment() const { return Segment->Data.Alignment; } - uint32_t startVA() const { return Segment->Data.Offset.Value.Int32; } - uint32_t endVA() const { return startVA() + getSize(); } - StringRef getName() const { return Segment->Data.Name; } - - const WasmSegment *Segment; - const ObjFile *File; - std::vector<WasmRelocation> Relocations; - std::vector<OutputRelocation> OutRelocations; - -protected: - const OutputSegment *OutputSeg = nullptr; - uint32_t OutputSegmentOffset = 0; -}; - -} // namespace wasm -} // namespace lld - -#endif // LLD_WASM_INPUT_SEGMENT_H diff --git a/wasm/LTO.cpp b/wasm/LTO.cpp new file mode 100644 index 000000000000..f15551da8b80 --- /dev/null +++ b/wasm/LTO.cpp @@ -0,0 +1,155 @@ +//===- LTO.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source 
+// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "LTO.h" +#include "Config.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Strings.h" +#include "lld/Common/TargetOptionsCommandFlags.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/LTO/Caching.h" +#include "llvm/LTO/Config.h" +#include "llvm/LTO/LTO.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstddef> +#include <memory> +#include <string> +#include <system_error> +#include <vector> + +using namespace llvm; +using namespace llvm::object; + +using namespace lld; +using namespace lld::wasm; + +static std::unique_ptr<lto::LTO> createLTO() { + lto::Config C; + C.Options = InitTargetOptionsFromCodeGenFlags(); + + // Always emit a section per function/data with LTO. 
+ C.Options.FunctionSections = true; + C.Options.DataSections = true; + + // Wasm currently only supports ThreadModel::Single + C.Options.ThreadModel = ThreadModel::Single; + + C.DisableVerify = Config->DisableVerify; + C.DiagHandler = diagnosticHandler; + C.OptLevel = Config->LTOO; + + if (Config->SaveTemps) + checkError(C.addSaveTemps(Config->OutputFile.str() + ".", + /*UseInputModulePath*/ true)); + + lto::ThinBackend Backend; + if (Config->ThinLTOJobs != -1U) + Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs); + return llvm::make_unique<lto::LTO>(std::move(C), Backend, + Config->LTOPartitions); +} + +BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} + +BitcodeCompiler::~BitcodeCompiler() = default; + +static void undefine(Symbol *S) { + if (isa<DefinedFunction>(S)) + replaceSymbol<UndefinedFunction>(S, S->getName(), 0); + else if (isa<DefinedData>(S)) + replaceSymbol<UndefinedData>(S, S->getName(), 0); + else + llvm_unreachable("unexpected symbol kind"); +} + +void BitcodeCompiler::add(BitcodeFile &F) { + lto::InputFile &Obj = *F.Obj; + unsigned SymNum = 0; + ArrayRef<Symbol *> Syms = F.getSymbols(); + std::vector<lto::SymbolResolution> Resols(Syms.size()); + + // Provide a resolution to the LTO API for each symbol. + for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) { + Symbol *Sym = Syms[SymNum]; + lto::SymbolResolution &R = Resols[SymNum]; + ++SymNum; + + // Ideally we shouldn't check for SF_Undefined but currently IRObjectFile + // reports two symbols for module ASM defined. Without this check, lld + // flags an undefined in IR with a definition in ASM as prevailing. + // Once IRObjectFile is fixed to report only one symbol this hack can + // be removed. 
+ R.Prevailing = !ObjSym.isUndefined() && Sym->getFile() == &F; + R.VisibleToRegularObj = Config->Relocatable || Sym->IsUsedInRegularObj || + (R.Prevailing && Sym->isExported()); + if (R.Prevailing) + undefine(Sym); + } + checkError(LTOObj->add(std::move(F.Obj), Resols)); +} + +// Merge all the bitcode files we have seen, codegen the result +// and return the resulting objects. +std::vector<StringRef> BitcodeCompiler::compile() { + unsigned MaxTasks = LTOObj->getMaxTasks(); + Buf.resize(MaxTasks); + Files.resize(MaxTasks); + + // The --thinlto-cache-dir option specifies the path to a directory in which + // to cache native object files for ThinLTO incremental builds. If a path was + // specified, configure LTO to use it as the cache directory. + lto::NativeObjectCache Cache; + if (!Config->ThinLTOCacheDir.empty()) + Cache = check( + lto::localCache(Config->ThinLTOCacheDir, + [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) { + Files[Task] = std::move(MB); + })); + + checkError(LTOObj->run( + [&](size_t Task) { + return llvm::make_unique<lto::NativeObjectStream>( + llvm::make_unique<raw_svector_ostream>(Buf[Task])); + }, + Cache)); + + if (!Config->ThinLTOCacheDir.empty()) + pruneCache(Config->ThinLTOCacheDir, Config->ThinLTOCachePolicy); + + std::vector<StringRef> Ret; + for (unsigned I = 0; I != MaxTasks; ++I) { + if (Buf[I].empty()) + continue; + if (Config->SaveTemps) { + if (I == 0) + saveBuffer(Buf[I], Config->OutputFile + ".lto.o"); + else + saveBuffer(Buf[I], Config->OutputFile + Twine(I) + ".lto.o"); + } + Ret.emplace_back(Buf[I].data(), Buf[I].size()); + } + + for (std::unique_ptr<MemoryBuffer> &File : Files) + if (File) + Ret.push_back(File->getBuffer()); + + return Ret; +} diff --git a/wasm/LTO.h b/wasm/LTO.h new file mode 100644 index 000000000000..cf726de5643a --- /dev/null +++ b/wasm/LTO.h @@ -0,0 +1,57 @@ +//===- LTO.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under 
the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides a way to combine bitcode files into one wasm +// file by compiling them using LLVM. +// +// If LTO is in use, your input files are not in regular wasm files +// but instead LLVM bitcode files. In that case, the linker has to +// convert bitcode files into the native format so that we can create +// a wasm file that contains native code. This file provides that +// functionality. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_LTO_H +#define LLD_WASM_LTO_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/SmallString.h" +#include <memory> +#include <vector> + +namespace llvm { +namespace lto { +class LTO; +} +} // namespace llvm + +namespace lld { +namespace wasm { + +class BitcodeFile; +class InputFile; + +class BitcodeCompiler { +public: + BitcodeCompiler(); + ~BitcodeCompiler(); + + void add(BitcodeFile &F); + std::vector<StringRef> compile(); + +private: + std::unique_ptr<llvm::lto::LTO> LTOObj; + std::vector<SmallString<0>> Buf; + std::vector<std::unique_ptr<MemoryBuffer>> Files; +}; +} // namespace wasm +} // namespace lld + +#endif diff --git a/wasm/MarkLive.cpp b/wasm/MarkLive.cpp new file mode 100644 index 000000000000..dfaa712c3296 --- /dev/null +++ b/wasm/MarkLive.cpp @@ -0,0 +1,118 @@ +//===- MarkLive.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements --gc-sections, which is a feature to remove unused +// chunks from the output. Unused chunks are those that are not reachable from +// known root symbols or chunks. 
This feature is implemented as a mark-sweep +// garbage collector. +// +// Here's how it works. Each InputChunk has a "Live" bit. The bit is off by +// default. Starting with the GC-roots, visit all reachable chunks and set their +// Live bits. The Writer will then ignore chunks whose Live bits are off, so +// that such chunk are not appear in the output. +// +//===----------------------------------------------------------------------===// + +#include "MarkLive.h" +#include "Config.h" +#include "InputChunks.h" +#include "InputGlobal.h" +#include "SymbolTable.h" +#include "Symbols.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace llvm::wasm; +using namespace lld; +using namespace lld::wasm; + +void lld::wasm::markLive() { + if (!Config->GcSections) + return; + + LLVM_DEBUG(dbgs() << "markLive\n"); + SmallVector<InputChunk *, 256> Q; + + auto Enqueue = [&](Symbol *Sym) { + if (!Sym || Sym->isLive()) + return; + LLVM_DEBUG(dbgs() << "markLive: " << Sym->getName() << "\n"); + Sym->markLive(); + if (InputChunk *Chunk = Sym->getChunk()) + Q.push_back(Chunk); + }; + + // Add GC root symbols. + if (!Config->Entry.empty()) + Enqueue(Symtab->find(Config->Entry)); + Enqueue(WasmSym::CallCtors); + + // We need to preserve any exported symbol + for (Symbol *Sym : Symtab->getSymbols()) + if (Sym->isExported()) + Enqueue(Sym); + + // The ctor functions are all used in the synthetic __wasm_call_ctors + // function, but since this function is created in-place it doesn't contain + // relocations which mean we have to manually mark the ctors. + for (const ObjFile *Obj : Symtab->ObjectFiles) { + const WasmLinkingData &L = Obj->getWasmObj()->linkingData(); + for (const WasmInitFunc &F : L.InitFunctions) + Enqueue(Obj->getFunctionSymbol(F.Symbol)); + } + + // Follow relocations to mark all reachable chunks. 
+ while (!Q.empty()) { + InputChunk *C = Q.pop_back_val(); + + for (const WasmRelocation Reloc : C->getRelocations()) { + if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) + continue; + Symbol *Sym = C->File->getSymbol(Reloc.Index); + + // If the function has been assigned the special index zero in the table, + // the relocation doesn't pull in the function body, since the function + // won't actually go in the table (the runtime will trap attempts to call + // that index, since we don't use it). A function with a table index of + // zero is only reachable via "call", not via "call_indirect". The stub + // functions used for weak-undefined symbols have this behaviour (compare + // equal to null pointer, only reachable via direct call). + if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB || + Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32) { + FunctionSymbol *FuncSym = cast<FunctionSymbol>(Sym); + if (FuncSym->hasTableIndex() && FuncSym->getTableIndex() == 0) + continue; + } + + Enqueue(Sym); + } + } + + // Report garbage-collected sections. 
+ if (Config->PrintGcSections) { + for (const ObjFile *Obj : Symtab->ObjectFiles) { + for (InputChunk *C : Obj->Functions) + if (!C->Live) + message("removing unused section " + toString(C)); + for (InputChunk *C : Obj->Segments) + if (!C->Live) + message("removing unused section " + toString(C)); + for (InputGlobal *G : Obj->Globals) + if (!G->Live) + message("removing unused section " + toString(G)); + } + for (InputChunk *C : Symtab->SyntheticFunctions) + if (!C->Live) + message("removing unused section " + toString(C)); + for (InputGlobal *G : Symtab->SyntheticGlobals) + if (!G->Live) + message("removing unused section " + toString(G)); + } +} diff --git a/wasm/MarkLive.h b/wasm/MarkLive.h new file mode 100644 index 000000000000..0b58f153ce45 --- /dev/null +++ b/wasm/MarkLive.h @@ -0,0 +1,21 @@ +//===- MarkLive.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_WASM_MARKLIVE_H +#define LLD_WASM_MARKLIVE_H + +namespace lld { +namespace wasm { + +void markLive(); + +} // namespace wasm +} // namespace lld + +#endif // LLD_WASM_MARKLIVE_H diff --git a/wasm/Options.td b/wasm/Options.td index df0c6d708072..43588a830e31 100644 --- a/wasm/Options.td +++ b/wasm/Options.td @@ -11,21 +11,47 @@ multiclass Eq<string name> { def _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>; } -def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">, - HelpText<"Add a directory to the library search path">; +multiclass B<string name, string help1, string help2> { + def NAME: Flag<["--", "-"], name>, HelpText<help1>; + def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText<help2>; +} +// The follow flags are shared with the ELF linker def color_diagnostics: F<"color-diagnostics">, HelpText<"Use colors in diagnostics">; def color_diagnostics_eq: J<"color-diagnostics=">, - HelpText<"Use colors in diagnostics">; + HelpText<"Use colors in diagnostics; one of 'always', 'never', 'auto'">; + +defm demangle: B<"demangle", + "Demangle symbol names", + "Do not demangle symbol names">; + +def entry: S<"entry">, MetaVarName<"<entry>">, + HelpText<"Name of entry point symbol">; + +def error_limit: J<"error-limit=">, + HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; + +def fatal_warnings: F<"fatal-warnings">, + HelpText<"Treat warnings as errors">; + +defm gc_sections: B<"gc-sections", + "Enable garbage collection of unused sections", + "Disable garbage collection of unused sections">; + +defm merge_data_segments: B<"merge-data-segments", + "Enable merging data segments", + "Disable merging data segments">; -// The follow flags are shared with the ELF linker def help: F<"help">, HelpText<"Print option help">; def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">, HelpText<"Root name of library to use">; 
+def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">, + HelpText<"Add a directory to the library search path">; + def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">; def no_threads: F<"no-threads">, @@ -34,70 +60,99 @@ def no_threads: F<"no-threads">, def no_color_diagnostics: F<"no-color-diagnostics">, HelpText<"Do not use colors in diagnostics">; -def no_check_signatures: F<"no-check-signatures">, HelpText<"Don't check function signatures">; +def no_fatal_warnings: F<"no-fatal-warnings">; def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, HelpText<"Path to file to write output">; -def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; +def O: JoinedOrSeparate<["-"], "O">, HelpText<"Optimize output file size">; -def check_signatures: F<"check-signatures">, HelpText<"Check function signatures">; - -def v: Flag<["-"], "v">, HelpText<"Display the version number">; - -def version: F<"version">, HelpText<"Display the version number and exit">; - -def verbose: F<"verbose">, HelpText<"Verbose mode">; +defm print_gc_sections: B<"print-gc-sections", + "List removed unused sections", + "Do not list removed unused sections">; def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; -def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">; - def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; +def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; + defm undefined: Eq<"undefined">, HelpText<"Force undefined symbol during linking">; +def v: Flag<["-"], "v">, HelpText<"Display the version number">; + +def verbose: F<"verbose">, HelpText<"Verbose mode">; + +def version: F<"version">, HelpText<"Display the version number and exit">; + def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">, HelpText<"Linker option extensions">; -def entry: S<"entry">, MetaVarName<"<entry>">, - HelpText<"Name of 
entry point symbol">; +// The follow flags are unique to wasm -def no_entry: F<"no-entry">, - HelpText<"Do not output any entry point">; +def allow_undefined: F<"allow-undefined">, + HelpText<"Allow undefined symbols in linked binary">; -def error_limit: J<"error-limit=">, - HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; +def allow_undefined_file: J<"allow-undefined-file=">, + HelpText<"Allow symbols listed in <file> to be undefined in linked binary">; -// The follow flags are unique to wasm +def allow_undefined_file_s: Separate<["-"], "allow-undefined-file">, + Alias<allow_undefined_file>; + +defm export: Eq<"export">, + HelpText<"Force a symbol to be exported">; + +def export_all: F<"export-all">, + HelpText<"Export all symbols (normally combined with --no-gc-sections)">; + +def export_table: F<"export-table">, + HelpText<"Export function table to the environment">; def global_base: J<"global-base=">, HelpText<"Where to start to place global data">; +def import_memory: F<"import-memory">, + HelpText<"Import memory from the environment">; + +def import_table: F<"import-table">, + HelpText<"Import function table from the environment">; + def initial_memory: J<"initial-memory=">, HelpText<"Initial size of the linear memory">; def max_memory: J<"max-memory=">, HelpText<"Maximum size of the linear memory">; -def import_memory: F<"import-memory">, - HelpText<"Import memory from the environment">; - -def allow_undefined: F<"allow-undefined">, - HelpText<"Allow undefined symbols in linked binary">; +def no_entry: F<"no-entry">, + HelpText<"Do not output any entry point">; -def allow_undefined_file: J<"allow-undefined-file=">, - HelpText<"Allow symbols listed in <file> to be undefined in linked binary">; +def stack_first: F<"stack-first">, + HelpText<"Place stack at start of linear memory rather than after data">; -def allow_undefined_file_s: Separate<["-"], "allow-undefined-file">, Alias<allow_undefined_file>; +defm whole_archive: 
B<"whole-archive", + "Force load of all members in a static library", + "Do not force load of all members in a static library (default)">; // Aliases +def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; +def alias_entry_entry: J<"entry=">, Alias<entry>; def alias_initial_memory_i: Flag<["-"], "i">, Alias<initial_memory>; def alias_max_memory_m: Flag<["-"], "m">, Alias<max_memory>; def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>; -def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; -def alias_entry_entry: J<"entry=">, Alias<entry>; def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>; + +// LTO-related options. +def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, + HelpText<"Optimization level for LTO">; +def lto_partitions: J<"lto-partitions=">, + HelpText<"Number of LTO codegen partitions">; +def disable_verify: F<"disable-verify">; +def save_temps: F<"save-temps">; +def thinlto_cache_dir: J<"thinlto-cache-dir=">, + HelpText<"Path to ThinLTO cached object file directory">; +defm thinlto_cache_policy: Eq<"thinlto-cache-policy">, + HelpText<"Pruning policy for the ThinLTO cache">; +def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; diff --git a/wasm/OutputSections.cpp b/wasm/OutputSections.cpp index a55538269065..256a9884f947 100644 --- a/wasm/OutputSections.cpp +++ b/wasm/OutputSections.cpp @@ -8,13 +8,11 @@ //===----------------------------------------------------------------------===// #include "OutputSections.h" - -#include "Config.h" +#include "InputChunks.h" #include "InputFiles.h" #include "OutputSegment.h" -#include "SymbolTable.h" +#include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Memory.h" #include "lld/Common/Threads.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/LEB128.h" @@ -26,12 +24,6 @@ using namespace llvm::wasm; using namespace lld; using namespace lld::wasm; -enum class RelocEncoding { - Uleb128, - Sleb128, - I32, -}; - static 
StringRef sectionTypeToString(uint32_t SectionType) { switch (SectionType) { case WASM_SEC_CUSTOM: @@ -63,159 +55,40 @@ static StringRef sectionTypeToString(uint32_t SectionType) { } } -std::string lld::toString(const OutputSection &Section) { - std::string rtn = Section.getSectionName(); - if (!Section.Name.empty()) - rtn += "(" + Section.Name + ")"; - return rtn; -} - -static void applyRelocation(uint8_t *Buf, const OutputRelocation &Reloc) { - DEBUG(dbgs() << "write reloc: type=" << Reloc.Reloc.Type - << " index=" << Reloc.Reloc.Index << " value=" << Reloc.Value - << " offset=" << Reloc.Reloc.Offset << "\n"); - Buf += Reloc.Reloc.Offset; - int64_t ExistingValue; - switch (Reloc.Reloc.Type) { - case R_WEBASSEMBLY_TYPE_INDEX_LEB: - case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: - ExistingValue = decodeULEB128(Buf); - if (ExistingValue != Reloc.Reloc.Index) { - DEBUG(dbgs() << "existing value: " << decodeULEB128(Buf) << "\n"); - assert(decodeULEB128(Buf) == Reloc.Reloc.Index); - } - LLVM_FALLTHROUGH; - case R_WEBASSEMBLY_MEMORY_ADDR_LEB: - case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: - encodeULEB128(Reloc.Value, Buf, 5); - break; - case R_WEBASSEMBLY_TABLE_INDEX_SLEB: - ExistingValue = decodeSLEB128(Buf); - if (ExistingValue != Reloc.Reloc.Index) { - DEBUG(dbgs() << "existing value: " << decodeSLEB128(Buf) << "\n"); - assert(decodeSLEB128(Buf) == Reloc.Reloc.Index); - } - LLVM_FALLTHROUGH; - case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: - encodeSLEB128(static_cast<int32_t>(Reloc.Value), Buf, 5); - break; - case R_WEBASSEMBLY_TABLE_INDEX_I32: - case R_WEBASSEMBLY_MEMORY_ADDR_I32: - support::endian::write32<support::little>(Buf, Reloc.Value); - break; - default: - llvm_unreachable("unknown relocation type"); - } -} - -static void applyRelocations(uint8_t *Buf, ArrayRef<OutputRelocation> Relocs) { - if (!Relocs.size()) - return; - log("applyRelocations: count=" + Twine(Relocs.size())); - for (const OutputRelocation &Reloc : Relocs) - applyRelocation(Buf, Reloc); -} - -// Relocations contain 
an index into the function, global or table index -// space of the input file. This function takes a relocation and returns the -// relocated index (i.e. translates from the input index space to the output -// index space). -static uint32_t calcNewIndex(const ObjFile &File, const WasmRelocation &Reloc) { - switch (Reloc.Type) { - case R_WEBASSEMBLY_TYPE_INDEX_LEB: - return File.relocateTypeIndex(Reloc.Index); - case R_WEBASSEMBLY_FUNCTION_INDEX_LEB: - return File.relocateFunctionIndex(Reloc.Index); - case R_WEBASSEMBLY_TABLE_INDEX_I32: - case R_WEBASSEMBLY_TABLE_INDEX_SLEB: - return File.relocateTableIndex(Reloc.Index); - case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: - case R_WEBASSEMBLY_MEMORY_ADDR_LEB: - case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: - case R_WEBASSEMBLY_MEMORY_ADDR_I32: - return File.relocateGlobalIndex(Reloc.Index); - default: - llvm_unreachable("unknown relocation type"); - } -} - -// Take a vector of relocations from an input file and create output -// relocations based on them. Calculates the updated index and offset for -// each relocation as well as the value to write out in the final binary. 
-static void calcRelocations(const ObjFile &File, - ArrayRef<WasmRelocation> Relocs, - std::vector<OutputRelocation> &OutputRelocs, - int32_t OutputOffset) { - log("calcRelocations: " + File.getName() + " offset=" + Twine(OutputOffset)); - for (const WasmRelocation &Reloc : Relocs) { - OutputRelocation NewReloc; - NewReloc.Reloc = Reloc; - NewReloc.Reloc.Offset += OutputOffset; - DEBUG(dbgs() << "reloc: type=" << Reloc.Type << " index=" << Reloc.Index - << " offset=" << Reloc.Offset - << " newOffset=" << NewReloc.Reloc.Offset << "\n"); - - if (Config->EmitRelocs) - NewReloc.NewIndex = calcNewIndex(File, Reloc); - else - NewReloc.NewIndex = UINT32_MAX; - - switch (Reloc.Type) { - case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: - case R_WEBASSEMBLY_MEMORY_ADDR_I32: - case R_WEBASSEMBLY_MEMORY_ADDR_LEB: - NewReloc.Value = File.getRelocatedAddress(Reloc.Index); - if (NewReloc.Value != UINT32_MAX) - NewReloc.Value += Reloc.Addend; - break; - default: - NewReloc.Value = calcNewIndex(File, Reloc); - break; - } - - OutputRelocs.emplace_back(NewReloc); - } +// Returns a string, e.g. "FUNCTION(.text)". 
+std::string lld::toString(const OutputSection &Sec) { + if (!Sec.Name.empty()) + return (Sec.getSectionName() + "(" + Sec.Name + ")").str(); + return Sec.getSectionName(); } -std::string OutputSection::getSectionName() const { +StringRef OutputSection::getSectionName() const { return sectionTypeToString(Type); } -std::string SubSection::getSectionName() const { - return std::string("subsection <type=") + std::to_string(Type) + ">"; -} - void OutputSection::createHeader(size_t BodySize) { raw_string_ostream OS(Header); - debugWrite(OS.tell(), "section type [" + Twine(getSectionName()) + "]"); - writeUleb128(OS, Type, nullptr); + debugWrite(OS.tell(), "section type [" + getSectionName() + "]"); + encodeULEB128(Type, OS); writeUleb128(OS, BodySize, "section size"); OS.flush(); log("createHeader: " + toString(*this) + " body=" + Twine(BodySize) + " total=" + Twine(getSize())); } -CodeSection::CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs) - : OutputSection(WASM_SEC_CODE), InputObjects(Objs) { +CodeSection::CodeSection(ArrayRef<InputFunction *> Functions) + : OutputSection(WASM_SEC_CODE), Functions(Functions) { + assert(Functions.size() > 0); + raw_string_ostream OS(CodeSectionHeader); - writeUleb128(OS, NumFunctions, "function count"); + writeUleb128(OS, Functions.size(), "function count"); OS.flush(); BodySize = CodeSectionHeader.size(); - for (ObjFile *File : InputObjects) { - if (!File->CodeSection) - continue; - - File->CodeOffset = BodySize; - ArrayRef<uint8_t> Content = File->CodeSection->Content; - unsigned HeaderSize = 0; - decodeULEB128(Content.data(), &HeaderSize); - - calcRelocations(*File, File->CodeSection->Relocations, - File->CodeRelocations, BodySize - HeaderSize); - - size_t PayloadSize = Content.size() - HeaderSize; - BodySize += PayloadSize; + for (InputFunction *Func : Functions) { + Func->OutputOffset = BodySize; + Func->calculateSize(); + BodySize += Func->getSize(); } createHeader(BodySize); @@ -224,49 +97,32 @@ 
CodeSection::CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs) void CodeSection::writeTo(uint8_t *Buf) { log("writing " + toString(*this)); log(" size=" + Twine(getSize())); + log(" headersize=" + Twine(Header.size())); + log(" codeheadersize=" + Twine(CodeSectionHeader.size())); Buf += Offset; // Write section header memcpy(Buf, Header.data(), Header.size()); Buf += Header.size(); - uint8_t *ContentsStart = Buf; - // Write code section headers memcpy(Buf, CodeSectionHeader.data(), CodeSectionHeader.size()); - Buf += CodeSectionHeader.size(); // Write code section bodies - parallelForEach(InputObjects, [ContentsStart](ObjFile *File) { - if (!File->CodeSection) - return; - - ArrayRef<uint8_t> Content(File->CodeSection->Content); - - // Payload doesn't include the initial header (function count) - unsigned HeaderSize = 0; - decodeULEB128(Content.data(), &HeaderSize); - - size_t PayloadSize = Content.size() - HeaderSize; - memcpy(ContentsStart + File->CodeOffset, Content.data() + HeaderSize, - PayloadSize); - - log("applying relocations for: " + File->getName()); - applyRelocations(ContentsStart, File->CodeRelocations); - }); + parallelForEach(Functions, + [&](const InputChunk *Chunk) { Chunk->writeTo(Buf); }); } uint32_t CodeSection::numRelocations() const { uint32_t Count = 0; - for (ObjFile *File : InputObjects) - Count += File->CodeRelocations.size(); + for (const InputChunk *Func : Functions) + Count += Func->NumRelocations(); return Count; } void CodeSection::writeRelocations(raw_ostream &OS) const { - for (ObjFile *File : InputObjects) - for (const OutputRelocation &Reloc : File->CodeRelocations) - writeReloc(OS, Reloc); + for (const InputChunk *C : Functions) + C->writeRelocations(OS); } DataSection::DataSection(ArrayRef<OutputSegment *> Segments) @@ -285,18 +141,14 @@ DataSection::DataSection(ArrayRef<OutputSegment *> Segments) writeUleb128(OS, WASM_OPCODE_END, "opcode:end"); writeUleb128(OS, Segment->Size, "segment size"); OS.flush(); - 
Segment->setSectionOffset(BodySize); - BodySize += Segment->Header.size(); + + Segment->SectionOffset = BodySize; + BodySize += Segment->Header.size() + Segment->Size; log("Data segment: size=" + Twine(Segment->Size)); - for (InputSegment *InputSeg : Segment->InputSegments) { - uint32_t InputOffset = InputSeg->getInputSectionOffset(); - uint32_t OutputOffset = Segment->getSectionOffset() + - Segment->Header.size() + - InputSeg->getOutputSegmentOffset(); - calcRelocations(*InputSeg->File, InputSeg->Relocations, - InputSeg->OutRelocations, OutputOffset - InputOffset); - } - BodySize += Segment->Size; + + for (InputSegment *InputSeg : Segment->InputSegments) + InputSeg->OutputOffset = Segment->SectionOffset + Segment->Header.size() + + InputSeg->OutputSegmentOffset; } createHeader(BodySize); @@ -311,38 +163,77 @@ void DataSection::writeTo(uint8_t *Buf) { memcpy(Buf, Header.data(), Header.size()); Buf += Header.size(); - uint8_t *ContentsStart = Buf; - // Write data section headers memcpy(Buf, DataSectionHeader.data(), DataSectionHeader.size()); - parallelForEach(Segments, [ContentsStart](const OutputSegment *Segment) { + parallelForEach(Segments, [&](const OutputSegment *Segment) { // Write data segment header - uint8_t *SegStart = ContentsStart + Segment->getSectionOffset(); + uint8_t *SegStart = Buf + Segment->SectionOffset; memcpy(SegStart, Segment->Header.data(), Segment->Header.size()); // Write segment data payload - for (const InputSegment *Input : Segment->InputSegments) { - ArrayRef<uint8_t> Content(Input->Segment->Data.Content); - memcpy(SegStart + Segment->Header.size() + - Input->getOutputSegmentOffset(), - Content.data(), Content.size()); - applyRelocations(ContentsStart, Input->OutRelocations); - } + for (const InputChunk *Chunk : Segment->InputSegments) + Chunk->writeTo(Buf); }); } uint32_t DataSection::numRelocations() const { uint32_t Count = 0; for (const OutputSegment *Seg : Segments) - for (const InputSegment *InputSeg : Seg->InputSegments) - Count 
+= InputSeg->OutRelocations.size(); + for (const InputChunk *InputSeg : Seg->InputSegments) + Count += InputSeg->NumRelocations(); return Count; } void DataSection::writeRelocations(raw_ostream &OS) const { for (const OutputSegment *Seg : Segments) - for (const InputSegment *InputSeg : Seg->InputSegments) - for (const OutputRelocation &Reloc : InputSeg->OutRelocations) - writeReloc(OS, Reloc); + for (const InputChunk *C : Seg->InputSegments) + C->writeRelocations(OS); +} + +CustomSection::CustomSection(std::string Name, + ArrayRef<InputSection *> InputSections) + : OutputSection(WASM_SEC_CUSTOM, Name), PayloadSize(0), + InputSections(InputSections) { + raw_string_ostream OS(NameData); + encodeULEB128(Name.size(), OS); + OS << Name; + OS.flush(); + + for (InputSection *Section : InputSections) { + Section->OutputOffset = PayloadSize; + PayloadSize += Section->getSize(); + } + + createHeader(PayloadSize + NameData.size()); +} + +void CustomSection::writeTo(uint8_t *Buf) { + log("writing " + toString(*this) + " size=" + Twine(getSize()) + + " chunks=" + Twine(InputSections.size())); + + assert(Offset); + Buf += Offset; + + // Write section header + memcpy(Buf, Header.data(), Header.size()); + Buf += Header.size(); + memcpy(Buf, NameData.data(), NameData.size()); + Buf += NameData.size(); + + // Write custom sections payload + parallelForEach(InputSections, + [&](const InputSection *Section) { Section->writeTo(Buf); }); +} + +uint32_t CustomSection::numRelocations() const { + uint32_t Count = 0; + for (const InputSection *InputSect : InputSections) + Count += InputSect->NumRelocations(); + return Count; +} + +void CustomSection::writeRelocations(raw_ostream &OS) const { + for (const InputSection *S : InputSections) + S->writeRelocations(OS); } diff --git a/wasm/OutputSections.h b/wasm/OutputSections.h index fc73f36ad286..189d6507c4b3 100644 --- a/wasm/OutputSections.h +++ b/wasm/OutputSections.h @@ -10,7 +10,7 @@ #ifndef LLD_WASM_OUTPUT_SECTIONS_H #define 
LLD_WASM_OUTPUT_SECTIONS_H -#include "InputSegment.h" +#include "InputChunks.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" #include "llvm/ADT/DenseMap.h" @@ -28,7 +28,6 @@ std::string toString(const wasm::OutputSection &Section); namespace wasm { class OutputSegment; -class ObjFile; class OutputSection { public: @@ -36,7 +35,7 @@ public: : Type(Type), Name(Name) {} virtual ~OutputSection() = default; - std::string getSectionName() const; + StringRef getSectionName() const; void setOffset(size_t NewOffset) { log("setOffset: " + toString(*this) + ": " + Twine(NewOffset)); Offset = NewOffset; @@ -61,7 +60,7 @@ public: SyntheticSection(uint32_t Type, std::string Name = "") : OutputSection(Type, Name), BodyOutputStream(Body) { if (!Name.empty()) - writeStr(BodyOutputStream, Name); + writeStr(BodyOutputStream, Name, "section name"); } void writeTo(uint8_t *Buf) override { @@ -86,32 +85,16 @@ protected: raw_string_ostream BodyOutputStream; }; -// Some synthetic sections (e.g. "name" and "linking") have subsections. -// Just like the synthetic sections themselves these need to be created before -// they can be written out (since they are preceded by their length). This -// class is used to create subsections and then write them into the stream -// of the parent section. 
-class SubSection : public SyntheticSection { -public: - explicit SubSection(uint32_t Type) : SyntheticSection(Type) {} - - std::string getSectionName() const; - void writeToStream(raw_ostream &OS) { - writeBytes(OS, Header.data(), Header.size()); - writeBytes(OS, Body.data(), Body.size()); - } -}; - class CodeSection : public OutputSection { public: - explicit CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs); + explicit CodeSection(ArrayRef<InputFunction *> Functions); size_t getSize() const override { return Header.size() + BodySize; } void writeTo(uint8_t *Buf) override; uint32_t numRelocations() const override; void writeRelocations(raw_ostream &OS) const override; protected: - ArrayRef<ObjFile *> InputObjects; + ArrayRef<InputFunction *> Functions; std::string CodeSectionHeader; size_t BodySize = 0; }; @@ -130,6 +113,29 @@ protected: size_t BodySize = 0; }; +// Represents a custom section in the output file. Wasm custom sections are +// used for storing user-defined metadata. Unlike the core sections types +// they are identified by their string name. +// The linker combines custom sections that have the same name by simply +// concatenating them. +// Note that some custom sections such as "name" and "linking" are handled +// separately and are instead synthesized by the linker. 
+class CustomSection : public OutputSection { +public: + CustomSection(std::string Name, ArrayRef<InputSection *> InputSections); + size_t getSize() const override { + return Header.size() + NameData.size() + PayloadSize; + } + void writeTo(uint8_t *Buf) override; + uint32_t numRelocations() const override; + void writeRelocations(raw_ostream &OS) const override; + +protected: + size_t PayloadSize; + ArrayRef<InputSection *> InputSections; + std::string NameData; +}; + } // namespace wasm } // namespace lld diff --git a/wasm/OutputSegment.h b/wasm/OutputSegment.h index a22c80234420..d5c89cd19f4c 100644 --- a/wasm/OutputSegment.h +++ b/wasm/OutputSegment.h @@ -10,7 +10,7 @@ #ifndef LLD_WASM_OUTPUT_SEGMENT_H #define LLD_WASM_OUTPUT_SEGMENT_H -#include "InputSegment.h" +#include "InputChunks.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Object/Wasm.h" @@ -21,21 +21,20 @@ class InputSegment; class OutputSegment { public: - OutputSegment(StringRef N) : Name(N) {} - - void addInputSegment(InputSegment *Segment) { - Alignment = std::max(Alignment, Segment->getAlignment()); - InputSegments.push_back(Segment); - Size = llvm::alignTo(Size, Segment->getAlignment()); - Segment->setOutputSegment(this, Size); - Size += Segment->getSize(); + OutputSegment(StringRef N, uint32_t Index) : Name(N), Index(Index) {} + + void addInputSegment(InputSegment *InSeg) { + Alignment = std::max(Alignment, InSeg->getAlignment()); + InputSegments.push_back(InSeg); + Size = llvm::alignTo(Size, InSeg->getAlignment()); + InSeg->OutputSeg = this; + InSeg->OutputSegmentOffset = Size; + Size += InSeg->getSize(); } - uint32_t getSectionOffset() const { return SectionOffset; } - - void setSectionOffset(uint32_t Offset) { SectionOffset = Offset; } - StringRef Name; + const uint32_t Index; + uint32_t SectionOffset = 0; uint32_t Alignment = 0; uint32_t StartVA = 0; std::vector<InputSegment *> InputSegments; @@ -45,9 +44,6 @@ public: // Segment header std::string Header; - -private: - uint32_t 
SectionOffset = 0; }; } // namespace wasm diff --git a/wasm/SymbolTable.cpp b/wasm/SymbolTable.cpp index 751008da0536..e1ba23769738 100644 --- a/wasm/SymbolTable.cpp +++ b/wasm/SymbolTable.cpp @@ -8,17 +8,18 @@ //===----------------------------------------------------------------------===// #include "SymbolTable.h" - #include "Config.h" +#include "InputChunks.h" +#include "InputGlobal.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" - -#include <unordered_set> +#include "llvm/ADT/SetVector.h" #define DEBUG_TYPE "lld" using namespace llvm; +using namespace llvm::wasm; using namespace lld; using namespace lld::wasm; @@ -28,17 +29,46 @@ void SymbolTable::addFile(InputFile *File) { log("Processing: " + toString(File)); File->parse(); - if (auto *F = dyn_cast<ObjFile>(File)) + // LLVM bitcode file + if (auto *F = dyn_cast<BitcodeFile>(File)) + BitcodeFiles.push_back(F); + else if (auto *F = dyn_cast<ObjFile>(File)) ObjectFiles.push_back(F); } +// This function is where all the optimizations of link-time +// optimization happens. When LTO is in use, some input files are +// not in native object file format but in the LLVM bitcode format. +// This function compiles bitcode files into a few big native files +// using LLVM functions and replaces bitcode symbols with the results. +// Because all bitcode files that the program consists of are passed +// to the compiler at once, it can do whole-program optimization. +void SymbolTable::addCombinedLTOObject() { + if (BitcodeFiles.empty()) + return; + + // Compile bitcode files and replace bitcode symbols. 
+ LTO.reset(new BitcodeCompiler); + for (BitcodeFile *F : BitcodeFiles) + LTO->add(*F); + + for (StringRef Filename : LTO->compile()) { + auto *Obj = make<ObjFile>(MemoryBufferRef(Filename, "lto.tmp")); + Obj->parse(); + ObjectFiles.push_back(Obj); + } +} + void SymbolTable::reportRemainingUndefines() { - std::unordered_set<Symbol *> Undefs; + SetVector<Symbol *> Undefs; for (Symbol *Sym : SymVector) { - if (Sym->isUndefined() && !Sym->isWeak() && - Config->AllowUndefinedSymbols.count(Sym->getName()) == 0) { - Undefs.insert(Sym); - } + if (!Sym->isUndefined() || Sym->isWeak()) + continue; + if (Config->AllowUndefinedSymbols.count(Sym->getName()) != 0) + continue; + if (!Sym->IsUsedInRegularObj) + continue; + Undefs.insert(Sym); } if (Undefs.empty()) @@ -55,183 +85,281 @@ void SymbolTable::reportRemainingUndefines() { } Symbol *SymbolTable::find(StringRef Name) { - auto It = SymMap.find(CachedHashStringRef(Name)); - if (It == SymMap.end()) - return nullptr; - return It->second; + return SymMap.lookup(CachedHashStringRef(Name)); } std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { Symbol *&Sym = SymMap[CachedHashStringRef(Name)]; if (Sym) return {Sym, false}; - Sym = make<Symbol>(Name, false); + Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); + Sym->IsUsedInRegularObj = false; SymVector.emplace_back(Sym); return {Sym, true}; } -void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { - error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " + - toString(Existing->getFile()) + "\n>>> defined in " + - toString(NewFile)); +static void reportTypeError(const Symbol *Existing, const InputFile *File, + llvm::wasm::WasmSymbolType Type) { + error("symbol type mismatch: " + toString(*Existing) + "\n>>> defined as " + + toString(Existing->getWasmType()) + " in " + + toString(Existing->getFile()) + "\n>>> defined as " + toString(Type) + + " in " + toString(File)); } -// Get the signature for a given function symbol, either by 
looking -// it up in function sections (for defined functions), of the imports section -// (for imported functions). -static const WasmSignature *getFunctionSig(const ObjFile &Obj, - const WasmSymbol &Sym) { - DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n"); - const WasmObjectFile *WasmObj = Obj.getWasmObj(); - return &WasmObj->types()[Sym.FunctionType]; +static void checkFunctionType(Symbol *Existing, const InputFile *File, + const WasmSignature *NewSig) { + auto ExistingFunction = dyn_cast<FunctionSymbol>(Existing); + if (!ExistingFunction) { + reportTypeError(Existing, File, WASM_SYMBOL_TYPE_FUNCTION); + return; + } + + if (!NewSig) + return; + + const WasmSignature *OldSig = ExistingFunction->FunctionType; + if (!OldSig) { + ExistingFunction->FunctionType = NewSig; + return; + } + + if (*NewSig != *OldSig) + warn("function signature mismatch: " + Existing->getName() + + "\n>>> defined as " + toString(*OldSig) + " in " + + toString(Existing->getFile()) + "\n>>> defined as " + + toString(*NewSig) + " in " + toString(File)); } // Check the type of new symbol matches that of the symbol is replacing. // For functions this can also involve verifying that the signatures match. -static void checkSymbolTypes(const Symbol &Existing, const InputFile &F, - const WasmSymbol &New, - const WasmSignature *NewSig) { - if (Existing.isLazy()) +static void checkGlobalType(const Symbol *Existing, const InputFile *File, + const WasmGlobalType *NewType) { + if (!isa<GlobalSymbol>(Existing)) { + reportTypeError(Existing, File, WASM_SYMBOL_TYPE_GLOBAL); return; + } - bool NewIsFunction = New.Type == WasmSymbol::SymbolType::FUNCTION_EXPORT || - New.Type == WasmSymbol::SymbolType::FUNCTION_IMPORT; - - // First check the symbol types match (i.e. either both are function - // symbols or both are data symbols). - if (Existing.isFunction() != NewIsFunction) { - error("symbol type mismatch: " + New.Name + "\n>>> defined as " + - (Existing.isFunction() ? 
"Function" : "Global") + " in " + - toString(Existing.getFile()) + "\n>>> defined as " + - (NewIsFunction ? "Function" : "Global") + " in " + F.getName()); - return; + const WasmGlobalType *OldType = cast<GlobalSymbol>(Existing)->getGlobalType(); + if (*NewType != *OldType) { + error("Global type mismatch: " + Existing->getName() + "\n>>> defined as " + + toString(*OldType) + " in " + toString(Existing->getFile()) + + "\n>>> defined as " + toString(*NewType) + " in " + toString(File)); } +} - // For function symbols, optionally check the function signature matches too. - if (!NewIsFunction || !Config->CheckSignatures) - return; - // Skip the signature check if the existing function has no signature (e.g. - // if it is an undefined symbol generated by --undefined command line flag). - if (!Existing.hasFunctionType()) - return; +static void checkDataType(const Symbol *Existing, const InputFile *File) { + if (!isa<DataSymbol>(Existing)) + reportTypeError(Existing, File, WASM_SYMBOL_TYPE_DATA); +} - DEBUG(dbgs() << "checkSymbolTypes: " << New.Name << "\n"); - assert(NewSig); +DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name, + uint32_t Flags, + InputFunction *Function) { + LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << Name << "\n"); + assert(!find(Name)); + SyntheticFunctions.emplace_back(Function); + return replaceSymbol<DefinedFunction>(insert(Name).first, Name, Flags, + nullptr, Function); +} - const WasmSignature &OldSig = Existing.getFunctionType(); - if (*NewSig == OldSig) - return; +DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef Name, + uint32_t Flags) { + LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << Name << "\n"); + assert(!find(Name)); + return replaceSymbol<DefinedData>(insert(Name).first, Name, Flags); +} - error("function signature mismatch: " + New.Name + "\n>>> defined as " + - toString(OldSig) + " in " + toString(Existing.getFile()) + - "\n>>> defined as " + toString(*NewSig) + " in " + F.getName()); +DefinedGlobal 
*SymbolTable::addSyntheticGlobal(StringRef Name, uint32_t Flags, + InputGlobal *Global) { + LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << Name << " -> " << Global + << "\n"); + assert(!find(Name)); + SyntheticGlobals.emplace_back(Global); + return replaceSymbol<DefinedGlobal>(insert(Name).first, Name, Flags, nullptr, + Global); } -Symbol *SymbolTable::addDefinedGlobal(StringRef Name) { - DEBUG(dbgs() << "addDefinedGlobal: " << Name << "\n"); +static bool shouldReplace(const Symbol *Existing, InputFile *NewFile, + uint32_t NewFlags) { + // If existing symbol is undefined, replace it. + if (!Existing->isDefined()) { + LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " + << Existing->getName() << "\n"); + return true; + } + + // Now we have two defined symbols. If the new one is weak, we can ignore it. + if ((NewFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { + LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n"); + return false; + } + + // If the existing symbol is weak, we should replace it. + if (Existing->isWeak()) { + LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n"); + return true; + } + + // Neither symbol is week. They conflict. 
+ error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " + + toString(Existing->getFile()) + "\n>>> defined in " + + toString(NewFile)); + return true; +} + +Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags, + InputFile *File, + InputFunction *Function) { + LLVM_DEBUG(dbgs() << "addDefinedFunction: " << Name << "\n"); Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); - if (WasInserted) - S->update(Symbol::DefinedGlobalKind); - else if (!S->isGlobal()) - error("symbol type mismatch: " + Name); + + if (!File || File->kind() == InputFile::ObjectKind) + S->IsUsedInRegularObj = true; + + if (WasInserted || S->isLazy()) { + replaceSymbol<DefinedFunction>(S, Name, Flags, File, Function); + return S; + } + + if (Function) + checkFunctionType(S, File, &Function->Signature); + + if (shouldReplace(S, File, Flags)) + replaceSymbol<DefinedFunction>(S, Name, Flags, File, Function); return S; } -Symbol *SymbolTable::addDefined(InputFile *F, const WasmSymbol *Sym, - const InputSegment *Segment) { - DEBUG(dbgs() << "addDefined: " << Sym->Name << "\n"); +Symbol *SymbolTable::addDefinedData(StringRef Name, uint32_t Flags, + InputFile *File, InputSegment *Segment, + uint32_t Address, uint32_t Size) { + LLVM_DEBUG(dbgs() << "addDefinedData:" << Name << " addr:" << Address + << "\n"); Symbol *S; bool WasInserted; - Symbol::Kind Kind = Symbol::DefinedFunctionKind; - const WasmSignature *NewSig = nullptr; - if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT) - Kind = Symbol::DefinedGlobalKind; - else - NewSig = getFunctionSig(*cast<ObjFile>(F), *Sym); + std::tie(S, WasInserted) = insert(Name); - std::tie(S, WasInserted) = insert(Sym->Name); - if (WasInserted) { - S->update(Kind, F, Sym, Segment, NewSig); - } else if (S->isLazy()) { - // The existing symbol is lazy. Replace it without checking types since - // lazy symbols don't have any type information. 
- DEBUG(dbgs() << "replacing existing lazy symbol: " << Sym->Name << "\n"); - S->update(Kind, F, Sym, Segment, NewSig); - } else if (!S->isDefined()) { - // The existing symbol table entry is undefined. The new symbol replaces - // it, after checking the type matches - DEBUG(dbgs() << "resolving existing undefined symbol: " << Sym->Name - << "\n"); - checkSymbolTypes(*S, *F, *Sym, NewSig); - S->update(Kind, F, Sym, Segment, NewSig); - } else if (Sym->isWeak()) { - // the new symbol is weak we can ignore it - DEBUG(dbgs() << "existing symbol takes precedence\n"); - } else if (S->isWeak()) { - // the new symbol is not weak and the existing symbol is, so we replace - // it - DEBUG(dbgs() << "replacing existing weak symbol\n"); - checkSymbolTypes(*S, *F, *Sym, NewSig); - S->update(Kind, F, Sym, Segment, NewSig); - } else { - // neither symbol is week. They conflict. - reportDuplicate(S, F); + if (!File || File->kind() == InputFile::ObjectKind) + S->IsUsedInRegularObj = true; + + if (WasInserted || S->isLazy()) { + replaceSymbol<DefinedData>(S, Name, Flags, File, Segment, Address, Size); + return S; } + + checkDataType(S, File); + + if (shouldReplace(S, File, Flags)) + replaceSymbol<DefinedData>(S, Name, Flags, File, Segment, Address, Size); return S; } -Symbol *SymbolTable::addUndefinedFunction(StringRef Name, - const WasmSignature *Type) { +Symbol *SymbolTable::addDefinedGlobal(StringRef Name, uint32_t Flags, + InputFile *File, InputGlobal *Global) { + LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << Name << "\n"); Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); - if (WasInserted) { - S->update(Symbol::UndefinedFunctionKind, nullptr, nullptr, nullptr, Type); - } else if (!S->isFunction()) { - error("symbol type mismatch: " + Name); + + if (!File || File->kind() == InputFile::ObjectKind) + S->IsUsedInRegularObj = true; + + if (WasInserted || S->isLazy()) { + replaceSymbol<DefinedGlobal>(S, Name, Flags, File, Global); + return S; } + + 
checkGlobalType(S, File, &Global->getType()); + + if (shouldReplace(S, File, Flags)) + replaceSymbol<DefinedGlobal>(S, Name, Flags, File, Global); return S; } -Symbol *SymbolTable::addUndefined(InputFile *F, const WasmSymbol *Sym) { - DEBUG(dbgs() << "addUndefined: " << Sym->Name << "\n"); +Symbol *SymbolTable::addUndefinedFunction(StringRef Name, uint32_t Flags, + InputFile *File, + const WasmSignature *Sig) { + LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name << "\n"); + Symbol *S; bool WasInserted; - Symbol::Kind Kind = Symbol::UndefinedFunctionKind; - const WasmSignature *NewSig = nullptr; - if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_IMPORT) - Kind = Symbol::UndefinedGlobalKind; + std::tie(S, WasInserted) = insert(Name); + + if (!File || File->kind() == InputFile::ObjectKind) + S->IsUsedInRegularObj = true; + + if (WasInserted) + replaceSymbol<UndefinedFunction>(S, Name, Flags, File, Sig); + else if (auto *Lazy = dyn_cast<LazySymbol>(S)) + Lazy->fetch(); else - NewSig = getFunctionSig(*cast<ObjFile>(F), *Sym); - std::tie(S, WasInserted) = insert(Sym->Name); - if (WasInserted) { - S->update(Kind, F, Sym, nullptr, NewSig); - } else if (S->isLazy()) { - DEBUG(dbgs() << "resolved by existing lazy\n"); - auto *AF = cast<ArchiveFile>(S->getFile()); - AF->addMember(&S->getArchiveSymbol()); - } else if (S->isDefined()) { - DEBUG(dbgs() << "resolved by existing\n"); - checkSymbolTypes(*S, *F, *Sym, NewSig); - } + checkFunctionType(S, File, Sig); + + return S; +} + +Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags, + InputFile *File) { + LLVM_DEBUG(dbgs() << "addUndefinedData: " << Name << "\n"); + + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + + if (!File || File->kind() == InputFile::ObjectKind) + S->IsUsedInRegularObj = true; + + if (WasInserted) + replaceSymbol<UndefinedData>(S, Name, Flags, File); + else if (auto *Lazy = dyn_cast<LazySymbol>(S)) + Lazy->fetch(); + else if (S->isDefined()) + 
checkDataType(S, File); return S; } -void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol *Sym) { - DEBUG(dbgs() << "addLazy: " << Sym->getName() << "\n"); +Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, uint32_t Flags, + InputFile *File, + const WasmGlobalType *Type) { + LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << Name << "\n"); + + Symbol *S; + bool WasInserted; + std::tie(S, WasInserted) = insert(Name); + + if (!File || File->kind() == InputFile::ObjectKind) + S->IsUsedInRegularObj = true; + + if (WasInserted) + replaceSymbol<UndefinedGlobal>(S, Name, Flags, File, Type); + else if (auto *Lazy = dyn_cast<LazySymbol>(S)) + Lazy->fetch(); + else if (S->isDefined()) + checkGlobalType(S, File, Type); + return S; +} + +void SymbolTable::addLazy(ArchiveFile *File, const Archive::Symbol *Sym) { + LLVM_DEBUG(dbgs() << "addLazy: " << Sym->getName() << "\n"); StringRef Name = Sym->getName(); + Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); + if (WasInserted) { - S->update(Symbol::LazyKind, F); - S->setArchiveSymbol(*Sym); - } else if (S->isUndefined()) { - // There is an existing undefined symbol. The can load from the - // archive. - DEBUG(dbgs() << "replacing existing undefined\n"); - F->addMember(Sym); + replaceSymbol<LazySymbol>(S, Name, File, *Sym); + return; + } + + // If there is an existing undefined symbol, load a new one from the archive. 
+ if (S->isUndefined()) { + LLVM_DEBUG(dbgs() << "replacing existing undefined\n"); + File->addMember(Sym); } } + +bool SymbolTable::addComdat(StringRef Name) { + return Comdats.insert(CachedHashStringRef(Name)).second; +} diff --git a/wasm/SymbolTable.h b/wasm/SymbolTable.h index fbb74ed14796..26242e6cddd6 100644 --- a/wasm/SymbolTable.h +++ b/wasm/SymbolTable.h @@ -11,13 +11,13 @@ #define LLD_WASM_SYMBOL_TABLE_H #include "InputFiles.h" +#include "LTO.h" #include "Symbols.h" - #include "llvm/ADT/CachedHashString.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/raw_ostream.h" -using llvm::object::WasmSymbol; +using llvm::wasm::WasmGlobalType; using llvm::wasm::WasmSignature; namespace lld { @@ -40,27 +40,52 @@ class InputSegment; class SymbolTable { public: void addFile(InputFile *File); + void addCombinedLTOObject(); std::vector<ObjFile *> ObjectFiles; + std::vector<BitcodeFile *> BitcodeFiles; + std::vector<InputFunction *> SyntheticFunctions; + std::vector<InputGlobal *> SyntheticGlobals; - void reportDuplicate(Symbol *Existing, InputFile *NewFile); void reportRemainingUndefines(); ArrayRef<Symbol *> getSymbols() const { return SymVector; } Symbol *find(StringRef Name); - Symbol *addDefined(InputFile *F, const WasmSymbol *Sym, - const InputSegment *Segment = nullptr); - Symbol *addUndefined(InputFile *F, const WasmSymbol *Sym); - Symbol *addUndefinedFunction(StringRef Name, const WasmSignature *Type); - Symbol *addDefinedGlobal(StringRef Name); + Symbol *addDefinedFunction(StringRef Name, uint32_t Flags, InputFile *File, + InputFunction *Function); + Symbol *addDefinedData(StringRef Name, uint32_t Flags, InputFile *File, + InputSegment *Segment, uint32_t Address, + uint32_t Size); + Symbol *addDefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File, + InputGlobal *G); + + Symbol *addUndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File, + const WasmSignature *Signature); + Symbol 
*addUndefinedData(StringRef Name, uint32_t Flags, InputFile *File); + Symbol *addUndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File, + const WasmGlobalType *Type); + void addLazy(ArchiveFile *F, const Archive::Symbol *Sym); + bool addComdat(StringRef Name); + + DefinedData *addSyntheticDataSymbol(StringRef Name, uint32_t Flags); + DefinedGlobal *addSyntheticGlobal(StringRef Name, uint32_t Flags, + InputGlobal *Global); + DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags, + InputFunction *Function); + private: std::pair<Symbol *, bool> insert(StringRef Name); llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap; std::vector<Symbol *> SymVector; + + llvm::DenseSet<llvm::CachedHashStringRef> Comdats; + + // For LTO. + std::unique_ptr<BitcodeCompiler> LTO; }; extern SymbolTable *Symtab; diff --git a/wasm/Symbols.cpp b/wasm/Symbols.cpp index 6bf5459c2663..a11081cbcf77 100644 --- a/wasm/Symbols.cpp +++ b/wasm/Symbols.cpp @@ -8,92 +8,224 @@ //===----------------------------------------------------------------------===// #include "Symbols.h" - #include "Config.h" +#include "InputChunks.h" #include "InputFiles.h" -#include "InputSegment.h" +#include "InputGlobal.h" +#include "OutputSegment.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #define DEBUG_TYPE "lld" using namespace llvm; +using namespace llvm::wasm; using namespace lld; using namespace lld::wasm; -uint32_t Symbol::getGlobalIndex() const { - assert(!Sym->isFunction()); - return Sym->ElementIndex; +DefinedFunction *WasmSym::CallCtors; +DefinedData *WasmSym::DsoHandle; +DefinedData *WasmSym::DataEnd; +DefinedData *WasmSym::HeapBase; +DefinedGlobal *WasmSym::StackPointer; + +WasmSymbolType Symbol::getWasmType() const { + if (isa<FunctionSymbol>(this)) + return WASM_SYMBOL_TYPE_FUNCTION; + if (isa<DataSymbol>(this)) + return WASM_SYMBOL_TYPE_DATA; + if (isa<GlobalSymbol>(this)) + return WASM_SYMBOL_TYPE_GLOBAL; + if (isa<SectionSymbol>(this)) + return 
WASM_SYMBOL_TYPE_SECTION; + llvm_unreachable("invalid symbol kind"); +} + +InputChunk *Symbol::getChunk() const { + if (auto *F = dyn_cast<DefinedFunction>(this)) + return F->Function; + if (auto *D = dyn_cast<DefinedData>(this)) + return D->Segment; + return nullptr; +} + +bool Symbol::isLive() const { + if (auto *G = dyn_cast<DefinedGlobal>(this)) + return G->Global->Live; + if (InputChunk *C = getChunk()) + return C->Live; + return Referenced; } -uint32_t Symbol::getFunctionIndex() const { - assert(Sym->isFunction()); - return Sym->ElementIndex; +void Symbol::markLive() { + if (auto *G = dyn_cast<DefinedGlobal>(this)) + G->Global->Live = true; + if (InputChunk *C = getChunk()) + C->Live = true; + Referenced = true; } -const WasmSignature &Symbol::getFunctionType() const { - assert(FunctionType != nullptr); - return *FunctionType; +uint32_t Symbol::getOutputSymbolIndex() const { + assert(OutputSymbolIndex != INVALID_INDEX); + return OutputSymbolIndex; } -uint32_t Symbol::getVirtualAddress() const { - assert(isGlobal()); - DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n"); - if (isUndefined()) - return UINT32_MAX; - if (VirtualAddress.hasValue()) - return VirtualAddress.getValue(); +void Symbol::setOutputSymbolIndex(uint32_t Index) { + LLVM_DEBUG(dbgs() << "setOutputSymbolIndex " << Name << " -> " << Index + << "\n"); + assert(OutputSymbolIndex == INVALID_INDEX); + OutputSymbolIndex = Index; +} + +bool Symbol::isWeak() const { + return (Flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK; +} - assert(Sym != nullptr); - ObjFile *Obj = cast<ObjFile>(File); - const WasmGlobal &Global = - Obj->getWasmObj()->globals()[getGlobalIndex() - Obj->NumGlobalImports()]; - assert(Global.Type == llvm::wasm::WASM_TYPE_I32); - assert(Segment); - return Segment->translateVA(Global.InitExpr.Value.Int32); +bool Symbol::isLocal() const { + return (Flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_LOCAL; } -uint32_t Symbol::getOutputIndex() const { - if 
(isUndefined() && isWeak()) - return 0; - return OutputIndex.getValue(); +bool Symbol::isHidden() const { + return (Flags & WASM_SYMBOL_VISIBILITY_MASK) == WASM_SYMBOL_VISIBILITY_HIDDEN; } -void Symbol::setVirtualAddress(uint32_t Value) { - DEBUG(dbgs() << "setVirtualAddress " << Name << " -> " << Value << "\n"); - assert(!VirtualAddress.hasValue()); - VirtualAddress = Value; +void Symbol::setHidden(bool IsHidden) { + LLVM_DEBUG(dbgs() << "setHidden: " << Name << " -> " << IsHidden << "\n"); + Flags &= ~WASM_SYMBOL_VISIBILITY_MASK; + if (IsHidden) + Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN; + else + Flags |= WASM_SYMBOL_VISIBILITY_DEFAULT; } -void Symbol::setOutputIndex(uint32_t Index) { - DEBUG(dbgs() << "setOutputIndex " << Name << " -> " << Index << "\n"); - assert(!OutputIndex.hasValue()); - OutputIndex = Index; +bool Symbol::isExported() const { + if (!isDefined() || isLocal()) + return false; + + if (ForceExport || Config->ExportAll) + return true; + + return !isHidden(); } -void Symbol::setTableIndex(uint32_t Index) { - DEBUG(dbgs() << "setTableIndex " << Name << " -> " << Index << "\n"); - assert(!TableIndex.hasValue()); +uint32_t FunctionSymbol::getFunctionIndex() const { + if (auto *F = dyn_cast<DefinedFunction>(this)) + return F->Function->getFunctionIndex(); + assert(FunctionIndex != INVALID_INDEX); + return FunctionIndex; +} + +void FunctionSymbol::setFunctionIndex(uint32_t Index) { + LLVM_DEBUG(dbgs() << "setFunctionIndex " << Name << " -> " << Index << "\n"); + assert(FunctionIndex == INVALID_INDEX); + FunctionIndex = Index; +} + +bool FunctionSymbol::hasFunctionIndex() const { + if (auto *F = dyn_cast<DefinedFunction>(this)) + return F->Function->hasFunctionIndex(); + return FunctionIndex != INVALID_INDEX; +} + +uint32_t FunctionSymbol::getTableIndex() const { + if (auto *F = dyn_cast<DefinedFunction>(this)) + return F->Function->getTableIndex(); + assert(TableIndex != INVALID_INDEX); + return TableIndex; +} + +bool FunctionSymbol::hasTableIndex() 
const { + if (auto *F = dyn_cast<DefinedFunction>(this)) + return F->Function->hasTableIndex(); + return TableIndex != INVALID_INDEX; +} + +void FunctionSymbol::setTableIndex(uint32_t Index) { + // For imports, we set the table index here on the Symbol; for defined + // functions we set the index on the InputFunction so that we don't export + // the same thing twice (keeps the table size down). + if (auto *F = dyn_cast<DefinedFunction>(this)) { + F->Function->setTableIndex(Index); + return; + } + LLVM_DEBUG(dbgs() << "setTableIndex " << Name << " -> " << Index << "\n"); + assert(TableIndex == INVALID_INDEX); TableIndex = Index; } -void Symbol::update(Kind K, InputFile *F, const WasmSymbol *WasmSym, - const InputSegment *Seg, const WasmSignature *Sig) { - SymbolKind = K; - File = F; - Sym = WasmSym; - Segment = Seg; - FunctionType = Sig; +DefinedFunction::DefinedFunction(StringRef Name, uint32_t Flags, InputFile *F, + InputFunction *Function) + : FunctionSymbol(Name, DefinedFunctionKind, Flags, F, + Function ? 
&Function->Signature : nullptr), + Function(Function) {} + +uint32_t DefinedData::getVirtualAddress() const { + LLVM_DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n"); + if (Segment) + return Segment->OutputSeg->StartVA + Segment->OutputSegmentOffset + Offset; + return Offset; +} + +void DefinedData::setVirtualAddress(uint32_t Value) { + LLVM_DEBUG(dbgs() << "setVirtualAddress " << Name << " -> " << Value << "\n"); + assert(!Segment); + Offset = Value; +} + +uint32_t DefinedData::getOutputSegmentOffset() const { + LLVM_DEBUG(dbgs() << "getOutputSegmentOffset: " << getName() << "\n"); + return Segment->OutputSegmentOffset + Offset; +} + +uint32_t DefinedData::getOutputSegmentIndex() const { + LLVM_DEBUG(dbgs() << "getOutputSegmentIndex: " << getName() << "\n"); + return Segment->OutputSeg->Index; +} + +uint32_t GlobalSymbol::getGlobalIndex() const { + if (auto *F = dyn_cast<DefinedGlobal>(this)) + return F->Global->getGlobalIndex(); + assert(GlobalIndex != INVALID_INDEX); + return GlobalIndex; +} + +void GlobalSymbol::setGlobalIndex(uint32_t Index) { + LLVM_DEBUG(dbgs() << "setGlobalIndex " << Name << " -> " << Index << "\n"); + assert(GlobalIndex == INVALID_INDEX); + GlobalIndex = Index; +} + +bool GlobalSymbol::hasGlobalIndex() const { + if (auto *F = dyn_cast<DefinedGlobal>(this)) + return F->Global->hasGlobalIndex(); + return GlobalIndex != INVALID_INDEX; } -bool Symbol::isWeak() const { return Sym && Sym->isWeak(); } +DefinedGlobal::DefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File, + InputGlobal *Global) + : GlobalSymbol(Name, DefinedGlobalKind, Flags, File, + Global ? 
&Global->getType() : nullptr), + Global(Global) {} + +uint32_t SectionSymbol::getOutputSectionIndex() const { + LLVM_DEBUG(dbgs() << "getOutputSectionIndex: " << getName() << "\n"); + assert(OutputSectionIndex != INVALID_INDEX); + return OutputSectionIndex; +} + +void SectionSymbol::setOutputSectionIndex(uint32_t Index) { + LLVM_DEBUG(dbgs() << "setOutputSectionIndex: " << getName() << " -> " << Index + << "\n"); + assert(Index != INVALID_INDEX); + OutputSectionIndex = Index; +} -bool Symbol::isHidden() const { return Sym && Sym->isHidden(); } +void LazySymbol::fetch() { cast<ArchiveFile>(File)->addMember(&ArchiveSymbol); } std::string lld::toString(const wasm::Symbol &Sym) { if (Config->Demangle) if (Optional<std::string> S = demangleItanium(Sym.getName())) - return "`" + *S + "'"; + return *S; return Sym.getName(); } @@ -101,14 +233,20 @@ std::string lld::toString(wasm::Symbol::Kind Kind) { switch (Kind) { case wasm::Symbol::DefinedFunctionKind: return "DefinedFunction"; + case wasm::Symbol::DefinedDataKind: + return "DefinedData"; case wasm::Symbol::DefinedGlobalKind: return "DefinedGlobal"; case wasm::Symbol::UndefinedFunctionKind: return "UndefinedFunction"; + case wasm::Symbol::UndefinedDataKind: + return "UndefinedData"; case wasm::Symbol::UndefinedGlobalKind: return "UndefinedGlobal"; case wasm::Symbol::LazyKind: return "LazyKind"; + case wasm::Symbol::SectionKind: + return "SectionKind"; } - llvm_unreachable("Invalid symbol kind!"); + llvm_unreachable("invalid symbol kind"); } diff --git a/wasm/Symbols.h b/wasm/Symbols.h index 8194bcaca383..815cc97d22d1 100644 --- a/wasm/Symbols.h +++ b/wasm/Symbols.h @@ -10,53 +10,59 @@ #ifndef LLD_WASM_SYMBOLS_H #define LLD_WASM_SYMBOLS_H +#include "Config.h" #include "lld/Common/LLVM.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Wasm.h" using llvm::object::Archive; using llvm::object::WasmSymbol; -using llvm::wasm::WasmExport; -using llvm::wasm::WasmImport; +using llvm::wasm::WasmGlobal; +using 
llvm::wasm::WasmGlobalType; using llvm::wasm::WasmSignature; +using llvm::wasm::WasmSymbolType; namespace lld { namespace wasm { class InputFile; +class InputChunk; class InputSegment; +class InputFunction; +class InputGlobal; +class InputSection; +#define INVALID_INDEX UINT32_MAX + +// The base class for real symbol classes. class Symbol { public: enum Kind { DefinedFunctionKind, + DefinedDataKind, DefinedGlobalKind, - - LazyKind, + SectionKind, UndefinedFunctionKind, + UndefinedDataKind, UndefinedGlobalKind, - - LastDefinedKind = DefinedGlobalKind, - InvalidKind, + LazyKind, }; - Symbol(StringRef Name, bool IsLocal) - : WrittenToSymtab(0), WrittenToNameSec(0), IsLocal(IsLocal), Name(Name) {} + Kind kind() const { return SymbolKind; } - Kind getKind() const { return SymbolKind; } + bool isDefined() const { + return SymbolKind == DefinedFunctionKind || SymbolKind == DefinedDataKind || + SymbolKind == DefinedGlobalKind || SymbolKind == SectionKind; + } - bool isLazy() const { return SymbolKind == LazyKind; } - bool isDefined() const { return SymbolKind <= LastDefinedKind; } bool isUndefined() const { - return SymbolKind == UndefinedGlobalKind || - SymbolKind == UndefinedFunctionKind; + return SymbolKind == UndefinedFunctionKind || + SymbolKind == UndefinedDataKind || SymbolKind == UndefinedGlobalKind; } - bool isFunction() const { - return SymbolKind == DefinedFunctionKind || - SymbolKind == UndefinedFunctionKind; - } - bool isGlobal() const { return !isFunction(); } - bool isLocal() const { return IsLocal; } + + bool isLazy() const { return SymbolKind == LazyKind; } + + bool isLocal() const; bool isWeak() const; bool isHidden() const; @@ -66,57 +72,278 @@ public: // Returns the file from which this symbol was created. 
InputFile *getFile() const { return File; } - uint32_t getGlobalIndex() const; - uint32_t getFunctionIndex() const; + InputChunk *getChunk() const; - bool hasFunctionType() const { return FunctionType != nullptr; } - const WasmSignature &getFunctionType() const; - uint32_t getOutputIndex() const; - uint32_t getTableIndex() const { return TableIndex.getValue(); } + // Indicates that the section or import for this symbol will be included in + // the final image. + bool isLive() const; - // Returns the virtual address of a defined global. - // Only works for globals, not functions. - uint32_t getVirtualAddress() const; + // Marks the symbol's InputChunk as Live, so that it will be included in the + // final image. + void markLive(); + + void setHidden(bool IsHidden); - // Set the output index of the symbol (in the function or global index - // space of the output object. - void setOutputIndex(uint32_t Index); + // Get/set the index in the output symbol table. This is only used for + // relocatable output. + uint32_t getOutputSymbolIndex() const; + void setOutputSymbolIndex(uint32_t Index); - // Returns true if a table index has been set for this symbol - bool hasTableIndex() const { return TableIndex.hasValue(); } + WasmSymbolType getWasmType() const; + bool isExported() const; - // Set the table index of the symbol + // True if this symbol was referenced by a regular (non-bitcode) object. 
+ unsigned IsUsedInRegularObj : 1; + unsigned ForceExport : 1; + +protected: + Symbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F) + : IsUsedInRegularObj(false), ForceExport(false), Name(Name), + SymbolKind(K), Flags(Flags), File(F), Referenced(!Config->GcSections) {} + + StringRef Name; + Kind SymbolKind; + uint32_t Flags; + InputFile *File; + uint32_t OutputSymbolIndex = INVALID_INDEX; + bool Referenced; +}; + +class FunctionSymbol : public Symbol { +public: + static bool classof(const Symbol *S) { + return S->kind() == DefinedFunctionKind || + S->kind() == UndefinedFunctionKind; + } + + // Get/set the table index void setTableIndex(uint32_t Index); + uint32_t getTableIndex() const; + bool hasTableIndex() const; + + // Get/set the function index + uint32_t getFunctionIndex() const; + void setFunctionIndex(uint32_t Index); + bool hasFunctionIndex() const; + const WasmSignature *FunctionType; + +protected: + FunctionSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F, + const WasmSignature *Type) + : Symbol(Name, K, Flags, F), FunctionType(Type) {} + + uint32_t TableIndex = INVALID_INDEX; + uint32_t FunctionIndex = INVALID_INDEX; +}; + +class DefinedFunction : public FunctionSymbol { +public: + DefinedFunction(StringRef Name, uint32_t Flags, InputFile *F, + InputFunction *Function); + + static bool classof(const Symbol *S) { + return S->kind() == DefinedFunctionKind; + } + + InputFunction *Function; +}; + +class UndefinedFunction : public FunctionSymbol { +public: + UndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File = nullptr, + const WasmSignature *Type = nullptr) + : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type) {} + + static bool classof(const Symbol *S) { + return S->kind() == UndefinedFunctionKind; + } +}; + +class SectionSymbol : public Symbol { +public: + static bool classof(const Symbol *S) { return S->kind() == SectionKind; } + + SectionSymbol(StringRef Name, uint32_t Flags, const InputSection *S, + 
InputFile *F = nullptr) + : Symbol(Name, SectionKind, Flags, F), Section(S) {} + + const InputSection *Section; + + uint32_t getOutputSectionIndex() const; + void setOutputSectionIndex(uint32_t Index); + +protected: + uint32_t OutputSectionIndex = INVALID_INDEX; +}; + +class DataSymbol : public Symbol { +public: + static bool classof(const Symbol *S) { + return S->kind() == DefinedDataKind || S->kind() == UndefinedDataKind; + } + +protected: + DataSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F) + : Symbol(Name, K, Flags, F) {} +}; + +class DefinedData : public DataSymbol { +public: + // Constructor for regular data symbols originating from input files. + DefinedData(StringRef Name, uint32_t Flags, InputFile *F, + InputSegment *Segment, uint32_t Offset, uint32_t Size) + : DataSymbol(Name, DefinedDataKind, Flags, F), Segment(Segment), + Offset(Offset), Size(Size) {} + + // Constructor for linker synthetic data symbols. + DefinedData(StringRef Name, uint32_t Flags) + : DataSymbol(Name, DefinedDataKind, Flags, nullptr) {} + + static bool classof(const Symbol *S) { return S->kind() == DefinedDataKind; } + + // Returns the output virtual address of a defined data symbol. + uint32_t getVirtualAddress() const; void setVirtualAddress(uint32_t VA); - void update(Kind K, InputFile *F = nullptr, const WasmSymbol *Sym = nullptr, - const InputSegment *Segment = nullptr, - const WasmSignature *Sig = nullptr); + // Returns the offset of a defined data symbol within its OutputSegment. 
+ uint32_t getOutputSegmentOffset() const; + uint32_t getOutputSegmentIndex() const; + uint32_t getSize() const { return Size; } + + InputSegment *Segment = nullptr; + +protected: + uint32_t Offset = 0; + uint32_t Size = 0; +}; + +class UndefinedData : public DataSymbol { +public: + UndefinedData(StringRef Name, uint32_t Flags, InputFile *File = nullptr) + : DataSymbol(Name, UndefinedDataKind, Flags, File) {} + static bool classof(const Symbol *S) { + return S->kind() == UndefinedDataKind; + } +}; - void setArchiveSymbol(const Archive::Symbol &Sym) { ArchiveSymbol = Sym; } - const Archive::Symbol &getArchiveSymbol() { return ArchiveSymbol; } +class GlobalSymbol : public Symbol { +public: + static bool classof(const Symbol *S) { + return S->kind() == DefinedGlobalKind || S->kind() == UndefinedGlobalKind; + } - // This bit is used by Writer::writeNameSection() to prevent - // symbols from being written to the symbol table more than once. - unsigned WrittenToSymtab : 1; - unsigned WrittenToNameSec : 1; + const WasmGlobalType *getGlobalType() const { return GlobalType; } + + // Get/set the global index + uint32_t getGlobalIndex() const; + void setGlobalIndex(uint32_t Index); + bool hasGlobalIndex() const; protected: - unsigned IsLocal : 1; + GlobalSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F, + const WasmGlobalType *GlobalType) + : Symbol(Name, K, Flags, F), GlobalType(GlobalType) {} - StringRef Name; - Archive::Symbol ArchiveSymbol = {nullptr, 0, 0}; - Kind SymbolKind = InvalidKind; - InputFile *File = nullptr; - const WasmSymbol *Sym = nullptr; - const InputSegment *Segment = nullptr; - llvm::Optional<uint32_t> OutputIndex; - llvm::Optional<uint32_t> TableIndex; - llvm::Optional<uint32_t> VirtualAddress; - const WasmSignature *FunctionType; + // Explicit global type, needed for undefined or synthetic globals only. + // For regular defined globals this information comes from the InputGlobal. 
+ const WasmGlobalType *GlobalType; + uint32_t GlobalIndex = INVALID_INDEX; }; +class DefinedGlobal : public GlobalSymbol { +public: + DefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File, + InputGlobal *Global); + + static bool classof(const Symbol *S) { + return S->kind() == DefinedGlobalKind; + } + + InputGlobal *Global; +}; + +class UndefinedGlobal : public GlobalSymbol { +public: + UndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File = nullptr, + const WasmGlobalType *Type = nullptr) + : GlobalSymbol(Name, UndefinedGlobalKind, Flags, File, Type) {} + + static bool classof(const Symbol *S) { + return S->kind() == UndefinedGlobalKind; + } +}; + +class LazySymbol : public Symbol { +public: + LazySymbol(StringRef Name, InputFile *File, const Archive::Symbol &Sym) + : Symbol(Name, LazyKind, 0, File), ArchiveSymbol(Sym) {} + + static bool classof(const Symbol *S) { return S->kind() == LazyKind; } + void fetch(); + +private: + Archive::Symbol ArchiveSymbol; +}; + +// linker-generated symbols +struct WasmSym { + // __stack_pointer + // Global that holds the address of the top of the explicit value stack in + // linear memory. + static DefinedGlobal *StackPointer; + + // __data_end + // Symbol marking the end of the data and bss. + static DefinedData *DataEnd; + + // __heap_base + // Symbol marking the end of the data, bss and explicit stack. Any linear + // memory following this address is not used by the linked code and can + // therefore be used as a backing store for brk()/malloc() implementations. + static DefinedData *HeapBase; + + // __wasm_call_ctors + // Function that directly calls all ctors in priority order. + static DefinedFunction *CallCtors; + + // __dso_handle + // Symbol used in calls to __cxa_atexit to determine current DLL + static DefinedData *DsoHandle; +}; + +// A buffer class that is large enough to hold any Symbol-derived +// object. We allocate memory using this class and instantiate a symbol +// using the placement new. 
+union SymbolUnion { + alignas(DefinedFunction) char A[sizeof(DefinedFunction)]; + alignas(DefinedData) char B[sizeof(DefinedData)]; + alignas(DefinedGlobal) char C[sizeof(DefinedGlobal)]; + alignas(LazySymbol) char D[sizeof(LazySymbol)]; + alignas(UndefinedFunction) char E[sizeof(UndefinedFunction)]; + alignas(UndefinedData) char F[sizeof(UndefinedData)]; + alignas(UndefinedGlobal) char G[sizeof(UndefinedGlobal)]; + alignas(SectionSymbol) char I[sizeof(SectionSymbol)]; +}; + +template <typename T, typename... ArgT> +T *replaceSymbol(Symbol *S, ArgT &&... Arg) { + static_assert(std::is_trivially_destructible<T>(), + "Symbol types must be trivially destructible"); + static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small"); + static_assert(alignof(T) <= alignof(SymbolUnion), + "SymbolUnion not aligned enough"); + assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && + "Not a Symbol"); + + Symbol SymCopy = *S; + + T *S2 = new (S) T(std::forward<ArgT>(Arg)...); + S2->IsUsedInRegularObj = SymCopy.IsUsedInRegularObj; + S2->ForceExport = SymCopy.ForceExport; + return S2; +} + } // namespace wasm // Returns a symbol name for an error message. 
diff --git a/wasm/Writer.cpp b/wasm/Writer.cpp index e7dd49d52213..37ad32452a91 100644 --- a/wasm/Writer.cpp +++ b/wasm/Writer.cpp @@ -8,21 +8,28 @@ //===----------------------------------------------------------------------===// #include "Writer.h" - #include "Config.h" +#include "InputChunks.h" +#include "InputGlobal.h" #include "OutputSections.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Strings.h" #include "lld/Common/Threads.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Object/WasmTraits.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/LEB128.h" #include <cstdarg> +#include <map> #define DEBUG_TYPE "lld" @@ -32,31 +39,16 @@ using namespace lld; using namespace lld::wasm; static constexpr int kStackAlignment = 16; +static constexpr int kInitialTableOffset = 1; +static constexpr const char *kFunctionTableName = "__indirect_function_table"; namespace { -// Traits for using WasmSignature in a DenseMap. -struct WasmSignatureDenseMapInfo { - static WasmSignature getEmptyKey() { - WasmSignature Sig; - Sig.ReturnType = 1; - return Sig; - } - static WasmSignature getTombstoneKey() { - WasmSignature Sig; - Sig.ReturnType = 2; - return Sig; - } - static unsigned getHashValue(const WasmSignature &Sig) { - uintptr_t Value = 0; - Value += DenseMapInfo<int32_t>::getHashValue(Sig.ReturnType); - for (int32_t Param : Sig.ParamTypes) - Value += DenseMapInfo<int32_t>::getHashValue(Param); - return Value; - } - static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) { - return LHS == RHS; - } +// An init entry to be written to either the synthetic init func or the +// linking metadata. 
+struct WasmInitEntry { + const FunctionSymbol *Sym; + uint32_t Priority; }; // The writer writes a SymbolTable result to a file. @@ -67,17 +59,22 @@ public: private: void openFile(); - uint32_t getTypeIndex(const WasmSignature &Sig); - void assignSymbolIndexes(); + uint32_t lookupType(const WasmSignature &Sig); + uint32_t registerType(const WasmSignature &Sig); + + void createCtorFunction(); + void calculateInitFunctions(); + void assignIndexes(); void calculateImports(); - void calculateOffsets(); + void calculateExports(); + void calculateCustomSections(); + void assignSymtab(); void calculateTypes(); void createOutputSegments(); void layoutMemory(); void createHeader(); void createSections(); - SyntheticSection *createSyntheticSection(uint32_t Type, - std::string Name = ""); + SyntheticSection *createSyntheticSection(uint32_t Type, StringRef Name = ""); // Builtin sections void createTypeSection(); @@ -88,9 +85,9 @@ private: void createImportSection(); void createMemorySection(); void createElemSection(); - void createStartSection(); void createCodeSection(); void createDataSection(); + void createCustomSections(); // Custom sections void createRelocSections(); @@ -101,17 +98,24 @@ private: void writeSections(); uint64_t FileSize = 0; - uint32_t DataSize = 0; - uint32_t NumFunctions = 0; uint32_t NumMemoryPages = 0; - uint32_t InitialTableOffset = 0; + uint32_t MaxMemoryPages = 0; std::vector<const WasmSignature *> Types; - DenseMap<WasmSignature, int32_t, WasmSignatureDenseMapInfo> TypeIndices; - std::vector<const Symbol *> FunctionImports; - std::vector<const Symbol *> GlobalImports; - std::vector<const Symbol *> DefinedGlobals; - std::vector<const Symbol *> IndirectFunctions; + DenseMap<WasmSignature, int32_t> TypeIndices; + std::vector<const Symbol *> ImportedSymbols; + unsigned NumImportedFunctions = 0; + unsigned NumImportedGlobals = 0; + std::vector<WasmExport> Exports; + std::vector<const DefinedData *> DefinedFakeGlobals; + std::vector<InputGlobal *> 
InputGlobals; + std::vector<InputFunction *> InputFunctions; + std::vector<const FunctionSymbol *> IndirectFunctions; + std::vector<const Symbol *> SymtabEntries; + std::vector<WasmInitEntry> InitFunctions; + + llvm::StringMap<std::vector<InputSection *>> CustomSectionMapping; + llvm::StringMap<SectionSymbol *> CustomSectionSymbols; // Elements that are used to construct the final output std::string Header; @@ -125,20 +129,12 @@ private: } // anonymous namespace -static void debugPrint(const char *fmt, ...) { - if (!errorHandler().Verbose) - return; - fprintf(stderr, "lld: "); - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - void Writer::createImportSection() { - uint32_t NumImports = FunctionImports.size() + GlobalImports.size(); + uint32_t NumImports = ImportedSymbols.size(); if (Config->ImportMemory) ++NumImports; + if (Config->ImportTable) + ++NumImports; if (NumImports == 0) return; @@ -148,16 +144,6 @@ void Writer::createImportSection() { writeUleb128(OS, NumImports, "import count"); - for (const Symbol *Sym : FunctionImports) { - WasmImport Import; - Import.Module = "env"; - Import.Field = Sym->getName(); - Import.Kind = WASM_EXTERNAL_FUNCTION; - assert(TypeIndices.count(Sym->getFunctionType()) > 0); - Import.SigIndex = TypeIndices.lookup(Sym->getFunctionType()); - writeImport(OS, Import); - } - if (Config->ImportMemory) { WasmImport Import; Import.Module = "env"; @@ -165,16 +151,36 @@ void Writer::createImportSection() { Import.Kind = WASM_EXTERNAL_MEMORY; Import.Memory.Flags = 0; Import.Memory.Initial = NumMemoryPages; + if (MaxMemoryPages != 0) { + Import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX; + Import.Memory.Maximum = MaxMemoryPages; + } writeImport(OS, Import); } - for (const Symbol *Sym : GlobalImports) { + if (Config->ImportTable) { + uint32_t TableSize = kInitialTableOffset + IndirectFunctions.size(); + WasmImport Import; + Import.Module = "env"; + Import.Field = kFunctionTableName; + Import.Kind = 
WASM_EXTERNAL_TABLE; + Import.Table.ElemType = WASM_TYPE_ANYFUNC; + Import.Table.Limits = {WASM_LIMITS_FLAG_HAS_MAX, TableSize, TableSize}; + writeImport(OS, Import); + } + + for (const Symbol *Sym : ImportedSymbols) { WasmImport Import; Import.Module = "env"; Import.Field = Sym->getName(); - Import.Kind = WASM_EXTERNAL_GLOBAL; - Import.Global.Mutable = false; - Import.Global.Type = WASM_TYPE_I32; + if (auto *FunctionSym = dyn_cast<FunctionSymbol>(Sym)) { + Import.Kind = WASM_EXTERNAL_FUNCTION; + Import.SigIndex = lookupType(*FunctionSym->FunctionType); + } else { + auto *GlobalSym = cast<GlobalSymbol>(Sym); + Import.Kind = WASM_EXTERNAL_GLOBAL; + Import.Global = *GlobalSym->getGlobalType(); + } writeImport(OS, Import); } } @@ -188,16 +194,15 @@ void Writer::createTypeSection() { } void Writer::createFunctionSection() { - if (!NumFunctions) + if (InputFunctions.empty()) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_FUNCTION); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, NumFunctions, "function count"); - for (ObjFile *File : Symtab->ObjectFiles) - for (uint32_t Sig : File->getWasmObj()->functionTypes()) - writeUleb128(OS, File->relocateTypeIndex(Sig), "sig index"); + writeUleb128(OS, InputFunctions.size(), "function count"); + for (const InputFunction *Func : InputFunctions) + writeUleb128(OS, lookupType(Func->Signature), "sig index"); } void Writer::createMemorySection() { @@ -207,23 +212,29 @@ void Writer::createMemorySection() { SyntheticSection *Section = createSyntheticSection(WASM_SEC_MEMORY); raw_ostream &OS = Section->getStream(); + bool HasMax = MaxMemoryPages != 0; writeUleb128(OS, 1, "memory count"); - writeUleb128(OS, 0, "memory limits flags"); + writeUleb128(OS, HasMax ? 
static_cast<unsigned>(WASM_LIMITS_FLAG_HAS_MAX) : 0, + "memory limits flags"); writeUleb128(OS, NumMemoryPages, "initial pages"); + if (HasMax) + writeUleb128(OS, MaxMemoryPages, "max pages"); } void Writer::createGlobalSection() { - if (DefinedGlobals.empty()) + unsigned NumGlobals = InputGlobals.size() + DefinedFakeGlobals.size(); + if (NumGlobals == 0) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_GLOBAL); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, DefinedGlobals.size(), "global count"); - for (const Symbol *Sym : DefinedGlobals) { + writeUleb128(OS, NumGlobals, "global count"); + for (const InputGlobal *G : InputGlobals) + writeGlobal(OS, G->Global); + for (const DefinedData *Sym : DefinedFakeGlobals) { WasmGlobal Global; - Global.Type = WASM_TYPE_I32; - Global.Mutable = Sym == Config->StackPointerSymbol; + Global.Type = {WASM_TYPE_I32, false}; Global.InitExpr.Opcode = WASM_OPCODE_I32_CONST; Global.InitExpr.Value.Int32 = Sym->getVirtualAddress(); writeGlobal(OS, Global); @@ -231,88 +242,73 @@ void Writer::createGlobalSection() { } void Writer::createTableSection() { - // Always output a table section, even if there are no indirect calls. - // There are two reasons for this: + if (Config->ImportTable) + return; + + // Always output a table section (or table import), even if there are no + // indirect calls. There are two reasons for this: // 1. For executables it is useful to have an empty table slot at 0 // which can be filled with a null function call handler. // 2. If we don't do this, any program that contains a call_indirect but // no address-taken function will fail at validation time since it is // a validation error to include a call_indirect instruction if there // is not table. 
- uint32_t TableSize = InitialTableOffset + IndirectFunctions.size(); + uint32_t TableSize = kInitialTableOffset + IndirectFunctions.size(); SyntheticSection *Section = createSyntheticSection(WASM_SEC_TABLE); raw_ostream &OS = Section->getStream(); writeUleb128(OS, 1, "table count"); - writeSleb128(OS, WASM_TYPE_ANYFUNC, "table type"); - writeUleb128(OS, WASM_LIMITS_FLAG_HAS_MAX, "table flags"); - writeUleb128(OS, TableSize, "table initial size"); - writeUleb128(OS, TableSize, "table max size"); + WasmLimits Limits = {WASM_LIMITS_FLAG_HAS_MAX, TableSize, TableSize}; + writeTableType(OS, WasmTable{WASM_TYPE_ANYFUNC, Limits}); } void Writer::createExportSection() { - bool ExportMemory = !Config->Relocatable && !Config->ImportMemory; - Symbol *EntrySym = Symtab->find(Config->Entry); - bool ExportEntry = !Config->Relocatable && EntrySym && EntrySym->isDefined(); - bool ExportHidden = Config->EmitRelocs; - - uint32_t NumExports = ExportMemory ? 1 : 0; - - std::vector<const Symbol *> SymbolExports; - if (ExportEntry) - SymbolExports.emplace_back(EntrySym); - - for (const Symbol *Sym : Symtab->getSymbols()) { - if (Sym->isUndefined() || Sym->isGlobal()) - continue; - if (Sym->isHidden() && !ExportHidden) - continue; - if (ExportEntry && Sym == EntrySym) - continue; - SymbolExports.emplace_back(Sym); - } - - for (const Symbol *Sym : DefinedGlobals) { - // Can't export the SP right now because it mutable and mutable globals - // connot be exported. 
- if (Sym == Config->StackPointerSymbol) - continue; - SymbolExports.emplace_back(Sym); - } - - NumExports += SymbolExports.size(); - if (!NumExports) + if (!Exports.size()) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_EXPORT); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, NumExports, "export count"); + writeUleb128(OS, Exports.size(), "export count"); + for (const WasmExport &Export : Exports) + writeExport(OS, Export); +} - if (ExportMemory) { - WasmExport MemoryExport; - MemoryExport.Name = "memory"; - MemoryExport.Kind = WASM_EXTERNAL_MEMORY; - MemoryExport.Index = 0; - writeExport(OS, MemoryExport); +void Writer::calculateCustomSections() { + log("calculateCustomSections"); + bool StripDebug = Config->StripDebug || Config->StripAll; + for (ObjFile *File : Symtab->ObjectFiles) { + for (InputSection *Section : File->CustomSections) { + StringRef Name = Section->getName(); + // These custom sections are known to the linker and synthesized rather than + // blindly copied + if (Name == "linking" || Name == "name" || Name.startswith("reloc.")) + continue; + // .. 
or it is a debug section + if (StripDebug && Name.startswith(".debug_")) + continue; + CustomSectionMapping[Name].push_back(Section); + } } +} - for (const Symbol *Sym : SymbolExports) { - log("Export: " + Sym->getName()); - WasmExport Export; - Export.Name = Sym->getName(); - Export.Index = Sym->getOutputIndex(); - if (Sym->isFunction()) - Export.Kind = WASM_EXTERNAL_FUNCTION; - else - Export.Kind = WASM_EXTERNAL_GLOBAL; - writeExport(OS, Export); +void Writer::createCustomSections() { + log("createCustomSections"); + for (auto &Pair : CustomSectionMapping) { + StringRef Name = Pair.first(); + + auto P = CustomSectionSymbols.find(Name); + if (P != CustomSectionSymbols.end()) { + uint32_t SectionIndex = OutputSections.size(); + P->second->setOutputSectionIndex(SectionIndex); + } + + LLVM_DEBUG(dbgs() << "createCustomSection: " << Name << "\n"); + OutputSections.push_back(make<CustomSection>(Name, Pair.second)); } } -void Writer::createStartSection() {} - void Writer::createElemSection() { if (IndirectFunctions.empty()) return; @@ -324,25 +320,25 @@ void Writer::createElemSection() { writeUleb128(OS, 0, "table index"); WasmInitExpr InitExpr; InitExpr.Opcode = WASM_OPCODE_I32_CONST; - InitExpr.Value.Int32 = InitialTableOffset; + InitExpr.Value.Int32 = kInitialTableOffset; writeInitExpr(OS, InitExpr); writeUleb128(OS, IndirectFunctions.size(), "elem count"); - uint32_t TableIndex = InitialTableOffset; - for (const Symbol *Sym : IndirectFunctions) { + uint32_t TableIndex = kInitialTableOffset; + for (const FunctionSymbol *Sym : IndirectFunctions) { assert(Sym->getTableIndex() == TableIndex); - writeUleb128(OS, Sym->getOutputIndex(), "function index"); + writeUleb128(OS, Sym->getFunctionIndex(), "function index"); ++TableIndex; } } void Writer::createCodeSection() { - if (!NumFunctions) + if (InputFunctions.empty()) return; log("createCodeSection"); - auto Section = make<CodeSection>(NumFunctions, Symtab->ObjectFiles); + auto Section = make<CodeSection>(InputFunctions); 
OutputSections.push_back(Section); } @@ -361,28 +357,68 @@ void Writer::createRelocSections() { log("createRelocSections"); // Don't use iterator here since we are adding to OutputSection size_t OrigSize = OutputSections.size(); - for (size_t i = 0; i < OrigSize; i++) { - OutputSection *S = OutputSections[i]; - const char *name; - uint32_t Count = S->numRelocations(); + for (size_t I = 0; I < OrigSize; I++) { + OutputSection *OSec = OutputSections[I]; + uint32_t Count = OSec->numRelocations(); if (!Count) continue; - if (S->Type == WASM_SEC_DATA) - name = "reloc.DATA"; - else if (S->Type == WASM_SEC_CODE) - name = "reloc.CODE"; + StringRef Name; + if (OSec->Type == WASM_SEC_DATA) + Name = "reloc.DATA"; + else if (OSec->Type == WASM_SEC_CODE) + Name = "reloc.CODE"; + else if (OSec->Type == WASM_SEC_CUSTOM) + Name = Saver.save("reloc." + OSec->Name); else - llvm_unreachable("relocations only supported for code and data"); + llvm_unreachable( + "relocations only supported for code, data, or custom sections"); - SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, name); + SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, Name); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, S->Type, "reloc section"); + writeUleb128(OS, I, "reloc section"); writeUleb128(OS, Count, "reloc count"); - S->writeRelocations(OS); + OSec->writeRelocations(OS); } } +static uint32_t getWasmFlags(const Symbol *Sym) { + uint32_t Flags = 0; + if (Sym->isLocal()) + Flags |= WASM_SYMBOL_BINDING_LOCAL; + if (Sym->isWeak()) + Flags |= WASM_SYMBOL_BINDING_WEAK; + if (Sym->isHidden()) + Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN; + if (Sym->isUndefined()) + Flags |= WASM_SYMBOL_UNDEFINED; + return Flags; +} + +// Some synthetic sections (e.g. "name" and "linking") have subsections. +// Just like the synthetic sections themselves these need to be created before +// they can be written out (since they are preceded by their length). 
This +// class is used to create subsections and then write them into the stream +// of the parent section. +class SubSection { +public: + explicit SubSection(uint32_t Type) : Type(Type) {} + + void writeTo(raw_ostream &To) { + OS.flush(); + writeUleb128(To, Type, "subsection type"); + writeUleb128(To, Body.size(), "subsection size"); + To.write(Body.data(), Body.size()); + } + +private: + uint32_t Type; + std::string Body; + +public: + raw_string_ostream OS{Body}; +}; + // Create the custom "linking" section containing linker metadata. // This is only created when relocatable output is requested. void Writer::createLinkingSection() { @@ -390,82 +426,145 @@ void Writer::createLinkingSection() { createSyntheticSection(WASM_SEC_CUSTOM, "linking"); raw_ostream &OS = Section->getStream(); - SubSection DataSizeSubSection(WASM_DATA_SIZE); - writeUleb128(DataSizeSubSection.getStream(), DataSize, "data size"); - DataSizeSubSection.finalizeContents(); - DataSizeSubSection.writeToStream(OS); + writeUleb128(OS, WasmMetadataVersion, "Version"); + + if (!SymtabEntries.empty()) { + SubSection Sub(WASM_SYMBOL_TABLE); + writeUleb128(Sub.OS, SymtabEntries.size(), "num symbols"); + + for (const Symbol *Sym : SymtabEntries) { + assert(Sym->isDefined() || Sym->isUndefined()); + WasmSymbolType Kind = Sym->getWasmType(); + uint32_t Flags = getWasmFlags(Sym); + + writeU8(Sub.OS, Kind, "sym kind"); + writeUleb128(Sub.OS, Flags, "sym flags"); + + if (auto *F = dyn_cast<FunctionSymbol>(Sym)) { + writeUleb128(Sub.OS, F->getFunctionIndex(), "index"); + if (Sym->isDefined()) + writeStr(Sub.OS, Sym->getName(), "sym name"); + } else if (auto *G = dyn_cast<GlobalSymbol>(Sym)) { + writeUleb128(Sub.OS, G->getGlobalIndex(), "index"); + if (Sym->isDefined()) + writeStr(Sub.OS, Sym->getName(), "sym name"); + } else if (isa<DataSymbol>(Sym)) { + writeStr(Sub.OS, Sym->getName(), "sym name"); + if (auto *DataSym = dyn_cast<DefinedData>(Sym)) { + writeUleb128(Sub.OS, DataSym->getOutputSegmentIndex(), 
"index"); + writeUleb128(Sub.OS, DataSym->getOutputSegmentOffset(), + "data offset"); + writeUleb128(Sub.OS, DataSym->getSize(), "data size"); + } + } else { + auto *S = cast<SectionSymbol>(Sym); + writeUleb128(Sub.OS, S->getOutputSectionIndex(), "sym section index"); + } + } - if (!Config->Relocatable) - return; + Sub.writeTo(OS); + } if (Segments.size()) { - SubSection SubSection(WASM_SEGMENT_INFO); - writeUleb128(SubSection.getStream(), Segments.size(), "num data segments"); + SubSection Sub(WASM_SEGMENT_INFO); + writeUleb128(Sub.OS, Segments.size(), "num data segments"); for (const OutputSegment *S : Segments) { - writeStr(SubSection.getStream(), S->Name, "segment name"); - writeUleb128(SubSection.getStream(), S->Alignment, "alignment"); - writeUleb128(SubSection.getStream(), 0, "flags"); + writeStr(Sub.OS, S->Name, "segment name"); + writeUleb128(Sub.OS, S->Alignment, "alignment"); + writeUleb128(Sub.OS, 0, "flags"); } - SubSection.finalizeContents(); - SubSection.writeToStream(OS); + Sub.writeTo(OS); } - std::vector<WasmInitFunc> InitFunctions; - for (ObjFile *File : Symtab->ObjectFiles) { - const WasmLinkingData &L = File->getWasmObj()->linkingData(); - InitFunctions.reserve(InitFunctions.size() + L.InitFunctions.size()); - for (const WasmInitFunc &F : L.InitFunctions) - InitFunctions.emplace_back(WasmInitFunc{ - F.Priority, File->relocateFunctionIndex(F.FunctionIndex)}); + if (!InitFunctions.empty()) { + SubSection Sub(WASM_INIT_FUNCS); + writeUleb128(Sub.OS, InitFunctions.size(), "num init functions"); + for (const WasmInitEntry &F : InitFunctions) { + writeUleb128(Sub.OS, F.Priority, "priority"); + writeUleb128(Sub.OS, F.Sym->getOutputSymbolIndex(), "function index"); + } + Sub.writeTo(OS); } - if (!InitFunctions.empty()) { - SubSection SubSection(WASM_INIT_FUNCS); - writeUleb128(SubSection.getStream(), InitFunctions.size(), - "num init functionsw"); - for (const WasmInitFunc &F : InitFunctions) { - writeUleb128(SubSection.getStream(), F.Priority, 
"priority"); - writeUleb128(SubSection.getStream(), F.FunctionIndex, "function index"); + struct ComdatEntry { + unsigned Kind; + uint32_t Index; + }; + std::map<StringRef, std::vector<ComdatEntry>> Comdats; + + for (const InputFunction *F : InputFunctions) { + StringRef Comdat = F->getComdatName(); + if (!Comdat.empty()) + Comdats[Comdat].emplace_back( + ComdatEntry{WASM_COMDAT_FUNCTION, F->getFunctionIndex()}); + } + for (uint32_t I = 0; I < Segments.size(); ++I) { + const auto &InputSegments = Segments[I]->InputSegments; + if (InputSegments.empty()) + continue; + StringRef Comdat = InputSegments[0]->getComdatName(); +#ifndef NDEBUG + for (const InputSegment *IS : InputSegments) + assert(IS->getComdatName() == Comdat); +#endif + if (!Comdat.empty()) + Comdats[Comdat].emplace_back(ComdatEntry{WASM_COMDAT_DATA, I}); + } + + if (!Comdats.empty()) { + SubSection Sub(WASM_COMDAT_INFO); + writeUleb128(Sub.OS, Comdats.size(), "num comdats"); + for (const auto &C : Comdats) { + writeStr(Sub.OS, C.first, "comdat name"); + writeUleb128(Sub.OS, 0, "comdat flags"); // flags for future use + writeUleb128(Sub.OS, C.second.size(), "num entries"); + for (const ComdatEntry &Entry : C.second) { + writeU8(Sub.OS, Entry.Kind, "entry kind"); + writeUleb128(Sub.OS, Entry.Index, "entry index"); + } } - SubSection.finalizeContents(); - SubSection.writeToStream(OS); + Sub.writeTo(OS); } } // Create the custom "name" section containing debug symbol names. 
void Writer::createNameSection() { - // Create an array of all function sorted by function index space - std::vector<const Symbol *> Names; + unsigned NumNames = NumImportedFunctions; + for (const InputFunction *F : InputFunctions) + if (!F->getName().empty() || !F->getDebugName().empty()) + ++NumNames; - for (ObjFile *File : Symtab->ObjectFiles) { - Names.reserve(Names.size() + File->getSymbols().size()); - for (Symbol *S : File->getSymbols()) { - if (!S->isFunction() || S->isWeak() || S->WrittenToNameSec) - continue; - S->WrittenToNameSec = true; - Names.emplace_back(S); - } - } + if (NumNames == 0) + return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, "name"); - std::sort(Names.begin(), Names.end(), [](const Symbol *A, const Symbol *B) { - return A->getOutputIndex() < B->getOutputIndex(); - }); - - SubSection FunctionSubsection(WASM_NAMES_FUNCTION); - raw_ostream &OS = FunctionSubsection.getStream(); - writeUleb128(OS, Names.size(), "name count"); - - // We have to iterate through the inputs twice so that all the imports - // appear first before any of the local function names. - for (const Symbol *S : Names) { - writeUleb128(OS, S->getOutputIndex(), "func index"); - writeStr(OS, S->getName(), "symbol name"); + SubSection Sub(WASM_NAMES_FUNCTION); + writeUleb128(Sub.OS, NumNames, "name count"); + + // Names must appear in function index order. As it happens ImportedSymbols + // and InputFunctions are numbered in order with imported functions coming + // first. + for (const Symbol *S : ImportedSymbols) { + if (auto *F = dyn_cast<FunctionSymbol>(S)) { + writeUleb128(Sub.OS, F->getFunctionIndex(), "func index"); + Optional<std::string> Name = demangleItanium(F->getName()); + writeStr(Sub.OS, Name ? 
StringRef(*Name) : F->getName(), "symbol name"); + } + } + for (const InputFunction *F : InputFunctions) { + if (!F->getName().empty()) { + writeUleb128(Sub.OS, F->getFunctionIndex(), "func index"); + if (!F->getDebugName().empty()) { + writeStr(Sub.OS, F->getDebugName(), "symbol name"); + } else { + Optional<std::string> Name = demangleItanium(F->getName()); + writeStr(Sub.OS, Name ? StringRef(*Name) : F->getName(), "symbol name"); + } + } } - FunctionSubsection.finalizeContents(); - FunctionSubsection.writeToStream(Section->getStream()); + Sub.writeTo(Section->getStream()); } void Writer::writeHeader() { @@ -479,48 +578,98 @@ void Writer::writeSections() { // Fix the memory layout of the output binary. This assigns memory offsets // to each of the input data sections as well as the explicit stack region. +// The default memory layout is as follows, from low to high. +// +// - initialized data (starting at Config->GlobalBase) +// - BSS data (not currently implemented in llvm) +// - explicit stack (Config->ZStackSize) +// - heap start / unallocated +// +// The --stack-first option means that stack is placed before any static data. +// This can be useful since it means that stack overflow traps immediately rather +// than overwriting global data, but also increases code size since all static +// data loads and stores requires larger offsets. 
void Writer::layoutMemory() { + createOutputSegments(); + uint32_t MemoryPtr = 0; - if (!Config->Relocatable) { + + auto PlaceStack = [&]() { + if (Config->Relocatable) + return; + MemoryPtr = alignTo(MemoryPtr, kStackAlignment); + if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment)) + error("stack size must be " + Twine(kStackAlignment) + "-byte aligned"); + log("mem: stack size = " + Twine(Config->ZStackSize)); + log("mem: stack base = " + Twine(MemoryPtr)); + MemoryPtr += Config->ZStackSize; + WasmSym::StackPointer->Global->Global.InitExpr.Value.Int32 = MemoryPtr; + log("mem: stack top = " + Twine(MemoryPtr)); + }; + + if (Config->StackFirst) { + PlaceStack(); + } else { MemoryPtr = Config->GlobalBase; - debugPrint("mem: global base = %d\n", Config->GlobalBase); + log("mem: global base = " + Twine(Config->GlobalBase)); } - createOutputSegments(); + uint32_t DataStart = MemoryPtr; + + // Arbitrarily set __dso_handle handle to point to the start of the data + // segments. + if (WasmSym::DsoHandle) + WasmSym::DsoHandle->setVirtualAddress(DataStart); - // Static data comes first for (OutputSegment *Seg : Segments) { MemoryPtr = alignTo(MemoryPtr, Seg->Alignment); Seg->StartVA = MemoryPtr; - debugPrint("mem: %-10s offset=%-8d size=%-4d align=%d\n", - Seg->Name.str().c_str(), MemoryPtr, Seg->Size, Seg->Alignment); + log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", Seg->Name, + MemoryPtr, Seg->Size, Seg->Alignment)); MemoryPtr += Seg->Size; } - DataSize = MemoryPtr; - if (!Config->Relocatable) - DataSize -= Config->GlobalBase; - debugPrint("mem: static data = %d\n", DataSize); + // TODO: Add .bss space here. + if (WasmSym::DataEnd) + WasmSym::DataEnd->setVirtualAddress(MemoryPtr); + + log("mem: static data = " + Twine(MemoryPtr - DataStart)); - // Stack comes after static data + if (!Config->StackFirst) + PlaceStack(); + + // Set `__heap_base` to directly follow the end of the stack or global data. 
+ // The fact that this comes last means that a malloc/brk implementation + // can grow the heap at runtime. if (!Config->Relocatable) { - MemoryPtr = alignTo(MemoryPtr, kStackAlignment); - if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment)) - error("stack size must be " + Twine(kStackAlignment) + "-byte aligned"); - debugPrint("mem: stack size = %d\n", Config->ZStackSize); - debugPrint("mem: stack base = %d\n", MemoryPtr); - MemoryPtr += Config->ZStackSize; - Config->StackPointerSymbol->setVirtualAddress(MemoryPtr); - debugPrint("mem: stack top = %d\n", MemoryPtr); + WasmSym::HeapBase->setVirtualAddress(MemoryPtr); + log("mem: heap base = " + Twine(MemoryPtr)); } + if (Config->InitialMemory != 0) { + if (Config->InitialMemory != alignTo(Config->InitialMemory, WasmPageSize)) + error("initial memory must be " + Twine(WasmPageSize) + "-byte aligned"); + if (MemoryPtr > Config->InitialMemory) + error("initial memory too small, " + Twine(MemoryPtr) + " bytes needed"); + else + MemoryPtr = Config->InitialMemory; + } uint32_t MemSize = alignTo(MemoryPtr, WasmPageSize); NumMemoryPages = MemSize / WasmPageSize; - debugPrint("mem: total pages = %d\n", NumMemoryPages); + log("mem: total pages = " + Twine(NumMemoryPages)); + + if (Config->MaxMemory != 0) { + if (Config->MaxMemory != alignTo(Config->MaxMemory, WasmPageSize)) + error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned"); + if (MemoryPtr > Config->MaxMemory) + error("maximum memory too small, " + Twine(MemoryPtr) + " bytes needed"); + MaxMemoryPages = Config->MaxMemory / WasmPageSize; + log("mem: max pages = " + Twine(MaxMemoryPages)); + } } SyntheticSection *Writer::createSyntheticSection(uint32_t Type, - std::string Name) { + StringRef Name) { auto Sec = make<SyntheticSection>(Type, Name); log("createSection: " + toString(*Sec)); OutputSections.push_back(Sec); @@ -536,15 +685,16 @@ void Writer::createSections() { createMemorySection(); createGlobalSection(); 
createExportSection(); - createStartSection(); createElemSection(); createCodeSection(); createDataSection(); + createCustomSections(); // Custom sections - if (Config->EmitRelocs) + if (Config->Relocatable) { + createLinkingSection(); createRelocSections(); - createLinkingSection(); + } if (!Config->StripDebug && !Config->StripAll) createNameSection(); @@ -555,149 +705,336 @@ void Writer::createSections() { } } -void Writer::calculateOffsets() { - for (ObjFile *File : Symtab->ObjectFiles) { - const WasmObjectFile *WasmFile = File->getWasmObj(); - - // Function Index - File->FunctionIndexOffset = - FunctionImports.size() - File->NumFunctionImports() + NumFunctions; - NumFunctions += WasmFile->functions().size(); +void Writer::calculateImports() { + for (Symbol *Sym : Symtab->getSymbols()) { + if (!Sym->isUndefined()) + continue; + if (isa<DataSymbol>(Sym)) + continue; + if (Sym->isWeak() && !Config->Relocatable) + continue; + if (!Sym->isLive()) + continue; + if (!Sym->IsUsedInRegularObj) + continue; - // Memory - if (WasmFile->memories().size() > 1) - fatal(File->getName() + ": contains more than one memory"); + LLVM_DEBUG(dbgs() << "import: " << Sym->getName() << "\n"); + ImportedSymbols.emplace_back(Sym); + if (auto *F = dyn_cast<FunctionSymbol>(Sym)) + F->setFunctionIndex(NumImportedFunctions++); + else + cast<GlobalSymbol>(Sym)->setGlobalIndex(NumImportedGlobals++); } } -void Writer::calculateImports() { +void Writer::calculateExports() { + if (Config->Relocatable) + return; + + if (!Config->Relocatable && !Config->ImportMemory) + Exports.push_back(WasmExport{"memory", WASM_EXTERNAL_MEMORY, 0}); + + if (!Config->Relocatable && Config->ExportTable) + Exports.push_back(WasmExport{kFunctionTableName, WASM_EXTERNAL_TABLE, 0}); + + unsigned FakeGlobalIndex = NumImportedGlobals + InputGlobals.size(); + for (Symbol *Sym : Symtab->getSymbols()) { - if (!Sym->isUndefined() || Sym->isWeak()) + if (!Sym->isExported()) + continue; + if (!Sym->isLive()) continue; - if 
(Sym->isFunction()) { - Sym->setOutputIndex(FunctionImports.size()); - FunctionImports.push_back(Sym); + StringRef Name = Sym->getName(); + WasmExport Export; + if (auto *F = dyn_cast<DefinedFunction>(Sym)) { + Export = {Name, WASM_EXTERNAL_FUNCTION, F->getFunctionIndex()}; + } else if (auto *G = dyn_cast<DefinedGlobal>(Sym)) { + // TODO(sbc): Remove this check once to mutable global proposal is + // implement in all major browsers. + // See: https://github.com/WebAssembly/mutable-global + if (G->getGlobalType()->Mutable) { + // Only the __stack_pointer should ever be create as mutable. + assert(G == WasmSym::StackPointer); + continue; + } + Export = {Name, WASM_EXTERNAL_GLOBAL, G->getGlobalIndex()}; } else { - Sym->setOutputIndex(GlobalImports.size()); - GlobalImports.push_back(Sym); + auto *D = cast<DefinedData>(Sym); + DefinedFakeGlobals.emplace_back(D); + Export = {Name, WASM_EXTERNAL_GLOBAL, FakeGlobalIndex++}; } + + LLVM_DEBUG(dbgs() << "Export: " << Name << "\n"); + Exports.push_back(Export); + } +} + +void Writer::assignSymtab() { + if (!Config->Relocatable) + return; + + StringMap<uint32_t> SectionSymbolIndices; + + unsigned SymbolIndex = SymtabEntries.size(); + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Symtab entries: " << File->getName() << "\n"); + for (Symbol *Sym : File->getSymbols()) { + if (Sym->getFile() != File) + continue; + + if (auto *S = dyn_cast<SectionSymbol>(Sym)) { + StringRef Name = S->getName(); + if (CustomSectionMapping.count(Name) == 0) + continue; + + auto SSI = SectionSymbolIndices.find(Name); + if (SSI != SectionSymbolIndices.end()) { + Sym->setOutputSymbolIndex(SSI->second); + continue; + } + + SectionSymbolIndices[Name] = SymbolIndex; + CustomSectionSymbols[Name] = cast<SectionSymbol>(Sym); + + Sym->markLive(); + } + + // (Since this is relocatable output, GC is not performed so symbols must + // be live.) 
+ assert(Sym->isLive()); + Sym->setOutputSymbolIndex(SymbolIndex++); + SymtabEntries.emplace_back(Sym); + } + } + + // For the moment, relocatable output doesn't contain any synthetic functions, + // so no need to look through the Symtab for symbols not referenced by + // Symtab->ObjectFiles. +} + +uint32_t Writer::lookupType(const WasmSignature &Sig) { + auto It = TypeIndices.find(Sig); + if (It == TypeIndices.end()) { + error("type not found: " + toString(Sig)); + return 0; } + return It->second; } -uint32_t Writer::getTypeIndex(const WasmSignature &Sig) { +uint32_t Writer::registerType(const WasmSignature &Sig) { auto Pair = TypeIndices.insert(std::make_pair(Sig, Types.size())); - if (Pair.second) + if (Pair.second) { + LLVM_DEBUG(dbgs() << "type " << toString(Sig) << "\n"); Types.push_back(&Sig); + } return Pair.first->second; } void Writer::calculateTypes() { + // The output type section is the union of the following sets: + // 1. Any signature used in the TYPE relocation + // 2. The signatures of all imported functions + // 3. 
The signatures of all defined functions + for (ObjFile *File : Symtab->ObjectFiles) { - File->TypeMap.reserve(File->getWasmObj()->types().size()); - for (const WasmSignature &Sig : File->getWasmObj()->types()) - File->TypeMap.push_back(getTypeIndex(Sig)); + ArrayRef<WasmSignature> Types = File->getWasmObj()->types(); + for (uint32_t I = 0; I < Types.size(); I++) + if (File->TypeIsUsed[I]) + File->TypeMap[I] = registerType(Types[I]); } -} -void Writer::assignSymbolIndexes() { - uint32_t GlobalIndex = GlobalImports.size(); + for (const Symbol *Sym : ImportedSymbols) + if (auto *F = dyn_cast<FunctionSymbol>(Sym)) + registerType(*F->FunctionType); - if (Config->StackPointerSymbol) { - DefinedGlobals.emplace_back(Config->StackPointerSymbol); - Config->StackPointerSymbol->setOutputIndex(GlobalIndex++); - } + for (const InputFunction *F : InputFunctions) + registerType(F->Signature); +} - if (Config->EmitRelocs) - DefinedGlobals.reserve(Symtab->getSymbols().size()); +void Writer::assignIndexes() { + uint32_t FunctionIndex = NumImportedFunctions + InputFunctions.size(); + auto AddDefinedFunction = [&](InputFunction *Func) { + if (!Func->Live) + return; + InputFunctions.emplace_back(Func); + Func->setFunctionIndex(FunctionIndex++); + }; - uint32_t TableIndex = InitialTableOffset; + for (InputFunction *Func : Symtab->SyntheticFunctions) + AddDefinedFunction(Func); for (ObjFile *File : Symtab->ObjectFiles) { - DEBUG(dbgs() << "assignSymbolIndexes: " << File->getName() << "\n"); - - for (Symbol *Sym : File->getSymbols()) { - // Assign indexes for symbols defined with this file. 
- if (!Sym->isDefined() || File != Sym->getFile()) - continue; - if (Sym->isFunction()) { - auto *Obj = cast<ObjFile>(Sym->getFile()); - Sym->setOutputIndex(Obj->FunctionIndexOffset + - Sym->getFunctionIndex()); - } else if (Config->EmitRelocs) { - DefinedGlobals.emplace_back(Sym); - Sym->setOutputIndex(GlobalIndex++); - } - } + LLVM_DEBUG(dbgs() << "Functions: " << File->getName() << "\n"); + for (InputFunction *Func : File->Functions) + AddDefinedFunction(Func); + } - for (Symbol *Sym : File->getTableSymbols()) { - if (!Sym->hasTableIndex()) { + uint32_t TableIndex = kInitialTableOffset; + auto HandleRelocs = [&](InputChunk *Chunk) { + if (!Chunk->Live) + return; + ObjFile *File = Chunk->File; + ArrayRef<WasmSignature> Types = File->getWasmObj()->types(); + for (const WasmRelocation &Reloc : Chunk->getRelocations()) { + if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32 || + Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB) { + FunctionSymbol *Sym = File->getFunctionSymbol(Reloc.Index); + if (Sym->hasTableIndex() || !Sym->hasFunctionIndex()) + continue; Sym->setTableIndex(TableIndex++); IndirectFunctions.emplace_back(Sym); + } else if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { + // Mark target type as live + File->TypeMap[Reloc.Index] = registerType(Types[Reloc.Index]); + File->TypeIsUsed[Reloc.Index] = true; } } + }; + + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Handle relocs: " << File->getName() << "\n"); + for (InputChunk *Chunk : File->Functions) + HandleRelocs(Chunk); + for (InputChunk *Chunk : File->Segments) + HandleRelocs(Chunk); + for (auto &P : File->CustomSections) + HandleRelocs(P); + } + + uint32_t GlobalIndex = NumImportedGlobals + InputGlobals.size(); + auto AddDefinedGlobal = [&](InputGlobal *Global) { + if (Global->Live) { + LLVM_DEBUG(dbgs() << "AddDefinedGlobal: " << GlobalIndex << "\n"); + Global->setGlobalIndex(GlobalIndex++); + InputGlobals.push_back(Global); + } + }; + + for (InputGlobal *Global : 
Symtab->SyntheticGlobals) + AddDefinedGlobal(Global); + + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Globals: " << File->getName() << "\n"); + for (InputGlobal *Global : File->Globals) + AddDefinedGlobal(Global); } } static StringRef getOutputDataSegmentName(StringRef Name) { - if (Config->Relocatable) + if (!Config->MergeDataSegments) return Name; - - for (StringRef V : - {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", - ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", - ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) { - StringRef Prefix = V.drop_back(); - if (Name.startswith(V) || Name == Prefix) - return Prefix; - } - + if (Name.startswith(".text.")) + return ".text"; + if (Name.startswith(".data.")) + return ".data"; + if (Name.startswith(".bss.")) + return ".bss"; return Name; } void Writer::createOutputSegments() { for (ObjFile *File : Symtab->ObjectFiles) { for (InputSegment *Segment : File->Segments) { + if (!Segment->Live) + continue; StringRef Name = getOutputDataSegmentName(Segment->getName()); OutputSegment *&S = SegmentMap[Name]; if (S == nullptr) { - DEBUG(dbgs() << "new segment: " << Name << "\n"); - S = make<OutputSegment>(Name); + LLVM_DEBUG(dbgs() << "new segment: " << Name << "\n"); + S = make<OutputSegment>(Name, Segments.size()); Segments.push_back(S); } S->addInputSegment(Segment); - DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + LLVM_DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); } } } +static const int OPCODE_CALL = 0x10; +static const int OPCODE_END = 0xb; + +// Create synthetic "__wasm_call_ctors" function based on ctor functions +// in input object. +void Writer::createCtorFunction() { + // First write the body's contents to a string. 
+ std::string BodyContent; + { + raw_string_ostream OS(BodyContent); + writeUleb128(OS, 0, "num locals"); + for (const WasmInitEntry &F : InitFunctions) { + writeU8(OS, OPCODE_CALL, "CALL"); + writeUleb128(OS, F.Sym->getFunctionIndex(), "function index"); + } + writeU8(OS, OPCODE_END, "END"); + } + + // Once we know the size of the body we can create the final function body + std::string FunctionBody; + { + raw_string_ostream OS(FunctionBody); + writeUleb128(OS, BodyContent.size(), "function size"); + OS << BodyContent; + } + + ArrayRef<uint8_t> Body = toArrayRef(Saver.save(FunctionBody)); + cast<SyntheticFunction>(WasmSym::CallCtors->Function)->setBody(Body); +} + +// Populate InitFunctions vector with init functions from all input objects. +// This is then used either when creating the output linking section or to +// synthesize the "__wasm_call_ctors" function. +void Writer::calculateInitFunctions() { + for (ObjFile *File : Symtab->ObjectFiles) { + const WasmLinkingData &L = File->getWasmObj()->linkingData(); + for (const WasmInitFunc &F : L.InitFunctions) { + FunctionSymbol *Sym = File->getFunctionSymbol(F.Symbol); + if (*Sym->FunctionType != WasmSignature{{}, WASM_TYPE_NORESULT}) + error("invalid signature for init func: " + toString(*Sym)); + InitFunctions.emplace_back(WasmInitEntry{Sym, F.Priority}); + } + } + + // Sort in order of priority (lowest first) so that they are called + // in the correct order. 
+ std::stable_sort(InitFunctions.begin(), InitFunctions.end(), + [](const WasmInitEntry &L, const WasmInitEntry &R) { + return L.Priority < R.Priority; + }); +} + void Writer::run() { - if (!Config->Relocatable) - InitialTableOffset = 1; + if (Config->Relocatable) + Config->GlobalBase = 0; - log("-- calculateTypes"); - calculateTypes(); log("-- calculateImports"); calculateImports(); - log("-- calculateOffsets"); - calculateOffsets(); + log("-- assignIndexes"); + assignIndexes(); + log("-- calculateInitFunctions"); + calculateInitFunctions(); + if (!Config->Relocatable) + createCtorFunction(); + log("-- calculateTypes"); + calculateTypes(); + log("-- layoutMemory"); + layoutMemory(); + log("-- calculateExports"); + calculateExports(); + log("-- calculateCustomSections"); + calculateCustomSections(); + log("-- assignSymtab"); + assignSymtab(); if (errorHandler().Verbose) { - log("Defined Functions: " + Twine(NumFunctions)); - log("Defined Globals : " + Twine(DefinedGlobals.size())); - log("Function Imports : " + Twine(FunctionImports.size())); - log("Global Imports : " + Twine(GlobalImports.size())); - log("Total Imports : " + - Twine(FunctionImports.size() + GlobalImports.size())); + log("Defined Functions: " + Twine(InputFunctions.size())); + log("Defined Globals : " + Twine(InputGlobals.size())); + log("Function Imports : " + Twine(NumImportedFunctions)); + log("Global Imports : " + Twine(NumImportedGlobals)); for (ObjFile *File : Symtab->ObjectFiles) File->dumpInfo(); } - log("-- assignSymbolIndexes"); - assignSymbolIndexes(); - log("-- layoutMemory"); - layoutMemory(); - createHeader(); log("-- createSections"); createSections(); @@ -721,7 +1058,6 @@ void Writer::run() { // Open a result file. 
void Writer::openFile() { log("writing: " + Config->OutputFile); - ::remove(Config->OutputFile.str().c_str()); Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = FileOutputBuffer::create(Config->OutputFile, FileSize, diff --git a/wasm/WriterUtils.cpp b/wasm/WriterUtils.cpp index 5bdf0d2e3f65..201529edeaa6 100644 --- a/wasm/WriterUtils.cpp +++ b/wasm/WriterUtils.cpp @@ -8,12 +8,9 @@ //===----------------------------------------------------------------------===// #include "WriterUtils.h" - #include "lld/Common/ErrorHandler.h" - #include "llvm/Support/Debug.h" #include "llvm/Support/EndianStream.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/LEB128.h" #define DEBUG_TYPE "lld" @@ -22,7 +19,7 @@ using namespace llvm; using namespace llvm::wasm; using namespace lld::wasm; -static const char *valueTypeToString(int32_t Type) { +static const char *valueTypeToString(uint8_t Type) { switch (Type) { case WASM_TYPE_I32: return "i32"; @@ -39,61 +36,57 @@ static const char *valueTypeToString(int32_t Type) { namespace lld { -void wasm::debugWrite(uint64_t offset, Twine msg) { - DEBUG(dbgs() << format(" | %08" PRIx64 ": ", offset) << msg << "\n"); +void wasm::debugWrite(uint64_t Offset, const Twine &Msg) { + LLVM_DEBUG(dbgs() << format(" | %08lld: ", Offset) << Msg << "\n"); } -void wasm::writeUleb128(raw_ostream &OS, uint32_t Number, const char *msg) { - if (msg) - debugWrite(OS.tell(), msg + formatv(" [{0:x}]", Number)); +void wasm::writeUleb128(raw_ostream &OS, uint32_t Number, const Twine &Msg) { + debugWrite(OS.tell(), Msg + "[" + utohexstr(Number) + "]"); encodeULEB128(Number, OS); } -void wasm::writeSleb128(raw_ostream &OS, int32_t Number, const char *msg) { - if (msg) - debugWrite(OS.tell(), msg + formatv(" [{0:x}]", Number)); +void wasm::writeSleb128(raw_ostream &OS, int32_t Number, const Twine &Msg) { + debugWrite(OS.tell(), Msg + "[" + utohexstr(Number) + "]"); encodeSLEB128(Number, OS); } -void wasm::writeBytes(raw_ostream &OS, const char 
*bytes, size_t count, - const char *msg) { - if (msg) - debugWrite(OS.tell(), msg + formatv(" [data[{0}]]", count)); - OS.write(bytes, count); +void wasm::writeBytes(raw_ostream &OS, const char *Bytes, size_t Count, + const Twine &Msg) { + debugWrite(OS.tell(), Msg + " [data[" + Twine(Count) + "]]"); + OS.write(Bytes, Count); } -void wasm::writeStr(raw_ostream &OS, const StringRef String, const char *msg) { - if (msg) - debugWrite(OS.tell(), - msg + formatv(" [str[{0}]: {1}]", String.size(), String)); - writeUleb128(OS, String.size(), nullptr); - writeBytes(OS, String.data(), String.size()); +void wasm::writeStr(raw_ostream &OS, StringRef String, const Twine &Msg) { + debugWrite(OS.tell(), + Msg + " [str[" + Twine(String.size()) + "]: " + String + "]"); + encodeULEB128(String.size(), OS); + OS.write(String.data(), String.size()); } -void wasm::writeU8(raw_ostream &OS, uint8_t byte, const char *msg) { - OS << byte; +void wasm::writeU8(raw_ostream &OS, uint8_t Byte, const Twine &Msg) { + debugWrite(OS.tell(), Msg + " [0x" + utohexstr(Byte) + "]"); + OS << Byte; } -void wasm::writeU32(raw_ostream &OS, uint32_t Number, const char *msg) { - debugWrite(OS.tell(), msg + formatv("[{0:x}]", Number)); - support::endian::Writer<support::little>(OS).write(Number); +void wasm::writeU32(raw_ostream &OS, uint32_t Number, const Twine &Msg) { + debugWrite(OS.tell(), Msg + "[0x" + utohexstr(Number) + "]"); + support::endian::write(OS, Number, support::little); } -void wasm::writeValueType(raw_ostream &OS, int32_t Type, const char *msg) { - debugWrite(OS.tell(), msg + formatv("[type: {0}]", valueTypeToString(Type))); - writeSleb128(OS, Type, nullptr); +void wasm::writeValueType(raw_ostream &OS, uint8_t Type, const Twine &Msg) { + writeU8(OS, Type, Msg + "[type: " + valueTypeToString(Type) + "]"); } void wasm::writeSig(raw_ostream &OS, const WasmSignature &Sig) { - writeSleb128(OS, WASM_TYPE_FUNC, "signature type"); - writeUleb128(OS, Sig.ParamTypes.size(), "param count"); - for 
(int32_t ParamType : Sig.ParamTypes) { + writeU8(OS, WASM_TYPE_FUNC, "signature type"); + writeUleb128(OS, Sig.ParamTypes.size(), "param Count"); + for (uint8_t ParamType : Sig.ParamTypes) { writeValueType(OS, ParamType, "param type"); } if (Sig.ReturnType == WASM_TYPE_NORESULT) { - writeUleb128(OS, 0, "result count"); + writeUleb128(OS, 0, "result Count"); } else { - writeUleb128(OS, 1, "result count"); + writeUleb128(OS, 1, "result Count"); writeValueType(OS, Sig.ReturnType, "result type"); } } @@ -117,18 +110,27 @@ void wasm::writeInitExpr(raw_ostream &OS, const WasmInitExpr &InitExpr) { } void wasm::writeLimits(raw_ostream &OS, const WasmLimits &Limits) { - writeUleb128(OS, Limits.Flags, "limits flags"); + writeU8(OS, Limits.Flags, "limits flags"); writeUleb128(OS, Limits.Initial, "limits initial"); if (Limits.Flags & WASM_LIMITS_FLAG_HAS_MAX) writeUleb128(OS, Limits.Maximum, "limits max"); } +void wasm::writeGlobalType(raw_ostream &OS, const WasmGlobalType &Type) { + writeValueType(OS, Type.Type, "global type"); + writeU8(OS, Type.Mutable, "global mutable"); +} + void wasm::writeGlobal(raw_ostream &OS, const WasmGlobal &Global) { - writeValueType(OS, Global.Type, "global type"); - writeUleb128(OS, Global.Mutable, "global mutable"); + writeGlobalType(OS, Global.Type); writeInitExpr(OS, Global.InitExpr); } +void wasm::writeTableType(raw_ostream &OS, const llvm::wasm::WasmTable &Type) { + writeU8(OS, WASM_TYPE_ANYFUNC, "table type"); + writeLimits(OS, Type.Limits); +} + void wasm::writeImport(raw_ostream &OS, const WasmImport &Import) { writeStr(OS, Import.Module, "import module name"); writeStr(OS, Import.Field, "import field name"); @@ -138,12 +140,14 @@ void wasm::writeImport(raw_ostream &OS, const WasmImport &Import) { writeUleb128(OS, Import.SigIndex, "import sig index"); break; case WASM_EXTERNAL_GLOBAL: - writeValueType(OS, Import.Global.Type, "import global type"); - writeUleb128(OS, Import.Global.Mutable, "import global mutable"); + writeGlobalType(OS, 
Import.Global); break; case WASM_EXTERNAL_MEMORY: writeLimits(OS, Import.Memory); break; + case WASM_EXTERNAL_TABLE: + writeTableType(OS, Import.Table); + break; default: fatal("unsupported import type: " + Twine(Import.Kind)); } @@ -162,27 +166,13 @@ void wasm::writeExport(raw_ostream &OS, const WasmExport &Export) { case WASM_EXTERNAL_MEMORY: writeUleb128(OS, Export.Index, "memory index"); break; - default: - fatal("unsupported export type: " + Twine(Export.Kind)); - } -} - -void wasm::writeReloc(raw_ostream &OS, const OutputRelocation &Reloc) { - writeUleb128(OS, Reloc.Reloc.Type, "reloc type"); - writeUleb128(OS, Reloc.Reloc.Offset, "reloc offset"); - writeUleb128(OS, Reloc.NewIndex, "reloc index"); - - switch (Reloc.Reloc.Type) { - case R_WEBASSEMBLY_MEMORY_ADDR_LEB: - case R_WEBASSEMBLY_MEMORY_ADDR_SLEB: - case R_WEBASSEMBLY_MEMORY_ADDR_I32: - writeUleb128(OS, Reloc.Reloc.Addend, "reloc addend"); + case WASM_EXTERNAL_TABLE: + writeUleb128(OS, Export.Index, "table index"); break; default: - break; + fatal("unsupported export type: " + Twine(Export.Kind)); } } - } // namespace lld std::string lld::toString(ValType Type) { @@ -195,6 +185,8 @@ std::string lld::toString(ValType Type) { return "F32"; case ValType::F64: return "F64"; + case ValType::EXCEPT_REF: + return "except_ref"; } llvm_unreachable("Invalid wasm::ValType"); } @@ -213,3 +205,8 @@ std::string lld::toString(const WasmSignature &Sig) { S += toString(static_cast<ValType>(Sig.ReturnType)); return S.str(); } + +std::string lld::toString(const WasmGlobalType &Sig) { + return (Sig.Mutable ? 
"var " : "const ") + + toString(static_cast<ValType>(Sig.Type)); +} diff --git a/wasm/WriterUtils.h b/wasm/WriterUtils.h index c1ed90793f78..74d727b24b40 100644 --- a/wasm/WriterUtils.h +++ b/wasm/WriterUtils.h @@ -10,49 +10,32 @@ #ifndef LLD_WASM_WRITERUTILS_H #define LLD_WASM_WRITERUTILS_H +#include "lld/Common/LLVM.h" #include "llvm/ADT/Twine.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/raw_ostream.h" using llvm::raw_ostream; -// Needed for WasmSignatureDenseMapInfo -inline bool operator==(const llvm::wasm::WasmSignature &LHS, - const llvm::wasm::WasmSignature &RHS) { - return LHS.ReturnType == RHS.ReturnType && LHS.ParamTypes == RHS.ParamTypes; -} - -inline bool operator!=(const llvm::wasm::WasmSignature &LHS, - const llvm::wasm::WasmSignature &RHS) { - return !(LHS == RHS); -} - namespace lld { namespace wasm { -struct OutputRelocation { - llvm::wasm::WasmRelocation Reloc; - uint32_t NewIndex; - uint32_t Value; -}; - -void debugWrite(uint64_t offset, llvm::Twine msg); +void debugWrite(uint64_t Offset, const Twine &Msg); -void writeUleb128(raw_ostream &OS, uint32_t Number, const char *msg); +void writeUleb128(raw_ostream &OS, uint32_t Number, const Twine &Msg); -void writeSleb128(raw_ostream &OS, int32_t Number, const char *msg); +void writeSleb128(raw_ostream &OS, int32_t Number, const Twine &Msg); -void writeBytes(raw_ostream &OS, const char *bytes, size_t count, - const char *msg = nullptr); +void writeBytes(raw_ostream &OS, const char *Bytes, size_t count, + const Twine &Msg); -void writeStr(raw_ostream &OS, const llvm::StringRef String, - const char *msg = nullptr); +void writeStr(raw_ostream &OS, StringRef String, const Twine &Msg); -void writeU8(raw_ostream &OS, uint8_t byte, const char *msg); +void writeU8(raw_ostream &OS, uint8_t byte, const Twine &Msg); -void writeU32(raw_ostream &OS, uint32_t Number, const char *msg); +void writeU32(raw_ostream &OS, uint32_t Number, const Twine &Msg); -void writeValueType(raw_ostream &OS, int32_t Type, 
const char *msg); +void writeValueType(raw_ostream &OS, uint8_t Type, const Twine &Msg); void writeSig(raw_ostream &OS, const llvm::wasm::WasmSignature &Sig); @@ -60,18 +43,21 @@ void writeInitExpr(raw_ostream &OS, const llvm::wasm::WasmInitExpr &InitExpr); void writeLimits(raw_ostream &OS, const llvm::wasm::WasmLimits &Limits); +void writeGlobalType(raw_ostream &OS, const llvm::wasm::WasmGlobalType &Type); + void writeGlobal(raw_ostream &OS, const llvm::wasm::WasmGlobal &Global); +void writeTableType(raw_ostream &OS, const llvm::wasm::WasmTable &Type); + void writeImport(raw_ostream &OS, const llvm::wasm::WasmImport &Import); void writeExport(raw_ostream &OS, const llvm::wasm::WasmExport &Export); -void writeReloc(raw_ostream &OS, const OutputRelocation &Reloc); - } // namespace wasm -std::string toString(const llvm::wasm::ValType Type); +std::string toString(llvm::wasm::ValType Type); std::string toString(const llvm::wasm::WasmSignature &Sig); +std::string toString(const llvm::wasm::WasmGlobalType &Sig); } // namespace lld |