summaryrefslogtreecommitdiff
path: root/wasm
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-07-28 11:08:33 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-07-28 11:08:33 +0000
commit20d35e67e67f106f617c939725101223211659f0 (patch)
tree64eb963cbf5ba58765e0a6b64a440965d66a7a4d /wasm
parentae1a339de31cf4065777531959a11e55a2e5fa00 (diff)
Notes
Diffstat (limited to 'wasm')
-rw-r--r--wasm/CMakeLists.txt16
-rw-r--r--wasm/Config.h24
-rw-r--r--wasm/Driver.cpp374
-rw-r--r--wasm/InputChunks.cpp295
-rw-r--r--wasm/InputChunks.h236
-rw-r--r--wasm/InputFiles.cpp462
-rw-r--r--wasm/InputFiles.h95
-rw-r--r--wasm/InputGlobal.h59
-rw-r--r--wasm/InputSegment.cpp25
-rw-r--r--wasm/InputSegment.h76
-rw-r--r--wasm/LTO.cpp155
-rw-r--r--wasm/LTO.h57
-rw-r--r--wasm/MarkLive.cpp118
-rw-r--r--wasm/MarkLive.h21
-rw-r--r--wasm/Options.td119
-rw-r--r--wasm/OutputSections.cpp287
-rw-r--r--wasm/OutputSections.h50
-rw-r--r--wasm/OutputSegment.h28
-rw-r--r--wasm/SymbolTable.cpp390
-rw-r--r--wasm/SymbolTable.h43
-rw-r--r--wasm/Symbols.cpp240
-rw-r--r--wasm/Symbols.h337
-rw-r--r--wasm/Writer.cpp954
-rw-r--r--wasm/WriterUtils.cpp113
-rw-r--r--wasm/WriterUtils.h46
25 files changed, 3308 insertions, 1312 deletions
diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt
index 19b0d168437c..1a9e09b38429 100644
--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@@ -2,10 +2,16 @@ set(LLVM_TARGET_DEFINITIONS Options.td)
tablegen(LLVM Options.inc -gen-opt-parser-defs)
add_public_tablegen_target(WasmOptionsTableGen)
+if(NOT LLD_BUILT_STANDALONE)
+ set(tablegen_deps intrinsics_gen)
+endif()
+
add_lld_library(lldWasm
Driver.cpp
+ InputChunks.cpp
InputFiles.cpp
- InputSegment.cpp
+ LTO.cpp
+ MarkLive.cpp
OutputSections.cpp
SymbolTable.cpp
Symbols.cpp
@@ -17,10 +23,16 @@ add_lld_library(lldWasm
BinaryFormat
Core
Demangle
+ LTO
+ MC
Object
Option
Support
LINK_LIBS
lldCommon
- )
+
+ DEPENDS
+ WasmOptionsTableGen
+ ${tablegen_deps}
+ ) \ No newline at end of file
diff --git a/wasm/Config.h b/wasm/Config.h
index 82f49ce175bb..76a780567072 100644
--- a/wasm/Config.h
+++ b/wasm/Config.h
@@ -13,33 +13,43 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/Wasm.h"
-
-#include "Symbols.h"
-
-using llvm::wasm::WasmGlobal;
+#include "llvm/Support/CachePruning.h"
namespace lld {
namespace wasm {
struct Configuration {
bool AllowUndefined;
- bool CheckSignatures;
+ bool CompressRelocTargets;
bool Demangle;
- bool EmitRelocs;
+ bool DisableVerify;
+ bool ExportAll;
+ bool ExportTable;
+ bool GcSections;
bool ImportMemory;
+ bool ImportTable;
+ bool MergeDataSegments;
+ bool PrintGcSections;
bool Relocatable;
+ bool SaveTemps;
bool StripAll;
bool StripDebug;
+ bool StackFirst;
uint32_t GlobalBase;
uint32_t InitialMemory;
uint32_t MaxMemory;
uint32_t ZStackSize;
+ unsigned LTOPartitions;
+ unsigned LTOO;
+ unsigned Optimize;
+ unsigned ThinLTOJobs;
llvm::StringRef Entry;
llvm::StringRef OutputFile;
+ llvm::StringRef ThinLTOCacheDir;
llvm::StringSet<> AllowUndefinedSymbols;
std::vector<llvm::StringRef> SearchPaths;
- Symbol *StackPointerSymbol = nullptr;
+ llvm::CachePruningPolicy ThinLTOCachePolicy;
};
// The only instance of Configuration struct.
diff --git a/wasm/Driver.cpp b/wasm/Driver.cpp
index 97ec262be308..329b5ae80a9c 100644
--- a/wasm/Driver.cpp
+++ b/wasm/Driver.cpp
@@ -9,11 +9,15 @@
#include "lld/Common/Driver.h"
#include "Config.h"
+#include "InputChunks.h"
+#include "InputGlobal.h"
+#include "MarkLive.h"
#include "SymbolTable.h"
#include "Writer.h"
#include "lld/Common/Args.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
#include "lld/Common/Threads.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/Twine.h"
@@ -22,6 +26,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/TargetSelect.h"
+
+#define DEBUG_TYPE "lld"
using namespace llvm;
using namespace llvm::sys;
@@ -30,14 +37,9 @@ using namespace llvm::wasm;
using namespace lld;
using namespace lld::wasm;
-namespace {
+Configuration *lld::wasm::Config;
-// Parses command line options.
-class WasmOptTable : public llvm::opt::OptTable {
-public:
- WasmOptTable();
- llvm::opt::InputArgList parse(ArrayRef<const char *> Argv);
-};
+namespace {
// Create enum with OPT_xxx values for each option in Options.td
enum {
@@ -47,24 +49,36 @@ enum {
#undef OPTION
};
+// This function is called on startup. We need this for LTO since
+// LTO calls LLVM functions to compile bitcode files to native code.
+// Technically this can be delayed until we read bitcode files, but
+// we don't bother to do it lazily because the initialization is fast.
+static void initLLVM() {
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+}
+
class LinkerDriver {
public:
void link(ArrayRef<const char *> ArgsArr);
private:
- void createFiles(llvm::opt::InputArgList &Args);
+ void createFiles(opt::InputArgList &Args);
void addFile(StringRef Path);
void addLibrary(StringRef Name);
+
+ // True if we are between --whole-archive and --no-whole-archive.
+ bool InWholeArchive = false;
+
std::vector<InputFile *> Files;
};
-
} // anonymous namespace
-Configuration *lld::wasm::Config;
-
bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly,
raw_ostream &Error) {
- errorHandler().LogName = Args[0];
+ errorHandler().LogName = sys::path::filename(Args[0]);
errorHandler().ErrorOS = &Error;
errorHandler().ColorDiagnostics = Error.has_colors();
errorHandler().ErrorLimitExceededMsg =
@@ -74,6 +88,7 @@ bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly,
Config = make<Configuration>();
Symtab = make<SymbolTable>();
+ initLLVM();
LinkerDriver().link(Args);
// Exit immediately if we don't need to return to the caller.
@@ -86,8 +101,6 @@ bool lld::wasm::link(ArrayRef<const char *> Args, bool CanExitEarly,
return !errorCount();
}
-// Create OptTable
-
// Create prefix string literals used in Options.td
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Options.inc"
@@ -102,6 +115,14 @@ static const opt::OptTable::Info OptInfo[] = {
#undef OPTION
};
+namespace {
+class WasmOptTable : public llvm::opt::OptTable {
+public:
+ WasmOptTable() : OptTable(OptInfo) {}
+ opt::InputArgList parse(ArrayRef<const char *> Argv);
+};
+} // namespace
+
// Set color diagnostics according to -color-diagnostics={auto,always,never}
// or -no-color-diagnostics flags.
static void handleColorDiagnostics(opt::InputArgList &Args) {
@@ -109,19 +130,18 @@ static void handleColorDiagnostics(opt::InputArgList &Args) {
OPT_no_color_diagnostics);
if (!Arg)
return;
-
- if (Arg->getOption().getID() == OPT_color_diagnostics)
+ if (Arg->getOption().getID() == OPT_color_diagnostics) {
errorHandler().ColorDiagnostics = true;
- else if (Arg->getOption().getID() == OPT_no_color_diagnostics)
+ } else if (Arg->getOption().getID() == OPT_no_color_diagnostics) {
errorHandler().ColorDiagnostics = false;
- else {
+ } else {
StringRef S = Arg->getValue();
if (S == "always")
errorHandler().ColorDiagnostics = true;
- if (S == "never")
+ else if (S == "never")
errorHandler().ColorDiagnostics = false;
- if (S != "auto")
- error("unknown option: -color-diagnostics=" + S);
+ else if (S != "auto")
+ error("unknown option: --color-diagnostics=" + S);
}
}
@@ -134,25 +154,15 @@ static Optional<std::string> findFile(StringRef Path1, const Twine &Path2) {
return None;
}
-// Inject a new undefined symbol into the link. This will cause the link to
-// fail unless this symbol can be found.
-static void addSyntheticUndefinedFunction(StringRef Name,
- const WasmSignature *Type) {
- log("injecting undefined func: " + Name);
- Symtab->addUndefinedFunction(Name, Type);
-}
-
-static void printHelp(const char *Argv0) {
- WasmOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false);
-}
-
-WasmOptTable::WasmOptTable() : OptTable(OptInfo) {}
-
opt::InputArgList WasmOptTable::parse(ArrayRef<const char *> Argv) {
SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size());
unsigned MissingIndex;
unsigned MissingCount;
+
+ // Expand response files (arguments in the form of @<filename>)
+ cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Vec);
+
opt::InputArgList Args = this->ParseArgs(Vec, MissingIndex, MissingCount);
handleColorDiagnostics(Args);
@@ -161,16 +171,80 @@ opt::InputArgList WasmOptTable::parse(ArrayRef<const char *> Argv) {
return Args;
}
+// Currently we allow a ".imports" to live alongside a library. This can
+// be used to specify a list of symbols which can be undefined at link
+// time (imported from the environment). For example, libc.a includes an
+// import file that lists the syscall functions it relies on at runtime.
+// In the long run this information would be better stored as a symbol
+// attribute/flag in the object file itself.
+// See: https://github.com/WebAssembly/tool-conventions/issues/35
+static void readImportFile(StringRef Filename) {
+ if (Optional<MemoryBufferRef> Buf = readFile(Filename))
+ for (StringRef Sym : args::getLines(*Buf))
+ Config->AllowUndefinedSymbols.insert(Sym);
+}
+
+// Returns slices of MB by parsing MB as an archive file.
+// Each slice consists of a member file in the archive.
+std::vector<MemoryBufferRef> static getArchiveMembers(
+ MemoryBufferRef MB) {
+ std::unique_ptr<Archive> File =
+ CHECK(Archive::create(MB),
+ MB.getBufferIdentifier() + ": failed to parse archive");
+
+ std::vector<MemoryBufferRef> V;
+ Error Err = Error::success();
+ for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
+ Archive::Child C =
+ CHECK(COrErr, MB.getBufferIdentifier() +
+ ": could not get the child of the archive");
+ MemoryBufferRef MBRef =
+ CHECK(C.getMemoryBufferRef(),
+ MB.getBufferIdentifier() +
+ ": could not get the buffer for a child of the archive");
+ V.push_back(MBRef);
+ }
+ if (Err)
+ fatal(MB.getBufferIdentifier() + ": Archive::children failed: " +
+ toString(std::move(Err)));
+
+ // Take ownership of memory buffers created for members of thin archives.
+ for (std::unique_ptr<MemoryBuffer> &MB : File->takeThinBuffers())
+ make<std::unique_ptr<MemoryBuffer>>(std::move(MB));
+
+ return V;
+}
+
void LinkerDriver::addFile(StringRef Path) {
Optional<MemoryBufferRef> Buffer = readFile(Path);
if (!Buffer.hasValue())
return;
MemoryBufferRef MBRef = *Buffer;
- if (identify_magic(MBRef.getBuffer()) == file_magic::archive)
+ switch (identify_magic(MBRef.getBuffer())) {
+ case file_magic::archive: {
+ // Handle -whole-archive.
+ if (InWholeArchive) {
+ for (MemoryBufferRef &M : getArchiveMembers(MBRef))
+ Files.push_back(createObjectFile(M));
+ return;
+ }
+
+ SmallString<128> ImportFile = Path;
+ path::replace_extension(ImportFile, ".imports");
+ if (fs::exists(ImportFile))
+ readImportFile(ImportFile.str());
+
Files.push_back(make<ArchiveFile>(MBRef));
- else
- Files.push_back(make<ObjFile>(MBRef));
+ return;
+ }
+ case file_magic::bitcode:
+ case file_magic::wasm_object:
+ Files.push_back(createObjectFile(MBRef));
+ break;
+ default:
+ error("unknown file type: " + MBRef.getBufferIdentifier());
+ }
}
// Add a given library by searching it from input search paths.
@@ -194,11 +268,14 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) {
case OPT_INPUT:
addFile(Arg->getValue());
break;
+ case OPT_whole_archive:
+ InWholeArchive = true;
+ break;
+ case OPT_no_whole_archive:
+ InWholeArchive = false;
+ break;
}
}
-
- if (Files.empty())
- error("no input files");
}
static StringRef getEntry(opt::InputArgList &Args, StringRef Default) {
@@ -210,13 +287,71 @@ static StringRef getEntry(opt::InputArgList &Args, StringRef Default) {
return Arg->getValue();
}
+static const uint8_t UnreachableFn[] = {
+ 0x03 /* ULEB length */, 0x00 /* ULEB num locals */,
+ 0x00 /* opcode unreachable */, 0x0b /* opcode end */
+};
+
+// For weak undefined functions, there may be "call" instructions that reference
+// the symbol. In this case, we need to synthesise a dummy/stub function that
+// will abort at runtime, so that relocations can still provide an operand to
+// the call instruction that passes Wasm validation.
+static void handleWeakUndefines() {
+ for (Symbol *Sym : Symtab->getSymbols()) {
+ if (!Sym->isUndefined() || !Sym->isWeak())
+ continue;
+ auto *FuncSym = dyn_cast<FunctionSymbol>(Sym);
+ if (!FuncSym)
+ continue;
+
+ // It is possible for undefined functions not to have a signature (eg. if
+ // added via "--undefined"), but weak undefined ones do have a signature.
+ assert(FuncSym->FunctionType);
+ const WasmSignature &Sig = *FuncSym->FunctionType;
+
+ // Add a synthetic dummy for weak undefined functions. These dummies will
+ // be GC'd if not used as the target of any "call" instructions.
+ Optional<std::string> SymName = demangleItanium(Sym->getName());
+ StringRef DebugName =
+ Saver.save("undefined function " +
+ (SymName ? StringRef(*SymName) : Sym->getName()));
+ SyntheticFunction *Func =
+ make<SyntheticFunction>(Sig, Sym->getName(), DebugName);
+ Func->setBody(UnreachableFn);
+ // Ensure it compares equal to the null pointer, and so that table relocs
+ // don't pull in the stub body (only call-operand relocs should do that).
+ Func->setTableIndex(0);
+ Symtab->SyntheticFunctions.emplace_back(Func);
+ // Hide our dummy to prevent export.
+ uint32_t Flags = WASM_SYMBOL_VISIBILITY_HIDDEN;
+ replaceSymbol<DefinedFunction>(Sym, Sym->getName(), Flags, nullptr, Func);
+ }
+}
+
+// Force Sym to be entered in the output. Used for -u or equivalent.
+static Symbol *addUndefined(StringRef Name) {
+ Symbol *S = Symtab->addUndefinedFunction(Name, 0, nullptr, nullptr);
+
+ // Since symbol S may not be used inside the program, LTO may
+ // eliminate it. Mark the symbol as "used" to prevent it.
+ S->IsUsedInRegularObj = true;
+
+ return S;
+}
+
void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
WasmOptTable Parser;
opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
// Handle --help
if (Args.hasArg(OPT_help)) {
- printHelp(ArgsArr[0]);
+ Parser.PrintHelp(outs(), ArgsArr[0], "LLVM Linker", false);
+ return;
+ }
+
+ // Handle --version
+ if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) {
+ outs() << getLLDVersion() << "\n";
return;
}
@@ -229,26 +364,40 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
errorHandler().ErrorLimit = args::getInteger(Args, OPT_error_limit, 20);
- if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) {
- outs() << getLLDVersion() << "\n";
- return;
- }
-
Config->AllowUndefined = Args.hasArg(OPT_allow_undefined);
- Config->CheckSignatures =
- Args.hasFlag(OPT_check_signatures, OPT_no_check_signatures, false);
- Config->EmitRelocs = Args.hasArg(OPT_emit_relocs);
+ Config->Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, true);
+ Config->DisableVerify = Args.hasArg(OPT_disable_verify);
Config->Entry = getEntry(Args, Args.hasArg(OPT_relocatable) ? "" : "_start");
+ Config->ExportAll = Args.hasArg(OPT_export_all);
+ Config->ExportTable = Args.hasArg(OPT_export_table);
+ errorHandler().FatalWarnings =
+ Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
Config->ImportMemory = Args.hasArg(OPT_import_memory);
+ Config->ImportTable = Args.hasArg(OPT_import_table);
+ Config->LTOO = args::getInteger(Args, OPT_lto_O, 2);
+ Config->LTOPartitions = args::getInteger(Args, OPT_lto_partitions, 1);
+ Config->Optimize = args::getInteger(Args, OPT_O, 0);
Config->OutputFile = Args.getLastArgValue(OPT_o);
Config->Relocatable = Args.hasArg(OPT_relocatable);
+ Config->GcSections =
+ Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, !Config->Relocatable);
+ Config->MergeDataSegments =
+ Args.hasFlag(OPT_merge_data_segments, OPT_no_merge_data_segments,
+ !Config->Relocatable);
+ Config->PrintGcSections =
+ Args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
+ Config->SaveTemps = Args.hasArg(OPT_save_temps);
Config->SearchPaths = args::getStrings(Args, OPT_L);
Config->StripAll = Args.hasArg(OPT_strip_all);
Config->StripDebug = Args.hasArg(OPT_strip_debug);
+ Config->StackFirst = Args.hasArg(OPT_stack_first);
+ Config->ThinLTOCacheDir = Args.getLastArgValue(OPT_thinlto_cache_dir);
+ Config->ThinLTOCachePolicy = CHECK(
+ parseCachePruningPolicy(Args.getLastArgValue(OPT_thinlto_cache_policy)),
+ "--thinlto-cache-policy: invalid cache policy");
+ Config->ThinLTOJobs = args::getInteger(Args, OPT_thinlto_jobs, -1u);
errorHandler().Verbose = Args.hasArg(OPT_verbose);
ThreadsEnabled = Args.hasFlag(OPT_threads, OPT_no_threads, true);
- if (Config->Relocatable)
- Config->EmitRelocs = true;
Config->InitialMemory = args::getInteger(Args, OPT_initial_memory, 0);
Config->GlobalBase = args::getInteger(Args, OPT_global_base, 1024);
@@ -256,33 +405,72 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
Config->ZStackSize =
args::getZOptionValue(Args, OPT_z, "stack-size", WasmPageSize);
+ Config->CompressRelocTargets = Config->Optimize > 0 && !Config->Relocatable;
+
+ if (Config->LTOO > 3)
+ error("invalid optimization level for LTO: " + Twine(Config->LTOO));
+ if (Config->LTOPartitions == 0)
+ error("--lto-partitions: number of threads must be > 0");
+ if (Config->ThinLTOJobs == 0)
+ error("--thinlto-jobs: number of threads must be > 0");
+
if (auto *Arg = Args.getLastArg(OPT_allow_undefined_file))
- if (Optional<MemoryBufferRef> Buf = readFile(Arg->getValue()))
- for (StringRef Sym : args::getLines(*Buf))
- Config->AllowUndefinedSymbols.insert(Sym);
+ readImportFile(Arg->getValue());
+
+ if (!Args.hasArg(OPT_INPUT)) {
+ error("no input files");
+ return;
+ }
if (Config->OutputFile.empty())
error("no output file specified");
- if (!Args.hasArg(OPT_INPUT))
- error("no input files");
+ if (Config->ImportTable && Config->ExportTable)
+ error("--import-table and --export-table may not be used together");
- if (Config->Relocatable && !Config->Entry.empty())
- error("entry point specified for relocatable output file");
- if (Config->Relocatable && Args.hasArg(OPT_undefined))
- error("undefined symbols specified for relocatable output file");
+ if (Config->Relocatable) {
+ if (!Config->Entry.empty())
+ error("entry point specified for relocatable output file");
+ if (Config->GcSections)
+ error("-r and --gc-sections may not be used together");
+ if (Args.hasArg(OPT_undefined))
+ error("-r -and --undefined may not be used together");
+ }
+ Symbol *EntrySym = nullptr;
if (!Config->Relocatable) {
- if (!Config->Entry.empty()) {
- static WasmSignature Signature = {{}, WASM_TYPE_NORESULT};
- addSyntheticUndefinedFunction(Config->Entry, &Signature);
- }
+ llvm::wasm::WasmGlobal Global;
+ Global.Type = {WASM_TYPE_I32, true};
+ Global.InitExpr.Value.Int32 = 0;
+ Global.InitExpr.Opcode = WASM_OPCODE_I32_CONST;
+ Global.SymbolName = "__stack_pointer";
+ InputGlobal *StackPointer = make<InputGlobal>(Global, nullptr);
+ StackPointer->Live = true;
+
+ static WasmSignature NullSignature = {{}, WASM_TYPE_NORESULT};
+
+ // Add synthetic symbols before any others
+ WasmSym::CallCtors = Symtab->addSyntheticFunction(
+ "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN,
+ make<SyntheticFunction>(NullSignature, "__wasm_call_ctors"));
+ // TODO(sbc): Remove WASM_SYMBOL_VISIBILITY_HIDDEN when the mutable global
+ // spec proposal is implemented in all major browsers.
+ // See: https://github.com/WebAssembly/mutable-global
+ WasmSym::StackPointer = Symtab->addSyntheticGlobal(
+ "__stack_pointer", WASM_SYMBOL_VISIBILITY_HIDDEN, StackPointer);
+ WasmSym::HeapBase = Symtab->addSyntheticDataSymbol("__heap_base", 0);
+ WasmSym::DsoHandle = Symtab->addSyntheticDataSymbol(
+ "__dso_handle", WASM_SYMBOL_VISIBILITY_HIDDEN);
+ WasmSym::DataEnd = Symtab->addSyntheticDataSymbol("__data_end", 0);
+
+ // For now, since we don't actually use the start function as the
+ // wasm start symbol, we don't need to care about its signature.
+ if (!Config->Entry.empty())
+ EntrySym = addUndefined(Config->Entry);
// Handle the `--undefined <sym>` options.
- for (StringRef S : args::getStrings(Args, OPT_undefined))
- addSyntheticUndefinedFunction(S, nullptr);
-
- Config->StackPointerSymbol = Symtab->addDefinedGlobal("__stack_pointer");
+ for (auto *Arg : Args.filtered(OPT_undefined))
+ addUndefined(Arg->getValue());
}
createFiles(Args);
@@ -293,29 +481,59 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// symbols that we need to the symbol table.
for (InputFile *F : Files)
Symtab->addFile(F);
+ if (errorCount())
+ return;
+
+ // Add synthetic dummies for weak undefined functions.
+ if (!Config->Relocatable)
+ handleWeakUndefines();
+
+ // Handle --export.
+ for (auto *Arg : Args.filtered(OPT_export)) {
+ StringRef Name = Arg->getValue();
+ Symbol *Sym = Symtab->find(Name);
+ if (Sym && Sym->isDefined())
+ Sym->ForceExport = true;
+ else if (!Config->AllowUndefined)
+ error("symbol exported via --export not found: " + Name);
+ }
+
+ // Do link-time optimization if given files are LLVM bitcode files.
+ // This compiles bitcode files into real object files.
+ Symtab->addCombinedLTOObject();
+ if (errorCount())
+ return;
// Make sure we have resolved all symbols.
if (!Config->Relocatable && !Config->AllowUndefined) {
Symtab->reportRemainingUndefines();
} else {
- // When we allow undefined symbols we cannot include those defined in
- // -u/--undefined since these undefined symbols have only names and no
- // function signature, which means they cannot be written to the final
- // output.
- for (StringRef S : args::getStrings(Args, OPT_undefined)) {
- Symbol *Sym = Symtab->find(S);
+ // Even when using --allow-undefined we still want to report the absence of
+ // our initial set of undefined symbols (i.e. the entry point and symbols
+ // specified via --undefined).
+ // Part of the reason for this is that these functions don't have signatures,
+ // which means they cannot be written as wasm function imports.
+ for (auto *Arg : Args.filtered(OPT_undefined)) {
+ Symbol *Sym = Symtab->find(Arg->getValue());
if (!Sym->isDefined())
- error("function forced with --undefined not found: " + Sym->getName());
+ error("symbol forced with --undefined not found: " + Sym->getName());
}
+ if (EntrySym && !EntrySym->isDefined())
+ error("entry symbol not defined (pass --no-entry to supress): " +
+ EntrySym->getName());
}
if (errorCount())
return;
- if (!Config->Entry.empty() && !Symtab->find(Config->Entry)->isDefined())
- error("entry point not found: " + Config->Entry);
+ if (EntrySym)
+ EntrySym->setHidden(false);
+
if (errorCount())
return;
+ // Do size optimizations: garbage collection
+ markLive();
+
// Write the result to the file.
writeResult();
}
diff --git a/wasm/InputChunks.cpp b/wasm/InputChunks.cpp
new file mode 100644
index 000000000000..fcefac7d99b8
--- /dev/null
+++ b/wasm/InputChunks.cpp
@@ -0,0 +1,295 @@
+//===- InputChunks.cpp ----------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InputChunks.h"
+#include "Config.h"
+#include "OutputSegment.h"
+#include "WriterUtils.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/LLVM.h"
+#include "llvm/Support/LEB128.h"
+
+#define DEBUG_TYPE "lld"
+
+using namespace llvm;
+using namespace llvm::wasm;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::wasm;
+
+static StringRef ReloctTypeToString(uint8_t RelocType) {
+ switch (RelocType) {
+#define WASM_RELOC(NAME, REL) case REL: return #NAME;
+#include "llvm/BinaryFormat/WasmRelocs.def"
+#undef WASM_RELOC
+ }
+ llvm_unreachable("unknown reloc type");
+}
+
+std::string lld::toString(const InputChunk *C) {
+ return (toString(C->File) + ":(" + C->getName() + ")").str();
+}
+
+StringRef InputChunk::getComdatName() const {
+ uint32_t Index = getComdat();
+ if (Index == UINT32_MAX)
+ return StringRef();
+ return File->getWasmObj()->linkingData().Comdats[Index];
+}
+
+void InputChunk::copyRelocations(const WasmSection &Section) {
+ if (Section.Relocations.empty())
+ return;
+ size_t Start = getInputSectionOffset();
+ size_t Size = getInputSize();
+ for (const WasmRelocation &R : Section.Relocations)
+ if (R.Offset >= Start && R.Offset < Start + Size)
+ Relocations.push_back(R);
+}
+
+void InputChunk::verifyRelocTargets() const {
+ for (const WasmRelocation &Rel : Relocations) {
+ uint32_t ExistingValue;
+ unsigned BytesRead = 0;
+ uint32_t Offset = Rel.Offset - getInputSectionOffset();
+ const uint8_t *Loc = data().data() + Offset;
+ switch (Rel.Type) {
+ case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ ExistingValue = decodeULEB128(Loc, &BytesRead);
+ break;
+ case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ ExistingValue = static_cast<uint32_t>(decodeSLEB128(Loc, &BytesRead));
+ break;
+ case R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ case R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ ExistingValue = static_cast<uint32_t>(read32le(Loc));
+ break;
+ default:
+ llvm_unreachable("unknown relocation type");
+ }
+
+ if (BytesRead && BytesRead != 5)
+ warn("expected LEB at relocation site be 5-byte padded");
+ uint32_t ExpectedValue = File->calcExpectedValue(Rel);
+ if (ExpectedValue != ExistingValue)
+ warn("unexpected existing value for " + ReloctTypeToString(Rel.Type) +
+ ": existing=" + Twine(ExistingValue) +
+ " expected=" + Twine(ExpectedValue));
+ }
+}
+
+// Copy this input chunk to an mmap'ed output file and apply relocations.
+void InputChunk::writeTo(uint8_t *Buf) const {
+ // Copy contents
+ memcpy(Buf + OutputOffset, data().data(), data().size());
+
+ // Apply relocations
+ if (Relocations.empty())
+ return;
+
+#ifndef NDEBUG
+ verifyRelocTargets();
+#endif
+
+ LLVM_DEBUG(dbgs() << "applying relocations: " << getName()
+ << " count=" << Relocations.size() << "\n");
+ int32_t Off = OutputOffset - getInputSectionOffset();
+
+ for (const WasmRelocation &Rel : Relocations) {
+ uint8_t *Loc = Buf + Rel.Offset + Off;
+ uint32_t Value = File->calcNewValue(Rel);
+ LLVM_DEBUG(dbgs() << "apply reloc: type=" << ReloctTypeToString(Rel.Type)
+ << " addend=" << Rel.Addend << " index=" << Rel.Index
+ << " value=" << Value << " offset=" << Rel.Offset
+ << "\n");
+
+ switch (Rel.Type) {
+ case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ encodeULEB128(Value, Loc, 5);
+ break;
+ case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ encodeSLEB128(static_cast<int32_t>(Value), Loc, 5);
+ break;
+ case R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ case R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ write32le(Loc, Value);
+ break;
+ default:
+ llvm_unreachable("unknown relocation type");
+ }
+ }
+}
+
+// Copy relocation entries to a given output stream.
+// This function is used only when a user passes "-r". For a regular link,
+// we consume relocations instead of copying them to an output file.
+void InputChunk::writeRelocations(raw_ostream &OS) const {
+ if (Relocations.empty())
+ return;
+
+ int32_t Off = OutputOffset - getInputSectionOffset();
+ LLVM_DEBUG(dbgs() << "writeRelocations: " << File->getName()
+ << " offset=" << Twine(Off) << "\n");
+
+ for (const WasmRelocation &Rel : Relocations) {
+ writeUleb128(OS, Rel.Type, "reloc type");
+ writeUleb128(OS, Rel.Offset + Off, "reloc offset");
+ writeUleb128(OS, File->calcNewIndex(Rel), "reloc index");
+
+ switch (Rel.Type) {
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ case R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ writeSleb128(OS, File->calcNewAddend(Rel), "reloc addend");
+ break;
+ }
+ }
+}
+
+void InputFunction::setFunctionIndex(uint32_t Index) {
+ LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << getName()
+ << " -> " << Index << "\n");
+ assert(!hasFunctionIndex());
+ FunctionIndex = Index;
+}
+
+void InputFunction::setTableIndex(uint32_t Index) {
+ LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << getName() << " -> "
+ << Index << "\n");
+ assert(!hasTableIndex());
+ TableIndex = Index;
+}
+
+// Write a relocation value without padding and return the number of bytes
+// written.
+static unsigned writeCompressedReloc(uint8_t *Buf, const WasmRelocation &Rel,
+ uint32_t Value) {
+ switch (Rel.Type) {
+ case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ return encodeULEB128(Value, Buf);
+ case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ return encodeSLEB128(static_cast<int32_t>(Value), Buf);
+ default:
+ llvm_unreachable("unexpected relocation type");
+ }
+}
+
+static unsigned getRelocWidthPadded(const WasmRelocation &Rel) {
+ switch (Rel.Type) {
+ case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ return 5;
+ default:
+ llvm_unreachable("unexpected relocation type");
+ }
+}
+
+static unsigned getRelocWidth(const WasmRelocation &Rel, uint32_t Value) {
+ uint8_t Buf[5];
+ return writeCompressedReloc(Buf, Rel, Value);
+}
+
+// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
+// so that a fast linker can blindly overwrite them without needing to worry
+// about the number of bytes needed to encode the values.
+// However, for optimal output the code section can be compressed to remove
+// the padding when outputting non-relocatable files.
+// In this case we need to perform a size calculation based on the value at each
+// relocation. At best we end up saving 4 bytes for each relocation entry.
+//
+// This function only computes the final output size. It must be called
+// before getSize() is used to calculate the layout of the code section.
+void InputFunction::calculateSize() {
+ if (!File || !Config->CompressRelocTargets)
+ return;
+
+ LLVM_DEBUG(dbgs() << "calculateSize: " << getName() << "\n");
+
+ const uint8_t *SecStart = File->CodeSection->Content.data();
+ const uint8_t *FuncStart = SecStart + getInputSectionOffset();
+ uint32_t FunctionSizeLength;
+ decodeULEB128(FuncStart, &FunctionSizeLength);
+
+ uint32_t Start = getInputSectionOffset();
+ uint32_t End = Start + Function->Size;
+
+ uint32_t LastRelocEnd = Start + FunctionSizeLength;
+ for (WasmRelocation &Rel : Relocations) {
+ LLVM_DEBUG(dbgs() << " region: " << (Rel.Offset - LastRelocEnd) << "\n");
+ CompressedFuncSize += Rel.Offset - LastRelocEnd;
+ CompressedFuncSize += getRelocWidth(Rel, File->calcNewValue(Rel));
+ LastRelocEnd = Rel.Offset + getRelocWidthPadded(Rel);
+ }
+ LLVM_DEBUG(dbgs() << " final region: " << (End - LastRelocEnd) << "\n");
+ CompressedFuncSize += End - LastRelocEnd;
+
+ // Now we know how long the resulting function is we can add the encoding
+ // of its length
+ uint8_t Buf[5];
+ CompressedSize = CompressedFuncSize + encodeULEB128(CompressedFuncSize, Buf);
+
+ LLVM_DEBUG(dbgs() << " calculateSize orig: " << Function->Size << "\n");
+ LLVM_DEBUG(dbgs() << " calculateSize new: " << CompressedSize << "\n");
+}
+
+// Override the default writeTo method so that we can (optionally) write the
+// compressed version of the function.
+void InputFunction::writeTo(uint8_t *Buf) const {
+ if (!File || !Config->CompressRelocTargets)
+ return InputChunk::writeTo(Buf);
+
+ Buf += OutputOffset;
+ uint8_t *Orig = Buf; (void)Orig;
+
+ const uint8_t *SecStart = File->CodeSection->Content.data();
+ const uint8_t *FuncStart = SecStart + getInputSectionOffset();
+ const uint8_t *End = FuncStart + Function->Size;
+ uint32_t Count;
+ decodeULEB128(FuncStart, &Count);
+ FuncStart += Count;
+
+ LLVM_DEBUG(dbgs() << "write func: " << getName() << "\n");
+ Buf += encodeULEB128(CompressedFuncSize, Buf);
+ const uint8_t *LastRelocEnd = FuncStart;
+ for (const WasmRelocation &Rel : Relocations) {
+ unsigned ChunkSize = (SecStart + Rel.Offset) - LastRelocEnd;
+ LLVM_DEBUG(dbgs() << " write chunk: " << ChunkSize << "\n");
+ memcpy(Buf, LastRelocEnd, ChunkSize);
+ Buf += ChunkSize;
+ Buf += writeCompressedReloc(Buf, Rel, File->calcNewValue(Rel));
+ LastRelocEnd = SecStart + Rel.Offset + getRelocWidthPadded(Rel);
+ }
+
+ unsigned ChunkSize = End - LastRelocEnd;
+ LLVM_DEBUG(dbgs() << " write final chunk: " << ChunkSize << "\n");
+ memcpy(Buf, LastRelocEnd, ChunkSize);
+ LLVM_DEBUG(dbgs() << " total: " << (Buf + ChunkSize - Orig) << "\n");
+}
diff --git a/wasm/InputChunks.h b/wasm/InputChunks.h
new file mode 100644
index 000000000000..526e29870b21
--- /dev/null
+++ b/wasm/InputChunks.h
@@ -0,0 +1,236 @@
+//===- InputChunks.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An InputChunk represents an indivisible opaque region of an input wasm file,
+// e.g. a single wasm data segment, function or custom section.
+//
+// They are written directly to the mmap'd output file after which relocations
+// are applied. Because each Chunk is independent they can be written in
+// parallel.
+//
+// Chunks are also the unit that garbage collection (--gc-sections) works on.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_WASM_INPUT_CHUNKS_H
+#define LLD_WASM_INPUT_CHUNKS_H
+
+#include "Config.h"
+#include "InputFiles.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/Wasm.h"
+
+using llvm::object::WasmSection;
+using llvm::object::WasmSegment;
+using llvm::wasm::WasmFunction;
+using llvm::wasm::WasmRelocation;
+using llvm::wasm::WasmSignature;
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace lld {
+namespace wasm {
+
+class ObjFile;
+class OutputSegment;
+
+class InputChunk {
+public:
+ enum Kind { DataSegment, Function, SyntheticFunction, Section };
+
+ Kind kind() const { return SectionKind; }
+
+ virtual uint32_t getSize() const { return data().size(); }
+
+ void copyRelocations(const WasmSection &Section);
+
+ virtual void writeTo(uint8_t *SectionStart) const;
+
+ ArrayRef<WasmRelocation> getRelocations() const { return Relocations; }
+
+ virtual StringRef getName() const = 0;
+ virtual StringRef getDebugName() const = 0;
+ virtual uint32_t getComdat() const = 0;
+ StringRef getComdatName() const;
+
+ size_t NumRelocations() const { return Relocations.size(); }
+ void writeRelocations(llvm::raw_ostream &OS) const;
+
+ ObjFile *File;
+ int32_t OutputOffset = 0;
+
+ // Signals that the section is part of the output. The garbage collector
+ // and COMDAT handling can change a section's Live bit.
+ // If GC is disabled, all sections start out as live by default.
+ unsigned Live : 1;
+
+protected:
+ InputChunk(ObjFile *F, Kind K)
+ : File(F), Live(!Config->GcSections), SectionKind(K) {}
+ virtual ~InputChunk() = default;
+ virtual ArrayRef<uint8_t> data() const = 0;
+ virtual uint32_t getInputSectionOffset() const = 0;
+ virtual uint32_t getInputSize() const { return getSize(); };
+
+ // Verifies the existing data at relocation targets matches our expectations.
+ // This is performed only in debug builds as an extra sanity check.
+ void verifyRelocTargets() const;
+
+ std::vector<WasmRelocation> Relocations;
+ Kind SectionKind;
+};
+
+// Represents a WebAssembly data segment which can be included as part of
+// an output data segment. Note that WebAssembly, unlike ELF and other
+// formats, uses the term "data segment" to refer to the contiguous regions of
+// memory that make up the data section. See:
+// https://webassembly.github.io/spec/syntax/modules.html#syntax-data
+//
+// For example, by default, clang will produce a separate data section for
+// each global variable.
+class InputSegment : public InputChunk {
+public:
+ InputSegment(const WasmSegment &Seg, ObjFile *F)
+ : InputChunk(F, InputChunk::DataSegment), Segment(Seg) {}
+
+ static bool classof(const InputChunk *C) { return C->kind() == DataSegment; }
+
+ uint32_t getAlignment() const { return Segment.Data.Alignment; }
+ StringRef getName() const override { return Segment.Data.Name; }
+ StringRef getDebugName() const override { return StringRef(); }
+ uint32_t getComdat() const override { return Segment.Data.Comdat; }
+
+ const OutputSegment *OutputSeg = nullptr;
+ int32_t OutputSegmentOffset = 0;
+
+protected:
+ ArrayRef<uint8_t> data() const override { return Segment.Data.Content; }
+ uint32_t getInputSectionOffset() const override {
+ return Segment.SectionOffset;
+ }
+
+ const WasmSegment &Segment;
+};
+
+// Represents a single wasm function within an input file. These are
+// combined to create the final output CODE section.
+class InputFunction : public InputChunk {
+public:
+ InputFunction(const WasmSignature &S, const WasmFunction *Func, ObjFile *F)
+ : InputChunk(F, InputChunk::Function), Signature(S), Function(Func) {}
+
+ static bool classof(const InputChunk *C) {
+ return C->kind() == InputChunk::Function ||
+ C->kind() == InputChunk::SyntheticFunction;
+ }
+
+ void writeTo(uint8_t *SectionStart) const override;
+ StringRef getName() const override { return Function->SymbolName; }
+ StringRef getDebugName() const override { return Function->DebugName; }
+ uint32_t getComdat() const override { return Function->Comdat; }
+ uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); }
+ uint32_t getFunctionCodeOffset() const { return Function->CodeOffset; }
+ uint32_t getSize() const override {
+ if (Config->CompressRelocTargets && File) {
+ assert(CompressedSize);
+ return CompressedSize;
+ }
+ return data().size();
+ }
+ uint32_t getFunctionIndex() const { return FunctionIndex.getValue(); }
+ bool hasFunctionIndex() const { return FunctionIndex.hasValue(); }
+ void setFunctionIndex(uint32_t Index);
+ uint32_t getTableIndex() const { return TableIndex.getValue(); }
+ bool hasTableIndex() const { return TableIndex.hasValue(); }
+ void setTableIndex(uint32_t Index);
+
+ // The size of a given input function can depend on the values of the
+ // LEB relocations within it. This calculateSize method is called after
+ // all the symbol values have been calculated but before getSize() is ever
+ // called.
+ void calculateSize();
+
+ const WasmSignature &Signature;
+
+protected:
+ ArrayRef<uint8_t> data() const override {
+ assert(!Config->CompressRelocTargets);
+ return File->CodeSection->Content.slice(getInputSectionOffset(),
+ Function->Size);
+ }
+
+ uint32_t getInputSize() const override { return Function->Size; }
+
+ uint32_t getInputSectionOffset() const override {
+ return Function->CodeSectionOffset;
+ }
+
+ const WasmFunction *Function;
+ llvm::Optional<uint32_t> FunctionIndex;
+ llvm::Optional<uint32_t> TableIndex;
+ uint32_t CompressedFuncSize = 0;
+ uint32_t CompressedSize = 0;
+};
+
+class SyntheticFunction : public InputFunction {
+public:
+ SyntheticFunction(const WasmSignature &S, StringRef Name,
+ StringRef DebugName = {})
+ : InputFunction(S, nullptr, nullptr), Name(Name), DebugName(DebugName) {
+ SectionKind = InputChunk::SyntheticFunction;
+ }
+
+ static bool classof(const InputChunk *C) {
+ return C->kind() == InputChunk::SyntheticFunction;
+ }
+
+ StringRef getName() const override { return Name; }
+ StringRef getDebugName() const override { return DebugName; }
+ uint32_t getComdat() const override { return UINT32_MAX; }
+
+ void setBody(ArrayRef<uint8_t> Body_) { Body = Body_; }
+
+protected:
+ ArrayRef<uint8_t> data() const override { return Body; }
+
+ StringRef Name;
+ StringRef DebugName;
+ ArrayRef<uint8_t> Body;
+};
+
+// Represents a single Wasm Section within an input file.
+class InputSection : public InputChunk {
+public:
+ InputSection(const WasmSection &S, ObjFile *F)
+ : InputChunk(F, InputChunk::Section), Section(S) {
+ assert(Section.Type == llvm::wasm::WASM_SEC_CUSTOM);
+ }
+
+ StringRef getName() const override { return Section.Name; }
+ StringRef getDebugName() const override { return StringRef(); }
+ uint32_t getComdat() const override { return UINT32_MAX; }
+
+protected:
+ ArrayRef<uint8_t> data() const override { return Section.Content; }
+
+ // Offset within the input section. This is always zero since this chunk
+ // type represents an entire input section, not part of one.
+ uint32_t getInputSectionOffset() const override { return 0; }
+
+ const WasmSection &Section;
+};
+
+} // namespace wasm
+
+std::string toString(const wasm::InputChunk *);
+} // namespace lld
+
+#endif // LLD_WASM_INPUT_CHUNKS_H
diff --git a/wasm/InputFiles.cpp b/wasm/InputFiles.cpp
index 1a1a6812c48e..53a24c3cffd4 100644
--- a/wasm/InputFiles.cpp
+++ b/wasm/InputFiles.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
-
#include "Config.h"
-#include "InputSegment.h"
+#include "InputChunks.h"
+#include "InputGlobal.h"
#include "SymbolTable.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
@@ -42,64 +42,126 @@ Optional<MemoryBufferRef> lld::wasm::readFile(StringRef Path) {
return MBRef;
}
-void ObjFile::dumpInfo() const {
- log("reloc info for: " + getName() + "\n" +
- " FunctionIndexOffset : " + Twine(FunctionIndexOffset) + "\n" +
- " NumFunctionImports : " + Twine(NumFunctionImports()) + "\n" +
- " NumGlobalImports : " + Twine(NumGlobalImports()) + "\n");
-}
+InputFile *lld::wasm::createObjectFile(MemoryBufferRef MB) {
+ file_magic Magic = identify_magic(MB.getBuffer());
+ if (Magic == file_magic::wasm_object)
+ return make<ObjFile>(MB);
-bool ObjFile::isImportedFunction(uint32_t Index) const {
- return Index < NumFunctionImports();
-}
+ if (Magic == file_magic::bitcode)
+ return make<BitcodeFile>(MB);
-Symbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
- return FunctionSymbols[Index];
+ fatal("unknown file type: " + MB.getBufferIdentifier());
}
-Symbol *ObjFile::getTableSymbol(uint32_t Index) const {
- return TableSymbols[Index];
-}
-
-Symbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
- return GlobalSymbols[Index];
-}
-
-uint32_t ObjFile::getRelocatedAddress(uint32_t Index) const {
- return getGlobalSymbol(Index)->getVirtualAddress();
+void ObjFile::dumpInfo() const {
+ log("info for: " + getName() +
+ "\n Symbols : " + Twine(Symbols.size()) +
+ "\n Function Imports : " + Twine(WasmObj->getNumImportedFunctions()) +
+ "\n Global Imports : " + Twine(WasmObj->getNumImportedGlobals()));
}
-uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const {
- Symbol *Sym = getFunctionSymbol(Original);
- uint32_t Index = Sym->getOutputIndex();
- DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": "
- << Original << " -> " << Index << "\n");
- return Index;
+// Relocations contain either symbol or type indices. This function takes a
+// relocation and returns the relocated index (i.e. translates from the input
+// symbol/type space to the output symbol/type space).
+uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
+ if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
+ assert(TypeIsUsed[Reloc.Index]);
+ return TypeMap[Reloc.Index];
+ }
+ return Symbols[Reloc.Index]->getOutputSymbolIndex();
}
-uint32_t ObjFile::relocateTypeIndex(uint32_t Original) const {
- return TypeMap[Original];
+// Relocations can contain an addend for combined sections. This function takes
+// a relocation and returns the addend adjusted by the output section offset.
+uint32_t ObjFile::calcNewAddend(const WasmRelocation &Reloc) const {
+ switch (Reloc.Type) {
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ return Reloc.Addend;
+ case R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
+ default:
+ llvm_unreachable("unexpected relocation type");
+ }
}
-uint32_t ObjFile::relocateTableIndex(uint32_t Original) const {
- Symbol *Sym = getTableSymbol(Original);
- uint32_t Index = Sym->getTableIndex();
- DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original
- << " -> " << Index << "\n");
- return Index;
+// Calculate the value we expect to find at the relocation location.
+// This is used as a sanity check before applying a relocation to a given
+// location. It is useful for catching bugs in the compiler and linker.
+uint32_t ObjFile::calcExpectedValue(const WasmRelocation &Reloc) const {
+ switch (Reloc.Type) {
+ case R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
+ const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
+ return TableEntries[Sym.Info.ElementIndex];
+ }
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB: {
+ const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
+ if (Sym.isUndefined())
+ return 0;
+ const WasmSegment& Segment = WasmObj->dataSegments()[Sym.Info.DataRef.Segment];
+ return Segment.Data.Offset.Value.Int32 + Sym.Info.DataRef.Offset +
+ Reloc.Addend;
+ }
+ case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
+ return Sym->Function->getFunctionInputOffset() +
+ Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
+ }
+ return 0;
+ case R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ return Reloc.Addend;
+ case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ return Reloc.Index;
+ case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
+ const WasmSymbol& Sym = WasmObj->syms()[Reloc.Index];
+ return Sym.Info.ElementIndex;
+ }
+ default:
+ llvm_unreachable("unknown relocation type");
+ }
}
-uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const {
- Symbol *Sym = getGlobalSymbol(Original);
- uint32_t Index = Sym->getOutputIndex();
- DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original
- << " -> " << Index << "\n");
- return Index;
+// Translate from the relocation's index into the final linked output value.
+uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
+ switch (Reloc.Type) {
+ case R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ return getFunctionSymbol(Reloc.Index)->getTableIndex();
+ case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+ case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ if (auto *Sym = dyn_cast<DefinedData>(getDataSymbol(Reloc.Index)))
+ if (Sym->isLive())
+ return Sym->getVirtualAddress() + Reloc.Addend;
+ return 0;
+ case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ return TypeMap[Reloc.Index];
+ case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ return getFunctionSymbol(Reloc.Index)->getFunctionIndex();
+ case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+ return getGlobalSymbol(Reloc.Index)->getGlobalIndex();
+ case R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ if (auto *Sym = dyn_cast<DefinedFunction>(getFunctionSymbol(Reloc.Index))) {
+ return Sym->Function->OutputOffset +
+ Sym->Function->getFunctionCodeOffset() + Reloc.Addend;
+ }
+ return 0;
+ case R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ return getSectionSymbol(Reloc.Index)->Section->OutputOffset + Reloc.Addend;
+ default:
+ llvm_unreachable("unknown relocation type");
+ }
}
void ObjFile::parse() {
// Parse a memory buffer as a wasm file.
- DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
+ LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), toString(this));
auto *Obj = dyn_cast<WasmObjectFile>(Bin.get());
@@ -111,156 +173,175 @@ void ObjFile::parse() {
Bin.release();
WasmObj.reset(Obj);
+ // Build up a map of function indices to table indices for use when
+ // verifying the existing table index relocations
+ uint32_t TotalFunctions =
+ WasmObj->getNumImportedFunctions() + WasmObj->functions().size();
+ TableEntries.resize(TotalFunctions);
+ for (const WasmElemSegment &Seg : WasmObj->elements()) {
+ if (Seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
+ fatal(toString(this) + ": invalid table elements");
+ uint32_t Offset = Seg.Offset.Value.Int32;
+ for (uint32_t Index = 0; Index < Seg.Functions.size(); Index++) {
+
+ uint32_t FunctionIndex = Seg.Functions[Index];
+ TableEntries[FunctionIndex] = Offset + Index;
+ }
+ }
+
// Find the code and data sections. Wasm objects can have at most one code
// and one data section.
+ uint32_t SectionIndex = 0;
for (const SectionRef &Sec : WasmObj->sections()) {
const WasmSection &Section = WasmObj->getWasmSection(Sec);
- if (Section.Type == WASM_SEC_CODE)
+ if (Section.Type == WASM_SEC_CODE) {
CodeSection = &Section;
- else if (Section.Type == WASM_SEC_DATA)
+ } else if (Section.Type == WASM_SEC_DATA) {
DataSection = &Section;
+ } else if (Section.Type == WASM_SEC_CUSTOM) {
+ CustomSections.emplace_back(make<InputSection>(Section, this));
+ CustomSections.back()->copyRelocations(Section);
+ CustomSectionsByIndex[SectionIndex] = CustomSections.back();
+ }
+ SectionIndex++;
}
- initializeSymbols();
-}
+ TypeMap.resize(getWasmObj()->types().size());
+ TypeIsUsed.resize(getWasmObj()->types().size(), false);
-// Return the InputSegment in which a given symbol is defined.
-InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) {
- uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym);
- for (InputSegment *Segment : Segments) {
- if (Address >= Segment->startVA() && Address < Segment->endVA()) {
- DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
- << Segment->getName() << "\n");
+ ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
+ UsedComdats.resize(Comdats.size());
+ for (unsigned I = 0; I < Comdats.size(); ++I)
+ UsedComdats[I] = Symtab->addComdat(Comdats[I]);
- return Segment;
- }
+ // Populate `Segments`.
+ for (const WasmSegment &S : WasmObj->dataSegments()) {
+ InputSegment *Seg = make<InputSegment>(S, this);
+ Seg->copyRelocations(*DataSection);
+ Segments.emplace_back(Seg);
+ }
+
+ // Populate `Functions`.
+ ArrayRef<WasmFunction> Funcs = WasmObj->functions();
+ ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
+ ArrayRef<WasmSignature> Types = WasmObj->types();
+ Functions.reserve(Funcs.size());
+
+ for (size_t I = 0, E = Funcs.size(); I != E; ++I) {
+ InputFunction *F =
+ make<InputFunction>(Types[FuncTypes[I]], &Funcs[I], this);
+ F->copyRelocations(*CodeSection);
+ Functions.emplace_back(F);
+ }
+
+ // Populate `Globals`.
+ for (const WasmGlobal &G : WasmObj->globals())
+ Globals.emplace_back(make<InputGlobal>(G, this));
+
+ // Populate `Symbols` based on the WasmSymbols in the object.
+ Symbols.reserve(WasmObj->getNumberOfSymbols());
+ for (const SymbolRef &Sym : WasmObj->symbols()) {
+ const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
+ if (Symbol *Sym = createDefined(WasmSym))
+ Symbols.push_back(Sym);
+ else
+ Symbols.push_back(createUndefined(WasmSym));
}
- error("symbol not found in any segment: " + WasmSym.Name);
- return nullptr;
}
-static void copyRelocationsRange(std::vector<WasmRelocation> &To,
- ArrayRef<WasmRelocation> From, size_t Start,
- size_t End) {
- for (const WasmRelocation &R : From)
- if (R.Offset >= Start && R.Offset < End)
- To.push_back(R);
+bool ObjFile::isExcludedByComdat(InputChunk *Chunk) const {
+ uint32_t C = Chunk->getComdat();
+ if (C == UINT32_MAX)
+ return false;
+ return !UsedComdats[C];
}
-void ObjFile::initializeSymbols() {
- Symbols.reserve(WasmObj->getNumberOfSymbols());
+FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t Index) const {
+ return cast<FunctionSymbol>(Symbols[Index]);
+}
- for (const WasmImport &Import : WasmObj->imports()) {
- switch (Import.Kind) {
- case WASM_EXTERNAL_FUNCTION:
- ++FunctionImports;
- break;
- case WASM_EXTERNAL_GLOBAL:
- ++GlobalImports;
- break;
- }
- }
+GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t Index) const {
+ return cast<GlobalSymbol>(Symbols[Index]);
+}
- FunctionSymbols.resize(FunctionImports + WasmObj->functions().size());
- GlobalSymbols.resize(GlobalImports + WasmObj->globals().size());
+SectionSymbol *ObjFile::getSectionSymbol(uint32_t Index) const {
+ return cast<SectionSymbol>(Symbols[Index]);
+}
- for (const WasmSegment &S : WasmObj->dataSegments()) {
- InputSegment *Seg = make<InputSegment>(&S, this);
- copyRelocationsRange(Seg->Relocations, DataSection->Relocations,
- Seg->getInputSectionOffset(),
- Seg->getInputSectionOffset() + Seg->getSize());
- Segments.emplace_back(Seg);
- }
+DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
+ return cast<DataSymbol>(Symbols[Index]);
+}
- // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols
- // in the object
- for (const SymbolRef &Sym : WasmObj->symbols()) {
- const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
- Symbol *S;
- switch (WasmSym.Type) {
- case WasmSymbol::SymbolType::FUNCTION_IMPORT:
- case WasmSymbol::SymbolType::GLOBAL_IMPORT:
- S = createUndefined(WasmSym);
- break;
- case WasmSymbol::SymbolType::GLOBAL_EXPORT:
- S = createDefined(WasmSym, getSegment(WasmSym));
- break;
- case WasmSymbol::SymbolType::FUNCTION_EXPORT:
- S = createDefined(WasmSym);
- break;
- case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME:
- // These are for debugging only, no need to create linker symbols for them
- continue;
- }
+Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
+ if (!Sym.isDefined())
+ return nullptr;
- Symbols.push_back(S);
- if (WasmSym.isFunction()) {
- DEBUG(dbgs() << "Function: " << WasmSym.ElementIndex << " -> "
- << toString(*S) << "\n");
- FunctionSymbols[WasmSym.ElementIndex] = S;
- if (WasmSym.HasAltIndex)
- FunctionSymbols[WasmSym.AltIndex] = S;
- } else {
- DEBUG(dbgs() << "Global: " << WasmSym.ElementIndex << " -> "
- << toString(*S) << "\n");
- GlobalSymbols[WasmSym.ElementIndex] = S;
- if (WasmSym.HasAltIndex)
- GlobalSymbols[WasmSym.AltIndex] = S;
+ StringRef Name = Sym.Info.Name;
+ uint32_t Flags = Sym.Info.Flags;
+
+ switch (Sym.Info.Kind) {
+ case WASM_SYMBOL_TYPE_FUNCTION: {
+ InputFunction *Func =
+ Functions[Sym.Info.ElementIndex - WasmObj->getNumImportedFunctions()];
+ if (isExcludedByComdat(Func)) {
+ Func->Live = false;
+ return nullptr;
}
- }
- DEBUG(for (size_t I = 0; I < FunctionSymbols.size(); ++I)
- assert(FunctionSymbols[I] != nullptr);
- for (size_t I = 0; I < GlobalSymbols.size(); ++I)
- assert(GlobalSymbols[I] != nullptr););
-
- // Populate `TableSymbols` with all symbols that are called indirectly
- uint32_t SegmentCount = WasmObj->elements().size();
- if (SegmentCount) {
- if (SegmentCount > 1)
- fatal(getName() + ": contains more than one element segment");
- const WasmElemSegment &Segment = WasmObj->elements()[0];
- if (Segment.Offset.Opcode != WASM_OPCODE_I32_CONST)
- fatal(getName() + ": unsupported element segment");
- if (Segment.TableIndex != 0)
- fatal(getName() + ": unsupported table index in elem segment");
- if (Segment.Offset.Value.Int32 != 0)
- fatal(getName() + ": unsupported element segment offset");
- TableSymbols.reserve(Segment.Functions.size());
- for (uint64_t FunctionIndex : Segment.Functions)
- TableSymbols.push_back(getFunctionSymbol(FunctionIndex));
+ if (Sym.isBindingLocal())
+ return make<DefinedFunction>(Name, Flags, this, Func);
+ return Symtab->addDefinedFunction(Name, Flags, this, Func);
}
+ case WASM_SYMBOL_TYPE_DATA: {
+ InputSegment *Seg = Segments[Sym.Info.DataRef.Segment];
+ if (isExcludedByComdat(Seg)) {
+ Seg->Live = false;
+ return nullptr;
+ }
- DEBUG(dbgs() << "TableSymbols: " << TableSymbols.size() << "\n");
- DEBUG(dbgs() << "Functions : " << FunctionSymbols.size() << "\n");
- DEBUG(dbgs() << "Globals : " << GlobalSymbols.size() << "\n");
-}
+ uint32_t Offset = Sym.Info.DataRef.Offset;
+ uint32_t Size = Sym.Info.DataRef.Size;
-Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
- return Symtab->addUndefined(this, &Sym);
+ if (Sym.isBindingLocal())
+ return make<DefinedData>(Name, Flags, this, Seg, Offset, Size);
+ return Symtab->addDefinedData(Name, Flags, this, Seg, Offset, Size);
+ }
+ case WASM_SYMBOL_TYPE_GLOBAL: {
+ InputGlobal *Global =
+ Globals[Sym.Info.ElementIndex - WasmObj->getNumImportedGlobals()];
+ if (Sym.isBindingLocal())
+ return make<DefinedGlobal>(Name, Flags, this, Global);
+ return Symtab->addDefinedGlobal(Name, Flags, this, Global);
+ }
+ case WASM_SYMBOL_TYPE_SECTION: {
+ InputSection *Section = CustomSectionsByIndex[Sym.Info.ElementIndex];
+ assert(Sym.isBindingLocal());
+ return make<SectionSymbol>(Name, Flags, Section, this);
+ }
+ }
+ llvm_unreachable("unknown symbol kind");
}
-Symbol *ObjFile::createDefined(const WasmSymbol &Sym,
- const InputSegment *Segment) {
- Symbol *S;
- if (Sym.isLocal()) {
- S = make<Symbol>(Sym.Name, true);
- Symbol::Kind Kind;
- if (Sym.Type == WasmSymbol::SymbolType::FUNCTION_EXPORT)
- Kind = Symbol::Kind::DefinedFunctionKind;
- else if (Sym.Type == WasmSymbol::SymbolType::GLOBAL_EXPORT)
- Kind = Symbol::Kind::DefinedGlobalKind;
- else
- llvm_unreachable("invalid local symbol type");
- S->update(Kind, this, &Sym, Segment);
- return S;
+Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
+ StringRef Name = Sym.Info.Name;
+ uint32_t Flags = Sym.Info.Flags;
+
+ switch (Sym.Info.Kind) {
+ case WASM_SYMBOL_TYPE_FUNCTION:
+ return Symtab->addUndefinedFunction(Name, Flags, this, Sym.FunctionType);
+ case WASM_SYMBOL_TYPE_DATA:
+ return Symtab->addUndefinedData(Name, Flags, this);
+ case WASM_SYMBOL_TYPE_GLOBAL:
+ return Symtab->addUndefinedGlobal(Name, Flags, this, Sym.GlobalType);
+ case WASM_SYMBOL_TYPE_SECTION:
+ llvm_unreachable("section symbols cannot be undefined");
}
- return Symtab->addDefined(this, &Sym, Segment);
+ llvm_unreachable("unknown symbol kind");
}
void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
- DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
+ LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
File = CHECK(Archive::create(MB), toString(this));
// Read the symbol table to construct Lazy symbols.
@@ -269,7 +350,7 @@ void ArchiveFile::parse() {
Symtab->addLazy(this, &Sym);
++Count;
}
- DEBUG(dbgs() << "Read " << Count << " symbols\n");
+ LLVM_DEBUG(dbgs() << "Read " << Count << " symbols\n");
}
void ArchiveFile::addMember(const Archive::Symbol *Sym) {
@@ -282,22 +363,59 @@ void ArchiveFile::addMember(const Archive::Symbol *Sym) {
if (!Seen.insert(C.getChildOffset()).second)
return;
- DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
- DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
+ LLVM_DEBUG(dbgs() << "loading lazy: " << Sym->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
MemoryBufferRef MB =
CHECK(C.getMemoryBufferRef(),
"could not get the buffer for the member defining symbol " +
Sym->getName());
- if (identify_magic(MB.getBuffer()) != file_magic::wasm_object) {
- error("unknown file type: " + MB.getBufferIdentifier());
+ InputFile *Obj = createObjectFile(MB);
+ Obj->ArchiveName = getName();
+ Symtab->addFile(Obj);
+}
+
+static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) {
+ switch (GvVisibility) {
+ case GlobalValue::DefaultVisibility:
+ return WASM_SYMBOL_VISIBILITY_DEFAULT;
+ case GlobalValue::HiddenVisibility:
+ case GlobalValue::ProtectedVisibility:
+ return WASM_SYMBOL_VISIBILITY_HIDDEN;
+ }
+ llvm_unreachable("unknown visibility");
+}
+
+static Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &ObjSym,
+ BitcodeFile &F) {
+ StringRef Name = Saver.save(ObjSym.getName());
+
+ uint32_t Flags = ObjSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
+ Flags |= mapVisibility(ObjSym.getVisibility());
+
+ if (ObjSym.isUndefined()) {
+ if (ObjSym.isExecutable())
+ return Symtab->addUndefinedFunction(Name, Flags, &F, nullptr);
+ return Symtab->addUndefinedData(Name, Flags, &F);
+ }
+
+ if (ObjSym.isExecutable())
+ return Symtab->addDefinedFunction(Name, Flags, &F, nullptr);
+ return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
+}
+
+void BitcodeFile::parse() {
+ Obj = check(lto::InputFile::create(MemoryBufferRef(
+ MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
+ Triple T(Obj->getTargetTriple());
+ if (T.getArch() != Triple::wasm32) {
+ error(toString(MB.getBufferIdentifier()) + ": machine type must be wasm32");
return;
}
- InputFile *Obj = make<ObjFile>(MB);
- Obj->ParentName = ParentName;
- Symtab->addFile(Obj);
+ for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
+ Symbols.push_back(createBitcodeSymbol(ObjSym, *this));
}
// Returns a string in the format of "foo.o" or "foo.a(bar.o)".
@@ -305,8 +423,8 @@ std::string lld::toString(const wasm::InputFile *File) {
if (!File)
return "<internal>";
- if (File->ParentName.empty())
+ if (File->ArchiveName.empty())
return File->getName();
- return (File->ParentName + "(" + File->getName() + ")").str();
+ return (File->ArchiveName + "(" + File->getName() + ")").str();
}
diff --git a/wasm/InputFiles.h b/wasm/InputFiles.h
index 158cc53cafb1..ec77446e6308 100644
--- a/wasm/InputFiles.h
+++ b/wasm/InputFiles.h
@@ -10,34 +10,46 @@
#ifndef LLD_WASM_INPUT_FILES_H
#define LLD_WASM_INPUT_FILES_H
+#include "Symbols.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/MemoryBuffer.h"
-
-#include "WriterUtils.h"
-
#include <vector>
using llvm::object::Archive;
using llvm::object::WasmObjectFile;
using llvm::object::WasmSection;
using llvm::object::WasmSymbol;
+using llvm::wasm::WasmGlobal;
using llvm::wasm::WasmImport;
+using llvm::wasm::WasmRelocation;
+using llvm::wasm::WasmSignature;
+
+namespace llvm {
+namespace lto {
+class InputFile;
+}
+} // namespace llvm
namespace lld {
namespace wasm {
-class Symbol;
+class InputChunk;
+class InputFunction;
class InputSegment;
+class InputGlobal;
+class InputSection;
class InputFile {
public:
enum Kind {
ObjectKind,
ArchiveKind,
+ BitcodeKind,
};
virtual ~InputFile() {}
@@ -51,12 +63,17 @@ public:
Kind kind() const { return FileKind; }
// An archive file name if this file is created from an archive.
- StringRef ParentName;
+ StringRef ArchiveName;
+
+ ArrayRef<Symbol *> getSymbols() const { return Symbols; }
protected:
InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
MemoryBufferRef MB;
+ // List of all symbols referenced or defined by this file.
+ std::vector<Symbol *> Symbols;
+
private:
const Kind FileKind;
};
@@ -89,58 +106,54 @@ public:
void dumpInfo() const;
- uint32_t relocateTypeIndex(uint32_t Original) const;
- uint32_t relocateFunctionIndex(uint32_t Original) const;
- uint32_t relocateGlobalIndex(uint32_t Original) const;
- uint32_t relocateTableIndex(uint32_t Original) const;
- uint32_t getRelocatedAddress(uint32_t Index) const;
-
- // Returns true if the given function index is an imported function,
- // as opposed to the locally defined function.
- bool isImportedFunction(uint32_t Index) const;
-
- size_t NumFunctionImports() const { return FunctionImports; }
- size_t NumGlobalImports() const { return GlobalImports; }
+ uint32_t calcNewIndex(const WasmRelocation &Reloc) const;
+ uint32_t calcNewValue(const WasmRelocation &Reloc) const;
+ uint32_t calcNewAddend(const WasmRelocation &Reloc) const;
+ uint32_t calcExpectedValue(const WasmRelocation &Reloc) const;
- int32_t FunctionIndexOffset = 0;
const WasmSection *CodeSection = nullptr;
- std::vector<OutputRelocation> CodeRelocations;
- int32_t CodeOffset = 0;
const WasmSection *DataSection = nullptr;
+ // Maps input type indices to output type indices
std::vector<uint32_t> TypeMap;
+ std::vector<bool> TypeIsUsed;
+ // Maps function indices to table indices
+ std::vector<uint32_t> TableEntries;
+ std::vector<bool> UsedComdats;
std::vector<InputSegment *> Segments;
+ std::vector<InputFunction *> Functions;
+ std::vector<InputGlobal *> Globals;
+ std::vector<InputSection *> CustomSections;
+ llvm::DenseMap<uint32_t, InputSection *> CustomSectionsByIndex;
- ArrayRef<Symbol *> getSymbols() { return Symbols; }
- ArrayRef<Symbol *> getTableSymbols() { return TableSymbols; }
+ Symbol *getSymbol(uint32_t Index) const { return Symbols[Index]; }
+ FunctionSymbol *getFunctionSymbol(uint32_t Index) const;
+ DataSymbol *getDataSymbol(uint32_t Index) const;
+ GlobalSymbol *getGlobalSymbol(uint32_t Index) const;
+ SectionSymbol *getSectionSymbol(uint32_t Index) const;
private:
- Symbol *createDefined(const WasmSymbol &Sym,
- const InputSegment *Segment = nullptr);
+ Symbol *createDefined(const WasmSymbol &Sym);
Symbol *createUndefined(const WasmSymbol &Sym);
- void initializeSymbols();
- InputSegment *getSegment(const WasmSymbol &WasmSym);
- Symbol *getFunctionSymbol(uint32_t FunctionIndex) const;
- Symbol *getTableSymbol(uint32_t TableIndex) const;
- Symbol *getGlobalSymbol(uint32_t GlobalIndex) const;
- // List of all symbols referenced or defined by this file.
- std::vector<Symbol *> Symbols;
-
- // List of all function symbols indexed by the function index space
- std::vector<Symbol *> FunctionSymbols;
+ bool isExcludedByComdat(InputChunk *Chunk) const;
- // List of all global symbols indexed by the global index space
- std::vector<Symbol *> GlobalSymbols;
+ std::unique_ptr<WasmObjectFile> WasmObj;
+};
- // List of all indirect symbols indexed by table index space.
- std::vector<Symbol *> TableSymbols;
+class BitcodeFile : public InputFile {
+public:
+ explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
+ static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
- uint32_t GlobalImports = 0;
- uint32_t FunctionImports = 0;
- std::unique_ptr<WasmObjectFile> WasmObj;
+ void parse() override;
+ std::unique_ptr<llvm::lto::InputFile> Obj;
};
+// Will report a fatal() error if the input buffer is not a valid bitcode
+// or wasm object file.
+InputFile *createObjectFile(MemoryBufferRef MB);
+
// Opens a given file.
llvm::Optional<MemoryBufferRef> readFile(StringRef Path);
diff --git a/wasm/InputGlobal.h b/wasm/InputGlobal.h
new file mode 100644
index 000000000000..37d0ab903706
--- /dev/null
+++ b/wasm/InputGlobal.h
@@ -0,0 +1,59 @@
+//===- InputGlobal.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_WASM_INPUT_GLOBAL_H
+#define LLD_WASM_INPUT_GLOBAL_H
+
+#include "Config.h"
+#include "InputFiles.h"
+#include "WriterUtils.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Object/Wasm.h"
+
+using llvm::wasm::WasmGlobal;
+using llvm::wasm::WasmInitExpr;
+
+namespace lld {
+namespace wasm {
+
+// Represents a single Wasm Global Variable within an input file. These are
+// combined to form the final GLOBALS section.
+class InputGlobal {
+public:
+ // Copies G and remembers the owning file F. The global starts out live
+ // unless Config->GcSections is enabled.
+ InputGlobal(const WasmGlobal &G, ObjFile *F)
+ : File(F), Global(G), Live(!Config->GcSections) {}
+
+ StringRef getName() const { return Global.SymbolName; }
+ const WasmGlobalType &getType() const { return Global.Type; }
+
+ // getGlobalIndex() must only be called once an index has been assigned;
+ // hasGlobalIndex() reports whether that has happened yet.
+ uint32_t getGlobalIndex() const { return GlobalIndex.getValue(); }
+ bool hasGlobalIndex() const { return GlobalIndex.hasValue(); }
+ // Assigns the index. The assert enforces that it is assigned only once.
+ void setGlobalIndex(uint32_t Index) {
+ assert(!hasGlobalIndex());
+ GlobalIndex = Index;
+ }
+
+ ObjFile *File;
+ WasmGlobal Global;
+
+ // Liveness bit. markLive() reports globals whose bit is still false when
+ // Config->PrintGcSections is set.
+ bool Live = false;
+
+protected:
+ // Empty until setGlobalIndex() is called.
+ llvm::Optional<uint32_t> GlobalIndex;
+};
+
+} // namespace wasm
+
+// Formats a global as "<file>:(<name>)" for use in diagnostics.
+inline std::string toString(const wasm::InputGlobal *G) {
+  StringRef Name = G->getName();
+  std::string Out = toString(G->File);
+  Out += ":(";
+  Out.append(Name.begin(), Name.end());
+  Out += ")";
+  return Out;
+}
+
+} // namespace lld
+
+#endif // LLD_WASM_INPUT_GLOBAL_H
diff --git a/wasm/InputSegment.cpp b/wasm/InputSegment.cpp
deleted file mode 100644
index 650914386259..000000000000
--- a/wasm/InputSegment.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- InputSegment.cpp ---------------------------------------------------===//
-//
-// The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InputSegment.h"
-#include "OutputSegment.h"
-#include "lld/Common/LLVM.h"
-
-#define DEBUG_TYPE "lld"
-
-using namespace llvm;
-using namespace lld::wasm;
-
-uint32_t InputSegment::translateVA(uint32_t Address) const {
- assert(Address >= startVA() && Address < endVA());
- int32_t Delta = OutputSeg->StartVA + OutputSegmentOffset - startVA();
- DEBUG(dbgs() << "translateVA: " << getName() << " Delta=" << Delta
- << " Address=" << Address << "\n");
- return Address + Delta;
-}
diff --git a/wasm/InputSegment.h b/wasm/InputSegment.h
deleted file mode 100644
index f70a3ded895e..000000000000
--- a/wasm/InputSegment.h
+++ /dev/null
@@ -1,76 +0,0 @@
-//===- InputSegment.h -------------------------------------------*- C++ -*-===//
-//
-// The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Represents a WebAssembly data segment which can be included as part of
-// an output data segments. Note that in WebAssembly, unlike ELF and other
-// formats, used the term "data segment" to refer to the continous regions of
-// memory that make on the data section. See:
-// https://webassembly.github.io/spec/syntax/modules.html#syntax-data
-//
-// For example, by default, clang will produce a separate data section for
-// each global variable.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLD_WASM_INPUT_SEGMENT_H
-#define LLD_WASM_INPUT_SEGMENT_H
-
-#include "WriterUtils.h"
-#include "lld/Common/ErrorHandler.h"
-#include "llvm/Object/Wasm.h"
-
-using llvm::object::WasmSegment;
-using llvm::wasm::WasmRelocation;
-
-namespace lld {
-namespace wasm {
-
-class ObjFile;
-class OutputSegment;
-
-class InputSegment {
-public:
- InputSegment(const WasmSegment *Seg, const ObjFile *F)
- : Segment(Seg), File(F) {}
-
- // Translate an offset in the input segment to an offset in the output
- // segment.
- uint32_t translateVA(uint32_t Address) const;
-
- const OutputSegment *getOutputSegment() const { return OutputSeg; }
-
- uint32_t getOutputSegmentOffset() const { return OutputSegmentOffset; }
-
- uint32_t getInputSectionOffset() const { return Segment->SectionOffset; }
-
- void setOutputSegment(const OutputSegment *Segment, uint32_t Offset) {
- OutputSeg = Segment;
- OutputSegmentOffset = Offset;
- }
-
- uint32_t getSize() const { return Segment->Data.Content.size(); }
- uint32_t getAlignment() const { return Segment->Data.Alignment; }
- uint32_t startVA() const { return Segment->Data.Offset.Value.Int32; }
- uint32_t endVA() const { return startVA() + getSize(); }
- StringRef getName() const { return Segment->Data.Name; }
-
- const WasmSegment *Segment;
- const ObjFile *File;
- std::vector<WasmRelocation> Relocations;
- std::vector<OutputRelocation> OutRelocations;
-
-protected:
- const OutputSegment *OutputSeg = nullptr;
- uint32_t OutputSegmentOffset = 0;
-};
-
-} // namespace wasm
-} // namespace lld
-
-#endif // LLD_WASM_INPUT_SEGMENT_H
diff --git a/wasm/LTO.cpp b/wasm/LTO.cpp
new file mode 100644
index 000000000000..f15551da8b80
--- /dev/null
+++ b/wasm/LTO.cpp
@@ -0,0 +1,155 @@
+//===- LTO.cpp ------------------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LTO.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Strings.h"
+#include "lld/Common/TargetOptionsCommandFlags.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/LTO/Caching.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::object;
+
+using namespace lld;
+using namespace lld::wasm;
+
+// Creates the lto::LTO instance for this link, configured from the global
+// Config and the code generation command-line flags.
+static std::unique_ptr<lto::LTO> createLTO() {
+  lto::Config LtoConf;
+  LtoConf.Options = InitTargetOptionsFromCodeGenFlags();
+
+  // With LTO, every function and data object always gets its own section.
+  LtoConf.Options.DataSections = true;
+  LtoConf.Options.FunctionSections = true;
+
+  // ThreadModel::Single is the only thread model wasm supports for now.
+  LtoConf.Options.ThreadModel = ThreadModel::Single;
+
+  LtoConf.OptLevel = Config->LTOO;
+  LtoConf.DiagHandler = diagnosticHandler;
+  LtoConf.DisableVerify = Config->DisableVerify;
+
+  if (Config->SaveTemps) {
+    std::string TempPrefix = Config->OutputFile.str() + ".";
+    checkError(LtoConf.addSaveTemps(TempPrefix, /*UseInputModulePath*/ true));
+  }
+
+  // A ThinLTOJobs value of -1U leaves the backend default-constructed.
+  lto::ThinBackend ThinLTOBackend;
+  if (Config->ThinLTOJobs != -1U)
+    ThinLTOBackend = lto::createInProcessThinBackend(Config->ThinLTOJobs);
+
+  return llvm::make_unique<lto::LTO>(std::move(LtoConf), ThinLTOBackend,
+                                     Config->LTOPartitions);
+}
+
+// Creates the LTO object up front via createLTO().
+BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {}
+
+// Defined here rather than in LTO.h because that header only forward-declares
+// llvm::lto::LTO, which the std::unique_ptr member needs complete to destroy.
+BitcodeCompiler::~BitcodeCompiler() = default;
+
+// Replaces a defined function or data symbol, in place, with the matching
+// undefined symbol of the same name. Any other symbol kind is a logic error.
+static void undefine(Symbol *S) {
+  if (isa<DefinedFunction>(S)) {
+    replaceSymbol<UndefinedFunction>(S, S->getName(), 0);
+    return;
+  }
+  if (isa<DefinedData>(S)) {
+    replaceSymbol<UndefinedData>(S, S->getName(), 0);
+    return;
+  }
+  llvm_unreachable("unexpected symbol kind");
+}
+
+// Hands the bitcode file F to the LTO object together with one resolution per
+// symbol. F.Obj is moved into the LTO object by this call.
+void BitcodeCompiler::add(BitcodeFile &F) {
+  ArrayRef<Symbol *> FileSyms = F.getSymbols();
+  std::vector<lto::SymbolResolution> Resolutions(FileSyms.size());
+
+  // Walk the IR symbols in step with the linker's symbols for this file and
+  // fill in a resolution for each one.
+  unsigned Idx = 0;
+  for (const lto::InputFile::Symbol &IRSym : F.Obj->symbols()) {
+    Symbol *LinkerSym = FileSyms[Idx];
+    lto::SymbolResolution &Res = Resolutions[Idx];
+    ++Idx;
+
+    // The isUndefined() test should not be needed, but IRObjectFile currently
+    // reports two symbols for something defined in module ASM. Without the
+    // test, lld would treat an undefined in IR that has a definition in ASM
+    // as prevailing. Drop this hack once IRObjectFile reports a single symbol.
+    const bool Prevails = !IRSym.isUndefined() && LinkerSym->getFile() == &F;
+    Res.Prevailing = Prevails;
+    Res.VisibleToRegularObj = Config->Relocatable ||
+                              LinkerSym->IsUsedInRegularObj ||
+                              (Prevails && LinkerSym->isExported());
+    if (Prevails)
+      undefine(LinkerSym);
+  }
+  checkError(LTOObj->add(std::move(F.Obj), Resolutions));
+}
+
+// Runs LTO over every bitcode file added so far and returns the generated
+// native objects. Each returned StringRef points into a buffer owned by this
+// BitcodeCompiler (Buf or Files).
+std::vector<StringRef> BitcodeCompiler::compile() {
+  const unsigned NumTasks = LTOObj->getMaxTasks();
+  Buf.resize(NumTasks);
+  Files.resize(NumTasks);
+
+  // --thinlto-cache-dir names a directory used to cache native object files
+  // for ThinLTO incremental builds. When it is given, LTO is configured to
+  // use it and cached objects are delivered into Files.
+  const bool UseCache = !Config->ThinLTOCacheDir.empty();
+  lto::NativeObjectCache Cache;
+  if (UseCache) {
+    auto StoreCached = [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
+      Files[Task] = std::move(MB);
+    };
+    Cache = check(lto::localCache(Config->ThinLTOCacheDir, StoreCached));
+  }
+
+  // Freshly generated objects are streamed into the per-task Buf entries.
+  auto OpenStream = [&](size_t Task) {
+    return llvm::make_unique<lto::NativeObjectStream>(
+        llvm::make_unique<raw_svector_ostream>(Buf[Task]));
+  };
+  checkError(LTOObj->run(OpenStream, Cache));
+
+  if (UseCache)
+    pruneCache(Config->ThinLTOCacheDir, Config->ThinLTOCachePolicy);
+
+  // Collect the non-empty in-memory objects first, then the cached ones.
+  std::vector<StringRef> Objects;
+  for (unsigned Task = 0; Task != NumTasks; ++Task) {
+    SmallString<0> &Out = Buf[Task];
+    if (!Out.empty()) {
+      if (Config->SaveTemps) {
+        if (Task != 0)
+          saveBuffer(Out, Config->OutputFile + Twine(Task) + ".lto.o");
+        else
+          saveBuffer(Out, Config->OutputFile + ".lto.o");
+      }
+      Objects.push_back(StringRef(Out.data(), Out.size()));
+    }
+  }
+
+  for (std::unique_ptr<MemoryBuffer> &Cached : Files) {
+    if (!Cached)
+      continue;
+    Objects.push_back(Cached->getBuffer());
+  }
+
+  return Objects;
+}
diff --git a/wasm/LTO.h b/wasm/LTO.h
new file mode 100644
index 000000000000..cf726de5643a
--- /dev/null
+++ b/wasm/LTO.h
@@ -0,0 +1,57 @@
+//===- LTO.h ----------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a way to combine bitcode files into one wasm
+// file by compiling them using LLVM.
+//
+// If LTO is in use, your input files are not regular wasm files
+// but LLVM bitcode files. In that case, the linker has to
+// convert bitcode files into the native format so that we can create
+// a wasm file that contains native code. This file provides that
+// functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_WASM_LTO_H
+#define LLD_WASM_LTO_H
+
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/SmallString.h"
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace lto {
+class LTO;
+}
+} // namespace llvm
+
+namespace lld {
+namespace wasm {
+
+class BitcodeFile;
+class InputFile;
+
+// Collects bitcode files, hands them to the LLVM LTO API and returns the
+// resulting object buffers.
+class BitcodeCompiler {
+public:
+ BitcodeCompiler();
+ ~BitcodeCompiler();
+
+ // Registers F with the LTO object, supplying a resolution for each of its
+ // symbols. F.Obj is moved from.
+ void add(BitcodeFile &F);
+ // Runs LTO and returns the produced object buffers. The returned StringRefs
+ // point into Buf and Files, so they are only valid while this object lives.
+ std::vector<StringRef> compile();
+
+private:
+ std::unique_ptr<llvm::lto::LTO> LTOObj;
+ // Per-task output buffers that compile() streams generated objects into.
+ std::vector<SmallString<0>> Buf;
+ // Per-task buffers filled by the ThinLTO cache callback in compile().
+ std::vector<std::unique_ptr<MemoryBuffer>> Files;
+};
+} // namespace wasm
+} // namespace lld
+
+#endif
diff --git a/wasm/MarkLive.cpp b/wasm/MarkLive.cpp
new file mode 100644
index 000000000000..dfaa712c3296
--- /dev/null
+++ b/wasm/MarkLive.cpp
@@ -0,0 +1,118 @@
+//===- MarkLive.cpp -------------------------------------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements --gc-sections, which is a feature to remove unused
+// chunks from the output. Unused chunks are those that are not reachable from
+// known root symbols or chunks. This feature is implemented as a mark-sweep
+// garbage collector.
+//
+// Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
+// default. Starting with the GC-roots, visit all reachable chunks and set their
+// Live bits. The Writer will then ignore chunks whose Live bits are off, so
+// that such chunks do not appear in the output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MarkLive.h"
+#include "Config.h"
+#include "InputChunks.h"
+#include "InputGlobal.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+
+#define DEBUG_TYPE "lld"
+
+using namespace llvm;
+using namespace llvm::wasm;
+using namespace lld;
+using namespace lld::wasm;
+
+// Implements --gc-sections: marks every symbol and chunk reachable from the GC
+// roots as live. Does nothing when Config->GcSections is off. Roots are the
+// entry symbol (if one is configured), WasmSym::CallCtors, every exported
+// symbol, and the init functions listed in each object's linking data.
+// With Config->PrintGcSections set, a message is printed for each function,
+// segment and global whose Live bit is still off afterwards.
+void lld::wasm::markLive() {
+  if (!Config->GcSections)
+    return;
+
+  LLVM_DEBUG(dbgs() << "markLive\n");
+  SmallVector<InputChunk *, 256> Q;
+
+  // Marks Sym live and queues its chunk (if any) so its relocations get
+  // scanned. Null and already-live symbols are ignored, which also keeps
+  // cyclic references from being queued over and over.
+  auto Enqueue = [&](Symbol *Sym) {
+    if (!Sym || Sym->isLive())
+      return;
+    LLVM_DEBUG(dbgs() << "markLive: " << Sym->getName() << "\n");
+    Sym->markLive();
+    if (InputChunk *Chunk = Sym->getChunk())
+      Q.push_back(Chunk);
+  };
+
+  // Add GC root symbols.
+  if (!Config->Entry.empty())
+    Enqueue(Symtab->find(Config->Entry));
+  Enqueue(WasmSym::CallCtors);
+
+  // We need to preserve any exported symbol.
+  for (Symbol *Sym : Symtab->getSymbols())
+    if (Sym->isExported())
+      Enqueue(Sym);
+
+  // The ctor functions are all used in the synthetic __wasm_call_ctors
+  // function, but since this function is created in-place it doesn't contain
+  // relocations, which means we have to manually mark the ctors.
+  for (const ObjFile *Obj : Symtab->ObjectFiles) {
+    const WasmLinkingData &L = Obj->getWasmObj()->linkingData();
+    for (const WasmInitFunc &F : L.InitFunctions)
+      Enqueue(Obj->getFunctionSymbol(F.Symbol));
+  }
+
+  // Follow relocations to mark all reachable chunks.
+  while (!Q.empty()) {
+    InputChunk *C = Q.pop_back_val();
+
+    // Bind by reference: each relocation is only read, so there is no need to
+    // copy it on every iteration.
+    for (const WasmRelocation &Reloc : C->getRelocations()) {
+      // Type index relocations do not refer to a symbol.
+      if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB)
+        continue;
+      Symbol *Sym = C->File->getSymbol(Reloc.Index);
+
+      // If the function has been assigned the special index zero in the table,
+      // the relocation doesn't pull in the function body, since the function
+      // won't actually go in the table (the runtime will trap attempts to call
+      // that index, since we don't use it). A function with a table index of
+      // zero is only reachable via "call", not via "call_indirect". The stub
+      // functions used for weak-undefined symbols have this behaviour (compare
+      // equal to null pointer, only reachable via direct call).
+      if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB ||
+          Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32) {
+        FunctionSymbol *FuncSym = cast<FunctionSymbol>(Sym);
+        if (FuncSym->hasTableIndex() && FuncSym->getTableIndex() == 0)
+          continue;
+      }
+
+      Enqueue(Sym);
+    }
+  }
+
+  // Report garbage-collected sections.
+  if (Config->PrintGcSections) {
+    for (const ObjFile *Obj : Symtab->ObjectFiles) {
+      for (InputChunk *C : Obj->Functions)
+        if (!C->Live)
+          message("removing unused section " + toString(C));
+      for (InputChunk *C : Obj->Segments)
+        if (!C->Live)
+          message("removing unused section " + toString(C));
+      for (InputGlobal *G : Obj->Globals)
+        if (!G->Live)
+          message("removing unused section " + toString(G));
+    }
+    for (InputChunk *C : Symtab->SyntheticFunctions)
+      if (!C->Live)
+        message("removing unused section " + toString(C));
+    for (InputGlobal *G : Symtab->SyntheticGlobals)
+      if (!G->Live)
+        message("removing unused section " + toString(G));
+  }
+}
diff --git a/wasm/MarkLive.h b/wasm/MarkLive.h
new file mode 100644
index 000000000000..0b58f153ce45
--- /dev/null
+++ b/wasm/MarkLive.h
@@ -0,0 +1,21 @@
+//===- MarkLive.h -----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_WASM_MARKLIVE_H
+#define LLD_WASM_MARKLIVE_H
+
+namespace lld {
+namespace wasm {
+
+// Runs the --gc-sections mark phase; defined in MarkLive.cpp.
+void markLive();
+
+} // namespace wasm
+} // namespace lld
+
+#endif // LLD_WASM_MARKLIVE_H
diff --git a/wasm/Options.td b/wasm/Options.td
index df0c6d708072..43588a830e31 100644
--- a/wasm/Options.td
+++ b/wasm/Options.td
@@ -11,21 +11,47 @@ multiclass Eq<string name> {
def _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>;
}
-def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
- HelpText<"Add a directory to the library search path">;
+multiclass B<string name, string help1, string help2> {
+ def NAME: Flag<["--", "-"], name>, HelpText<help1>;
+ def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText<help2>;
+}
+// The following flags are shared with the ELF linker
def color_diagnostics: F<"color-diagnostics">,
HelpText<"Use colors in diagnostics">;
def color_diagnostics_eq: J<"color-diagnostics=">,
- HelpText<"Use colors in diagnostics">;
+ HelpText<"Use colors in diagnostics; one of 'always', 'never', 'auto'">;
+
+defm demangle: B<"demangle",
+ "Demangle symbol names",
+ "Do not demangle symbol names">;
+
+def entry: S<"entry">, MetaVarName<"<entry>">,
+ HelpText<"Name of entry point symbol">;
+
+def error_limit: J<"error-limit=">,
+ HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">;
+
+def fatal_warnings: F<"fatal-warnings">,
+ HelpText<"Treat warnings as errors">;
+
+defm gc_sections: B<"gc-sections",
+ "Enable garbage collection of unused sections",
+ "Disable garbage collection of unused sections">;
+
+defm merge_data_segments: B<"merge-data-segments",
+ "Enable merging data segments",
+ "Disable merging data segments">;
-// The follow flags are shared with the ELF linker
def help: F<"help">, HelpText<"Print option help">;
def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"<libName>">,
HelpText<"Root name of library to use">;
+def L: JoinedOrSeparate<["-"], "L">, MetaVarName<"<dir>">,
+ HelpText<"Add a directory to the library search path">;
+
def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">;
def no_threads: F<"no-threads">,
@@ -34,70 +60,99 @@ def no_threads: F<"no-threads">,
def no_color_diagnostics: F<"no-color-diagnostics">,
HelpText<"Do not use colors in diagnostics">;
-def no_check_signatures: F<"no-check-signatures">, HelpText<"Don't check function signatures">;
+def no_fatal_warnings: F<"no-fatal-warnings">;
def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,
HelpText<"Path to file to write output">;
-def threads: F<"threads">, HelpText<"Run the linker multi-threaded">;
+def O: JoinedOrSeparate<["-"], "O">, HelpText<"Optimize output file size">;
-def check_signatures: F<"check-signatures">, HelpText<"Check function signatures">;
-
-def v: Flag<["-"], "v">, HelpText<"Display the version number">;
-
-def version: F<"version">, HelpText<"Display the version number and exit">;
-
-def verbose: F<"verbose">, HelpText<"Verbose mode">;
+defm print_gc_sections: B<"print-gc-sections",
+ "List removed unused sections",
+ "Do not list removed unused sections">;
def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">;
-def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">;
-
def strip_all: F<"strip-all">, HelpText<"Strip all symbols">;
def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">;
+def threads: F<"threads">, HelpText<"Run the linker multi-threaded">;
+
defm undefined: Eq<"undefined">,
HelpText<"Force undefined symbol during linking">;
+def v: Flag<["-"], "v">, HelpText<"Display the version number">;
+
+def verbose: F<"verbose">, HelpText<"Verbose mode">;
+
+def version: F<"version">, HelpText<"Display the version number and exit">;
+
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
-def entry: S<"entry">, MetaVarName<"<entry>">,
- HelpText<"Name of entry point symbol">;
+// The following flags are unique to wasm
-def no_entry: F<"no-entry">,
- HelpText<"Do not output any entry point">;
+def allow_undefined: F<"allow-undefined">,
+ HelpText<"Allow undefined symbols in linked binary">;
-def error_limit: J<"error-limit=">,
- HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">;
+def allow_undefined_file: J<"allow-undefined-file=">,
+ HelpText<"Allow symbols listed in <file> to be undefined in linked binary">;
-// The follow flags are unique to wasm
+def allow_undefined_file_s: Separate<["-"], "allow-undefined-file">,
+ Alias<allow_undefined_file>;
+
+defm export: Eq<"export">,
+ HelpText<"Force a symbol to be exported">;
+
+def export_all: F<"export-all">,
+ HelpText<"Export all symbols (normally combined with --no-gc-sections)">;
+
+def export_table: F<"export-table">,
+ HelpText<"Export function table to the environment">;
def global_base: J<"global-base=">,
HelpText<"Where to start to place global data">;
+def import_memory: F<"import-memory">,
+ HelpText<"Import memory from the environment">;
+
+def import_table: F<"import-table">,
+ HelpText<"Import function table from the environment">;
+
def initial_memory: J<"initial-memory=">,
HelpText<"Initial size of the linear memory">;
def max_memory: J<"max-memory=">,
HelpText<"Maximum size of the linear memory">;
-def import_memory: F<"import-memory">,
- HelpText<"Import memory from the environment">;
-
-def allow_undefined: F<"allow-undefined">,
- HelpText<"Allow undefined symbols in linked binary">;
+def no_entry: F<"no-entry">,
+ HelpText<"Do not output any entry point">;
-def allow_undefined_file: J<"allow-undefined-file=">,
- HelpText<"Allow symbols listed in <file> to be undefined in linked binary">;
+def stack_first: F<"stack-first">,
+ HelpText<"Place stack at start of linear memory rather than after data">;
-def allow_undefined_file_s: Separate<["-"], "allow-undefined-file">, Alias<allow_undefined_file>;
+defm whole_archive: B<"whole-archive",
+ "Force load of all members in a static library",
+ "Do not force load of all members in a static library (default)">;
// Aliases
+def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>;
+def alias_entry_entry: J<"entry=">, Alias<entry>;
def alias_initial_memory_i: Flag<["-"], "i">, Alias<initial_memory>;
def alias_max_memory_m: Flag<["-"], "m">, Alias<max_memory>;
def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>;
-def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>;
-def alias_entry_entry: J<"entry=">, Alias<entry>;
def alias_undefined_u: JoinedOrSeparate<["-"], "u">, Alias<undefined>;
+
+// LTO-related options.
+def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">,
+ HelpText<"Optimization level for LTO">;
+def lto_partitions: J<"lto-partitions=">,
+ HelpText<"Number of LTO codegen partitions">;
+def disable_verify: F<"disable-verify">;
+def save_temps: F<"save-temps">;
+def thinlto_cache_dir: J<"thinlto-cache-dir=">,
+ HelpText<"Path to ThinLTO cached object file directory">;
+defm thinlto_cache_policy: Eq<"thinlto-cache-policy">,
+ HelpText<"Pruning policy for the ThinLTO cache">;
+def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">;
diff --git a/wasm/OutputSections.cpp b/wasm/OutputSections.cpp
index a55538269065..256a9884f947 100644
--- a/wasm/OutputSections.cpp
+++ b/wasm/OutputSections.cpp
@@ -8,13 +8,11 @@
//===----------------------------------------------------------------------===//
#include "OutputSections.h"
-
-#include "Config.h"
+#include "InputChunks.h"
#include "InputFiles.h"
#include "OutputSegment.h"
-#include "SymbolTable.h"
+#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
#include "lld/Common/Threads.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/LEB128.h"
@@ -26,12 +24,6 @@ using namespace llvm::wasm;
using namespace lld;
using namespace lld::wasm;
-enum class RelocEncoding {
- Uleb128,
- Sleb128,
- I32,
-};
-
static StringRef sectionTypeToString(uint32_t SectionType) {
switch (SectionType) {
case WASM_SEC_CUSTOM:
@@ -63,159 +55,40 @@ static StringRef sectionTypeToString(uint32_t SectionType) {
}
}
-std::string lld::toString(const OutputSection &Section) {
- std::string rtn = Section.getSectionName();
- if (!Section.Name.empty())
- rtn += "(" + Section.Name + ")";
- return rtn;
-}
-
-static void applyRelocation(uint8_t *Buf, const OutputRelocation &Reloc) {
- DEBUG(dbgs() << "write reloc: type=" << Reloc.Reloc.Type
- << " index=" << Reloc.Reloc.Index << " value=" << Reloc.Value
- << " offset=" << Reloc.Reloc.Offset << "\n");
- Buf += Reloc.Reloc.Offset;
- int64_t ExistingValue;
- switch (Reloc.Reloc.Type) {
- case R_WEBASSEMBLY_TYPE_INDEX_LEB:
- case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- ExistingValue = decodeULEB128(Buf);
- if (ExistingValue != Reloc.Reloc.Index) {
- DEBUG(dbgs() << "existing value: " << decodeULEB128(Buf) << "\n");
- assert(decodeULEB128(Buf) == Reloc.Reloc.Index);
- }
- LLVM_FALLTHROUGH;
- case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- encodeULEB128(Reloc.Value, Buf, 5);
- break;
- case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- ExistingValue = decodeSLEB128(Buf);
- if (ExistingValue != Reloc.Reloc.Index) {
- DEBUG(dbgs() << "existing value: " << decodeSLEB128(Buf) << "\n");
- assert(decodeSLEB128(Buf) == Reloc.Reloc.Index);
- }
- LLVM_FALLTHROUGH;
- case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- encodeSLEB128(static_cast<int32_t>(Reloc.Value), Buf, 5);
- break;
- case R_WEBASSEMBLY_TABLE_INDEX_I32:
- case R_WEBASSEMBLY_MEMORY_ADDR_I32:
- support::endian::write32<support::little>(Buf, Reloc.Value);
- break;
- default:
- llvm_unreachable("unknown relocation type");
- }
-}
-
-static void applyRelocations(uint8_t *Buf, ArrayRef<OutputRelocation> Relocs) {
- if (!Relocs.size())
- return;
- log("applyRelocations: count=" + Twine(Relocs.size()));
- for (const OutputRelocation &Reloc : Relocs)
- applyRelocation(Buf, Reloc);
-}
-
-// Relocations contain an index into the function, global or table index
-// space of the input file. This function takes a relocation and returns the
-// relocated index (i.e. translates from the input index space to the output
-// index space).
-static uint32_t calcNewIndex(const ObjFile &File, const WasmRelocation &Reloc) {
- switch (Reloc.Type) {
- case R_WEBASSEMBLY_TYPE_INDEX_LEB:
- return File.relocateTypeIndex(Reloc.Index);
- case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- return File.relocateFunctionIndex(Reloc.Index);
- case R_WEBASSEMBLY_TABLE_INDEX_I32:
- case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- return File.relocateTableIndex(Reloc.Index);
- case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case R_WEBASSEMBLY_MEMORY_ADDR_I32:
- return File.relocateGlobalIndex(Reloc.Index);
- default:
- llvm_unreachable("unknown relocation type");
- }
-}
-
-// Take a vector of relocations from an input file and create output
-// relocations based on them. Calculates the updated index and offset for
-// each relocation as well as the value to write out in the final binary.
-static void calcRelocations(const ObjFile &File,
- ArrayRef<WasmRelocation> Relocs,
- std::vector<OutputRelocation> &OutputRelocs,
- int32_t OutputOffset) {
- log("calcRelocations: " + File.getName() + " offset=" + Twine(OutputOffset));
- for (const WasmRelocation &Reloc : Relocs) {
- OutputRelocation NewReloc;
- NewReloc.Reloc = Reloc;
- NewReloc.Reloc.Offset += OutputOffset;
- DEBUG(dbgs() << "reloc: type=" << Reloc.Type << " index=" << Reloc.Index
- << " offset=" << Reloc.Offset
- << " newOffset=" << NewReloc.Reloc.Offset << "\n");
-
- if (Config->EmitRelocs)
- NewReloc.NewIndex = calcNewIndex(File, Reloc);
- else
- NewReloc.NewIndex = UINT32_MAX;
-
- switch (Reloc.Type) {
- case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case R_WEBASSEMBLY_MEMORY_ADDR_I32:
- case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- NewReloc.Value = File.getRelocatedAddress(Reloc.Index);
- if (NewReloc.Value != UINT32_MAX)
- NewReloc.Value += Reloc.Addend;
- break;
- default:
- NewReloc.Value = calcNewIndex(File, Reloc);
- break;
- }
-
- OutputRelocs.emplace_back(NewReloc);
- }
+// Renders a section as its type name, followed by "(<name>)" when the section
+// has a name, e.g. "FUNCTION(.text)".
+std::string lld::toString(const OutputSection &Sec) {
+  if (Sec.Name.empty())
+    return Sec.getSectionName();
+  return (Sec.getSectionName() + "(" + Sec.Name + ")").str();
+}
-std::string OutputSection::getSectionName() const {
+StringRef OutputSection::getSectionName() const {
return sectionTypeToString(Type);
}
-std::string SubSection::getSectionName() const {
- return std::string("subsection <type=") + std::to_string(Type) + ">";
-}
-
void OutputSection::createHeader(size_t BodySize) {
raw_string_ostream OS(Header);
- debugWrite(OS.tell(), "section type [" + Twine(getSectionName()) + "]");
- writeUleb128(OS, Type, nullptr);
+ debugWrite(OS.tell(), "section type [" + getSectionName() + "]");
+ encodeULEB128(Type, OS);
writeUleb128(OS, BodySize, "section size");
OS.flush();
log("createHeader: " + toString(*this) + " body=" + Twine(BodySize) +
" total=" + Twine(getSize()));
}
-CodeSection::CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs)
- : OutputSection(WASM_SEC_CODE), InputObjects(Objs) {
+CodeSection::CodeSection(ArrayRef<InputFunction *> Functions)
+ : OutputSection(WASM_SEC_CODE), Functions(Functions) {
+ assert(Functions.size() > 0);
+
raw_string_ostream OS(CodeSectionHeader);
- writeUleb128(OS, NumFunctions, "function count");
+ writeUleb128(OS, Functions.size(), "function count");
OS.flush();
BodySize = CodeSectionHeader.size();
- for (ObjFile *File : InputObjects) {
- if (!File->CodeSection)
- continue;
-
- File->CodeOffset = BodySize;
- ArrayRef<uint8_t> Content = File->CodeSection->Content;
- unsigned HeaderSize = 0;
- decodeULEB128(Content.data(), &HeaderSize);
-
- calcRelocations(*File, File->CodeSection->Relocations,
- File->CodeRelocations, BodySize - HeaderSize);
-
- size_t PayloadSize = Content.size() - HeaderSize;
- BodySize += PayloadSize;
+ for (InputFunction *Func : Functions) {
+ Func->OutputOffset = BodySize;
+ Func->calculateSize();
+ BodySize += Func->getSize();
}
createHeader(BodySize);
@@ -224,49 +97,32 @@ CodeSection::CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs)
void CodeSection::writeTo(uint8_t *Buf) {
log("writing " + toString(*this));
log(" size=" + Twine(getSize()));
+ log(" headersize=" + Twine(Header.size()));
+ log(" codeheadersize=" + Twine(CodeSectionHeader.size()));
Buf += Offset;
// Write section header
memcpy(Buf, Header.data(), Header.size());
Buf += Header.size();
- uint8_t *ContentsStart = Buf;
-
// Write code section headers
memcpy(Buf, CodeSectionHeader.data(), CodeSectionHeader.size());
- Buf += CodeSectionHeader.size();
// Write code section bodies
- parallelForEach(InputObjects, [ContentsStart](ObjFile *File) {
- if (!File->CodeSection)
- return;
-
- ArrayRef<uint8_t> Content(File->CodeSection->Content);
-
- // Payload doesn't include the initial header (function count)
- unsigned HeaderSize = 0;
- decodeULEB128(Content.data(), &HeaderSize);
-
- size_t PayloadSize = Content.size() - HeaderSize;
- memcpy(ContentsStart + File->CodeOffset, Content.data() + HeaderSize,
- PayloadSize);
-
- log("applying relocations for: " + File->getName());
- applyRelocations(ContentsStart, File->CodeRelocations);
- });
+ parallelForEach(Functions,
+ [&](const InputChunk *Chunk) { Chunk->writeTo(Buf); });
}
uint32_t CodeSection::numRelocations() const {
uint32_t Count = 0;
- for (ObjFile *File : InputObjects)
- Count += File->CodeRelocations.size();
+ for (const InputChunk *Func : Functions)
+ Count += Func->NumRelocations();
return Count;
}
void CodeSection::writeRelocations(raw_ostream &OS) const {
- for (ObjFile *File : InputObjects)
- for (const OutputRelocation &Reloc : File->CodeRelocations)
- writeReloc(OS, Reloc);
+ for (const InputChunk *C : Functions)
+ C->writeRelocations(OS);
}
DataSection::DataSection(ArrayRef<OutputSegment *> Segments)
@@ -285,18 +141,14 @@ DataSection::DataSection(ArrayRef<OutputSegment *> Segments)
writeUleb128(OS, WASM_OPCODE_END, "opcode:end");
writeUleb128(OS, Segment->Size, "segment size");
OS.flush();
- Segment->setSectionOffset(BodySize);
- BodySize += Segment->Header.size();
+
+ Segment->SectionOffset = BodySize;
+ BodySize += Segment->Header.size() + Segment->Size;
log("Data segment: size=" + Twine(Segment->Size));
- for (InputSegment *InputSeg : Segment->InputSegments) {
- uint32_t InputOffset = InputSeg->getInputSectionOffset();
- uint32_t OutputOffset = Segment->getSectionOffset() +
- Segment->Header.size() +
- InputSeg->getOutputSegmentOffset();
- calcRelocations(*InputSeg->File, InputSeg->Relocations,
- InputSeg->OutRelocations, OutputOffset - InputOffset);
- }
- BodySize += Segment->Size;
+
+ for (InputSegment *InputSeg : Segment->InputSegments)
+ InputSeg->OutputOffset = Segment->SectionOffset + Segment->Header.size() +
+ InputSeg->OutputSegmentOffset;
}
createHeader(BodySize);
@@ -311,38 +163,77 @@ void DataSection::writeTo(uint8_t *Buf) {
memcpy(Buf, Header.data(), Header.size());
Buf += Header.size();
- uint8_t *ContentsStart = Buf;
-
// Write data section headers
memcpy(Buf, DataSectionHeader.data(), DataSectionHeader.size());
- parallelForEach(Segments, [ContentsStart](const OutputSegment *Segment) {
+ parallelForEach(Segments, [&](const OutputSegment *Segment) {
// Write data segment header
- uint8_t *SegStart = ContentsStart + Segment->getSectionOffset();
+ uint8_t *SegStart = Buf + Segment->SectionOffset;
memcpy(SegStart, Segment->Header.data(), Segment->Header.size());
// Write segment data payload
- for (const InputSegment *Input : Segment->InputSegments) {
- ArrayRef<uint8_t> Content(Input->Segment->Data.Content);
- memcpy(SegStart + Segment->Header.size() +
- Input->getOutputSegmentOffset(),
- Content.data(), Content.size());
- applyRelocations(ContentsStart, Input->OutRelocations);
- }
+ for (const InputChunk *Chunk : Segment->InputSegments)
+ Chunk->writeTo(Buf);
});
}
uint32_t DataSection::numRelocations() const {
uint32_t Count = 0;
for (const OutputSegment *Seg : Segments)
- for (const InputSegment *InputSeg : Seg->InputSegments)
- Count += InputSeg->OutRelocations.size();
+ for (const InputChunk *InputSeg : Seg->InputSegments)
+ Count += InputSeg->NumRelocations();
return Count;
}
void DataSection::writeRelocations(raw_ostream &OS) const {
for (const OutputSegment *Seg : Segments)
- for (const InputSegment *InputSeg : Seg->InputSegments)
- for (const OutputRelocation &Reloc : InputSeg->OutRelocations)
- writeReloc(OS, Reloc);
+ for (const InputChunk *C : Seg->InputSegments)
+ C->writeRelocations(OS);
+}
+
+CustomSection::CustomSection(std::string Name,
+ ArrayRef<InputSection *> InputSections)
+ : OutputSection(WASM_SEC_CUSTOM, Name), PayloadSize(0),
+ InputSections(InputSections) {
+ raw_string_ostream OS(NameData);
+ encodeULEB128(Name.size(), OS);
+ OS << Name;
+ OS.flush();
+
+ for (InputSection *Section : InputSections) {
+ Section->OutputOffset = PayloadSize;
+ PayloadSize += Section->getSize();
+ }
+
+ createHeader(PayloadSize + NameData.size());
+}
+
+void CustomSection::writeTo(uint8_t *Buf) {
+ log("writing " + toString(*this) + " size=" + Twine(getSize()) +
+ " chunks=" + Twine(InputSections.size()));
+
+ assert(Offset);
+ Buf += Offset;
+
+ // Write section header
+ memcpy(Buf, Header.data(), Header.size());
+ Buf += Header.size();
+ memcpy(Buf, NameData.data(), NameData.size());
+ Buf += NameData.size();
+
+ // Write custom sections payload
+ parallelForEach(InputSections,
+ [&](const InputSection *Section) { Section->writeTo(Buf); });
+}
+
+uint32_t CustomSection::numRelocations() const {
+ uint32_t Count = 0;
+ for (const InputSection *InputSect : InputSections)
+ Count += InputSect->NumRelocations();
+ return Count;
+}
+
+void CustomSection::writeRelocations(raw_ostream &OS) const {
+ for (const InputSection *S : InputSections)
+ S->writeRelocations(OS);
}
diff --git a/wasm/OutputSections.h b/wasm/OutputSections.h
index fc73f36ad286..189d6507c4b3 100644
--- a/wasm/OutputSections.h
+++ b/wasm/OutputSections.h
@@ -10,7 +10,7 @@
#ifndef LLD_WASM_OUTPUT_SECTIONS_H
#define LLD_WASM_OUTPUT_SECTIONS_H
-#include "InputSegment.h"
+#include "InputChunks.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
@@ -28,7 +28,6 @@ std::string toString(const wasm::OutputSection &Section);
namespace wasm {
class OutputSegment;
-class ObjFile;
class OutputSection {
public:
@@ -36,7 +35,7 @@ public:
: Type(Type), Name(Name) {}
virtual ~OutputSection() = default;
- std::string getSectionName() const;
+ StringRef getSectionName() const;
void setOffset(size_t NewOffset) {
log("setOffset: " + toString(*this) + ": " + Twine(NewOffset));
Offset = NewOffset;
@@ -61,7 +60,7 @@ public:
SyntheticSection(uint32_t Type, std::string Name = "")
: OutputSection(Type, Name), BodyOutputStream(Body) {
if (!Name.empty())
- writeStr(BodyOutputStream, Name);
+ writeStr(BodyOutputStream, Name, "section name");
}
void writeTo(uint8_t *Buf) override {
@@ -86,32 +85,16 @@ protected:
raw_string_ostream BodyOutputStream;
};
-// Some synthetic sections (e.g. "name" and "linking") have subsections.
-// Just like the synthetic sections themselves these need to be created before
-// they can be written out (since they are preceded by their length). This
-// class is used to create subsections and then write them into the stream
-// of the parent section.
-class SubSection : public SyntheticSection {
-public:
- explicit SubSection(uint32_t Type) : SyntheticSection(Type) {}
-
- std::string getSectionName() const;
- void writeToStream(raw_ostream &OS) {
- writeBytes(OS, Header.data(), Header.size());
- writeBytes(OS, Body.data(), Body.size());
- }
-};
-
class CodeSection : public OutputSection {
public:
- explicit CodeSection(uint32_t NumFunctions, ArrayRef<ObjFile *> Objs);
+ explicit CodeSection(ArrayRef<InputFunction *> Functions);
size_t getSize() const override { return Header.size() + BodySize; }
void writeTo(uint8_t *Buf) override;
uint32_t numRelocations() const override;
void writeRelocations(raw_ostream &OS) const override;
protected:
- ArrayRef<ObjFile *> InputObjects;
+ ArrayRef<InputFunction *> Functions;
std::string CodeSectionHeader;
size_t BodySize = 0;
};
@@ -130,6 +113,29 @@ protected:
size_t BodySize = 0;
};
+// Represents a custom section in the output file. Wasm custom sections are
+// used for storing user-defined metadata. Unlike the core section types
+// they are identified by their string name.
+// The linker combines custom sections that have the same name by simply
+// concatenating them.
+// Note that some custom sections such as "name" and "linking" are handled
+// separately and are instead synthesized by the linker.
+class CustomSection : public OutputSection {
+public:
+ CustomSection(std::string Name, ArrayRef<InputSection *> InputSections);
+ size_t getSize() const override {
+ return Header.size() + NameData.size() + PayloadSize;
+ }
+ void writeTo(uint8_t *Buf) override;
+ uint32_t numRelocations() const override;
+ void writeRelocations(raw_ostream &OS) const override;
+
+protected:
+ size_t PayloadSize;
+ ArrayRef<InputSection *> InputSections;
+ std::string NameData;
+};
+
} // namespace wasm
} // namespace lld
diff --git a/wasm/OutputSegment.h b/wasm/OutputSegment.h
index a22c80234420..d5c89cd19f4c 100644
--- a/wasm/OutputSegment.h
+++ b/wasm/OutputSegment.h
@@ -10,7 +10,7 @@
#ifndef LLD_WASM_OUTPUT_SEGMENT_H
#define LLD_WASM_OUTPUT_SEGMENT_H
-#include "InputSegment.h"
+#include "InputChunks.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Object/Wasm.h"
@@ -21,21 +21,20 @@ class InputSegment;
class OutputSegment {
public:
- OutputSegment(StringRef N) : Name(N) {}
-
- void addInputSegment(InputSegment *Segment) {
- Alignment = std::max(Alignment, Segment->getAlignment());
- InputSegments.push_back(Segment);
- Size = llvm::alignTo(Size, Segment->getAlignment());
- Segment->setOutputSegment(this, Size);
- Size += Segment->getSize();
+ OutputSegment(StringRef N, uint32_t Index) : Name(N), Index(Index) {}
+
+ void addInputSegment(InputSegment *InSeg) {
+ Alignment = std::max(Alignment, InSeg->getAlignment());
+ InputSegments.push_back(InSeg);
+ Size = llvm::alignTo(Size, InSeg->getAlignment());
+ InSeg->OutputSeg = this;
+ InSeg->OutputSegmentOffset = Size;
+ Size += InSeg->getSize();
}
- uint32_t getSectionOffset() const { return SectionOffset; }
-
- void setSectionOffset(uint32_t Offset) { SectionOffset = Offset; }
-
StringRef Name;
+ const uint32_t Index;
+ uint32_t SectionOffset = 0;
uint32_t Alignment = 0;
uint32_t StartVA = 0;
std::vector<InputSegment *> InputSegments;
@@ -45,9 +44,6 @@ public:
// Segment header
std::string Header;
-
-private:
- uint32_t SectionOffset = 0;
};
} // namespace wasm
diff --git a/wasm/SymbolTable.cpp b/wasm/SymbolTable.cpp
index 751008da0536..e1ba23769738 100644
--- a/wasm/SymbolTable.cpp
+++ b/wasm/SymbolTable.cpp
@@ -8,17 +8,18 @@
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
-
#include "Config.h"
+#include "InputChunks.h"
+#include "InputGlobal.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
-
-#include <unordered_set>
+#include "llvm/ADT/SetVector.h"
#define DEBUG_TYPE "lld"
using namespace llvm;
+using namespace llvm::wasm;
using namespace lld;
using namespace lld::wasm;
@@ -28,17 +29,46 @@ void SymbolTable::addFile(InputFile *File) {
log("Processing: " + toString(File));
File->parse();
- if (auto *F = dyn_cast<ObjFile>(File))
+ // LLVM bitcode file
+ if (auto *F = dyn_cast<BitcodeFile>(File))
+ BitcodeFiles.push_back(F);
+ else if (auto *F = dyn_cast<ObjFile>(File))
ObjectFiles.push_back(F);
}
+// This function is where all the optimizations of link-time
+// optimization happen. When LTO is in use, some input files are
+// not in native object file format but in the LLVM bitcode format.
+// This function compiles bitcode files into a few big native files
+// using LLVM functions and replaces bitcode symbols with the results.
+// Because all bitcode files that the program consists of are passed
+// to the compiler at once, it can do whole-program optimization.
+void SymbolTable::addCombinedLTOObject() {
+ if (BitcodeFiles.empty())
+ return;
+
+ // Compile bitcode files and replace bitcode symbols.
+ LTO.reset(new BitcodeCompiler);
+ for (BitcodeFile *F : BitcodeFiles)
+ LTO->add(*F);
+
+ for (StringRef Filename : LTO->compile()) {
+ auto *Obj = make<ObjFile>(MemoryBufferRef(Filename, "lto.tmp"));
+ Obj->parse();
+ ObjectFiles.push_back(Obj);
+ }
+}
+
void SymbolTable::reportRemainingUndefines() {
- std::unordered_set<Symbol *> Undefs;
+ SetVector<Symbol *> Undefs;
for (Symbol *Sym : SymVector) {
- if (Sym->isUndefined() && !Sym->isWeak() &&
- Config->AllowUndefinedSymbols.count(Sym->getName()) == 0) {
- Undefs.insert(Sym);
- }
+ if (!Sym->isUndefined() || Sym->isWeak())
+ continue;
+ if (Config->AllowUndefinedSymbols.count(Sym->getName()) != 0)
+ continue;
+ if (!Sym->IsUsedInRegularObj)
+ continue;
+ Undefs.insert(Sym);
}
if (Undefs.empty())
@@ -55,183 +85,281 @@ void SymbolTable::reportRemainingUndefines() {
}
Symbol *SymbolTable::find(StringRef Name) {
- auto It = SymMap.find(CachedHashStringRef(Name));
- if (It == SymMap.end())
- return nullptr;
- return It->second;
+ return SymMap.lookup(CachedHashStringRef(Name));
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) {
Symbol *&Sym = SymMap[CachedHashStringRef(Name)];
if (Sym)
return {Sym, false};
- Sym = make<Symbol>(Name, false);
+ Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
+ Sym->IsUsedInRegularObj = false;
SymVector.emplace_back(Sym);
return {Sym, true};
}
-void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) {
- error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " +
- toString(Existing->getFile()) + "\n>>> defined in " +
- toString(NewFile));
+static void reportTypeError(const Symbol *Existing, const InputFile *File,
+ llvm::wasm::WasmSymbolType Type) {
+ error("symbol type mismatch: " + toString(*Existing) + "\n>>> defined as " +
+ toString(Existing->getWasmType()) + " in " +
+ toString(Existing->getFile()) + "\n>>> defined as " + toString(Type) +
+ " in " + toString(File));
}
-// Get the signature for a given function symbol, either by looking
-// it up in function sections (for defined functions), of the imports section
-// (for imported functions).
-static const WasmSignature *getFunctionSig(const ObjFile &Obj,
- const WasmSymbol &Sym) {
- DEBUG(dbgs() << "getFunctionSig: " << Sym.Name << "\n");
- const WasmObjectFile *WasmObj = Obj.getWasmObj();
- return &WasmObj->types()[Sym.FunctionType];
+static void checkFunctionType(Symbol *Existing, const InputFile *File,
+ const WasmSignature *NewSig) {
+ auto ExistingFunction = dyn_cast<FunctionSymbol>(Existing);
+ if (!ExistingFunction) {
+ reportTypeError(Existing, File, WASM_SYMBOL_TYPE_FUNCTION);
+ return;
+ }
+
+ if (!NewSig)
+ return;
+
+ const WasmSignature *OldSig = ExistingFunction->FunctionType;
+ if (!OldSig) {
+ ExistingFunction->FunctionType = NewSig;
+ return;
+ }
+
+ if (*NewSig != *OldSig)
+ warn("function signature mismatch: " + Existing->getName() +
+ "\n>>> defined as " + toString(*OldSig) + " in " +
+ toString(Existing->getFile()) + "\n>>> defined as " +
+ toString(*NewSig) + " in " + toString(File));
}
// Check the type of new symbol matches that of the symbol is replacing.
// For functions this can also involve verifying that the signatures match.
-static void checkSymbolTypes(const Symbol &Existing, const InputFile &F,
- const WasmSymbol &New,
- const WasmSignature *NewSig) {
- if (Existing.isLazy())
+static void checkGlobalType(const Symbol *Existing, const InputFile *File,
+ const WasmGlobalType *NewType) {
+ if (!isa<GlobalSymbol>(Existing)) {
+ reportTypeError(Existing, File, WASM_SYMBOL_TYPE_GLOBAL);
return;
+ }
- bool NewIsFunction = New.Type == WasmSymbol::SymbolType::FUNCTION_EXPORT ||
- New.Type == WasmSymbol::SymbolType::FUNCTION_IMPORT;
-
- // First check the symbol types match (i.e. either both are function
- // symbols or both are data symbols).
- if (Existing.isFunction() != NewIsFunction) {
- error("symbol type mismatch: " + New.Name + "\n>>> defined as " +
- (Existing.isFunction() ? "Function" : "Global") + " in " +
- toString(Existing.getFile()) + "\n>>> defined as " +
- (NewIsFunction ? "Function" : "Global") + " in " + F.getName());
- return;
+ const WasmGlobalType *OldType = cast<GlobalSymbol>(Existing)->getGlobalType();
+ if (*NewType != *OldType) {
+ error("Global type mismatch: " + Existing->getName() + "\n>>> defined as " +
+ toString(*OldType) + " in " + toString(Existing->getFile()) +
+ "\n>>> defined as " + toString(*NewType) + " in " + toString(File));
}
+}
- // For function symbols, optionally check the function signature matches too.
- if (!NewIsFunction || !Config->CheckSignatures)
- return;
- // Skip the signature check if the existing function has no signature (e.g.
- // if it is an undefined symbol generated by --undefined command line flag).
- if (!Existing.hasFunctionType())
- return;
+static void checkDataType(const Symbol *Existing, const InputFile *File) {
+ if (!isa<DataSymbol>(Existing))
+ reportTypeError(Existing, File, WASM_SYMBOL_TYPE_DATA);
+}
- DEBUG(dbgs() << "checkSymbolTypes: " << New.Name << "\n");
- assert(NewSig);
+DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name,
+ uint32_t Flags,
+ InputFunction *Function) {
+ LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << Name << "\n");
+ assert(!find(Name));
+ SyntheticFunctions.emplace_back(Function);
+ return replaceSymbol<DefinedFunction>(insert(Name).first, Name, Flags,
+ nullptr, Function);
+}
- const WasmSignature &OldSig = Existing.getFunctionType();
- if (*NewSig == OldSig)
- return;
+DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef Name,
+ uint32_t Flags) {
+ LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << Name << "\n");
+ assert(!find(Name));
+ return replaceSymbol<DefinedData>(insert(Name).first, Name, Flags);
+}
- error("function signature mismatch: " + New.Name + "\n>>> defined as " +
- toString(OldSig) + " in " + toString(Existing.getFile()) +
- "\n>>> defined as " + toString(*NewSig) + " in " + F.getName());
+DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef Name, uint32_t Flags,
+ InputGlobal *Global) {
+ LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << Name << " -> " << Global
+ << "\n");
+ assert(!find(Name));
+ SyntheticGlobals.emplace_back(Global);
+ return replaceSymbol<DefinedGlobal>(insert(Name).first, Name, Flags, nullptr,
+ Global);
}
-Symbol *SymbolTable::addDefinedGlobal(StringRef Name) {
- DEBUG(dbgs() << "addDefinedGlobal: " << Name << "\n");
+static bool shouldReplace(const Symbol *Existing, InputFile *NewFile,
+ uint32_t NewFlags) {
+ // If existing symbol is undefined, replace it.
+ if (!Existing->isDefined()) {
+ LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
+ << Existing->getName() << "\n");
+ return true;
+ }
+
+ // Now we have two defined symbols. If the new one is weak, we can ignore it.
+ if ((NewFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
+ LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
+ return false;
+ }
+
+ // If the existing symbol is weak, we should replace it.
+ if (Existing->isWeak()) {
+ LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
+ return true;
+ }
+
+ // Neither symbol is weak. They conflict.
+ error("duplicate symbol: " + toString(*Existing) + "\n>>> defined in " +
+ toString(Existing->getFile()) + "\n>>> defined in " +
+ toString(NewFile));
+ return true;
+}
+
+Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags,
+ InputFile *File,
+ InputFunction *Function) {
+ LLVM_DEBUG(dbgs() << "addDefinedFunction: " << Name << "\n");
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
- if (WasInserted)
- S->update(Symbol::DefinedGlobalKind);
- else if (!S->isGlobal())
- error("symbol type mismatch: " + Name);
+
+ if (!File || File->kind() == InputFile::ObjectKind)
+ S->IsUsedInRegularObj = true;
+
+ if (WasInserted || S->isLazy()) {
+ replaceSymbol<DefinedFunction>(S, Name, Flags, File, Function);
+ return S;
+ }
+
+ if (Function)
+ checkFunctionType(S, File, &Function->Signature);
+
+ if (shouldReplace(S, File, Flags))
+ replaceSymbol<DefinedFunction>(S, Name, Flags, File, Function);
return S;
}
-Symbol *SymbolTable::addDefined(InputFile *F, const WasmSymbol *Sym,
- const InputSegment *Segment) {
- DEBUG(dbgs() << "addDefined: " << Sym->Name << "\n");
+Symbol *SymbolTable::addDefinedData(StringRef Name, uint32_t Flags,
+ InputFile *File, InputSegment *Segment,
+ uint32_t Address, uint32_t Size) {
+ LLVM_DEBUG(dbgs() << "addDefinedData:" << Name << " addr:" << Address
+ << "\n");
Symbol *S;
bool WasInserted;
- Symbol::Kind Kind = Symbol::DefinedFunctionKind;
- const WasmSignature *NewSig = nullptr;
- if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_EXPORT)
- Kind = Symbol::DefinedGlobalKind;
- else
- NewSig = getFunctionSig(*cast<ObjFile>(F), *Sym);
+ std::tie(S, WasInserted) = insert(Name);
- std::tie(S, WasInserted) = insert(Sym->Name);
- if (WasInserted) {
- S->update(Kind, F, Sym, Segment, NewSig);
- } else if (S->isLazy()) {
- // The existing symbol is lazy. Replace it without checking types since
- // lazy symbols don't have any type information.
- DEBUG(dbgs() << "replacing existing lazy symbol: " << Sym->Name << "\n");
- S->update(Kind, F, Sym, Segment, NewSig);
- } else if (!S->isDefined()) {
- // The existing symbol table entry is undefined. The new symbol replaces
- // it, after checking the type matches
- DEBUG(dbgs() << "resolving existing undefined symbol: " << Sym->Name
- << "\n");
- checkSymbolTypes(*S, *F, *Sym, NewSig);
- S->update(Kind, F, Sym, Segment, NewSig);
- } else if (Sym->isWeak()) {
- // the new symbol is weak we can ignore it
- DEBUG(dbgs() << "existing symbol takes precedence\n");
- } else if (S->isWeak()) {
- // the new symbol is not weak and the existing symbol is, so we replace
- // it
- DEBUG(dbgs() << "replacing existing weak symbol\n");
- checkSymbolTypes(*S, *F, *Sym, NewSig);
- S->update(Kind, F, Sym, Segment, NewSig);
- } else {
- // neither symbol is week. They conflict.
- reportDuplicate(S, F);
+ if (!File || File->kind() == InputFile::ObjectKind)
+ S->IsUsedInRegularObj = true;
+
+ if (WasInserted || S->isLazy()) {
+ replaceSymbol<DefinedData>(S, Name, Flags, File, Segment, Address, Size);
+ return S;
}
+
+ checkDataType(S, File);
+
+ if (shouldReplace(S, File, Flags))
+ replaceSymbol<DefinedData>(S, Name, Flags, File, Segment, Address, Size);
return S;
}
-Symbol *SymbolTable::addUndefinedFunction(StringRef Name,
- const WasmSignature *Type) {
+Symbol *SymbolTable::addDefinedGlobal(StringRef Name, uint32_t Flags,
+ InputFile *File, InputGlobal *Global) {
+ LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << Name << "\n");
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
- if (WasInserted) {
- S->update(Symbol::UndefinedFunctionKind, nullptr, nullptr, nullptr, Type);
- } else if (!S->isFunction()) {
- error("symbol type mismatch: " + Name);
+
+ if (!File || File->kind() == InputFile::ObjectKind)
+ S->IsUsedInRegularObj = true;
+
+ if (WasInserted || S->isLazy()) {
+ replaceSymbol<DefinedGlobal>(S, Name, Flags, File, Global);
+ return S;
}
+
+ checkGlobalType(S, File, &Global->getType());
+
+ if (shouldReplace(S, File, Flags))
+ replaceSymbol<DefinedGlobal>(S, Name, Flags, File, Global);
return S;
}
-Symbol *SymbolTable::addUndefined(InputFile *F, const WasmSymbol *Sym) {
- DEBUG(dbgs() << "addUndefined: " << Sym->Name << "\n");
+Symbol *SymbolTable::addUndefinedFunction(StringRef Name, uint32_t Flags,
+ InputFile *File,
+ const WasmSignature *Sig) {
+ LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name << "\n");
+
Symbol *S;
bool WasInserted;
- Symbol::Kind Kind = Symbol::UndefinedFunctionKind;
- const WasmSignature *NewSig = nullptr;
- if (Sym->Type == WasmSymbol::SymbolType::GLOBAL_IMPORT)
- Kind = Symbol::UndefinedGlobalKind;
+ std::tie(S, WasInserted) = insert(Name);
+
+ if (!File || File->kind() == InputFile::ObjectKind)
+ S->IsUsedInRegularObj = true;
+
+ if (WasInserted)
+ replaceSymbol<UndefinedFunction>(S, Name, Flags, File, Sig);
+ else if (auto *Lazy = dyn_cast<LazySymbol>(S))
+ Lazy->fetch();
else
- NewSig = getFunctionSig(*cast<ObjFile>(F), *Sym);
- std::tie(S, WasInserted) = insert(Sym->Name);
- if (WasInserted) {
- S->update(Kind, F, Sym, nullptr, NewSig);
- } else if (S->isLazy()) {
- DEBUG(dbgs() << "resolved by existing lazy\n");
- auto *AF = cast<ArchiveFile>(S->getFile());
- AF->addMember(&S->getArchiveSymbol());
- } else if (S->isDefined()) {
- DEBUG(dbgs() << "resolved by existing\n");
- checkSymbolTypes(*S, *F, *Sym, NewSig);
- }
+ checkFunctionType(S, File, Sig);
+
+ return S;
+}
+
+Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags,
+ InputFile *File) {
+ LLVM_DEBUG(dbgs() << "addUndefinedData: " << Name << "\n");
+
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(Name);
+
+ if (!File || File->kind() == InputFile::ObjectKind)
+ S->IsUsedInRegularObj = true;
+
+ if (WasInserted)
+ replaceSymbol<UndefinedData>(S, Name, Flags, File);
+ else if (auto *Lazy = dyn_cast<LazySymbol>(S))
+ Lazy->fetch();
+ else if (S->isDefined())
+ checkDataType(S, File);
return S;
}
-void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol *Sym) {
- DEBUG(dbgs() << "addLazy: " << Sym->getName() << "\n");
+Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, uint32_t Flags,
+ InputFile *File,
+ const WasmGlobalType *Type) {
+ LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << Name << "\n");
+
+ Symbol *S;
+ bool WasInserted;
+ std::tie(S, WasInserted) = insert(Name);
+
+ if (!File || File->kind() == InputFile::ObjectKind)
+ S->IsUsedInRegularObj = true;
+
+ if (WasInserted)
+ replaceSymbol<UndefinedGlobal>(S, Name, Flags, File, Type);
+ else if (auto *Lazy = dyn_cast<LazySymbol>(S))
+ Lazy->fetch();
+ else if (S->isDefined())
+ checkGlobalType(S, File, Type);
+ return S;
+}
+
+void SymbolTable::addLazy(ArchiveFile *File, const Archive::Symbol *Sym) {
+ LLVM_DEBUG(dbgs() << "addLazy: " << Sym->getName() << "\n");
StringRef Name = Sym->getName();
+
Symbol *S;
bool WasInserted;
std::tie(S, WasInserted) = insert(Name);
+
if (WasInserted) {
- S->update(Symbol::LazyKind, F);
- S->setArchiveSymbol(*Sym);
- } else if (S->isUndefined()) {
- // There is an existing undefined symbol. The can load from the
- // archive.
- DEBUG(dbgs() << "replacing existing undefined\n");
- F->addMember(Sym);
+ replaceSymbol<LazySymbol>(S, Name, File, *Sym);
+ return;
+ }
+
+ // If there is an existing undefined symbol, load a new one from the archive.
+ if (S->isUndefined()) {
+ LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
+ File->addMember(Sym);
}
}
+
+bool SymbolTable::addComdat(StringRef Name) {
+ return Comdats.insert(CachedHashStringRef(Name)).second;
+}
diff --git a/wasm/SymbolTable.h b/wasm/SymbolTable.h
index fbb74ed14796..26242e6cddd6 100644
--- a/wasm/SymbolTable.h
+++ b/wasm/SymbolTable.h
@@ -11,13 +11,13 @@
#define LLD_WASM_SYMBOL_TABLE_H
#include "InputFiles.h"
+#include "LTO.h"
#include "Symbols.h"
-
#include "llvm/ADT/CachedHashString.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/raw_ostream.h"
-using llvm::object::WasmSymbol;
+using llvm::wasm::WasmGlobalType;
using llvm::wasm::WasmSignature;
namespace lld {
@@ -40,27 +40,52 @@ class InputSegment;
class SymbolTable {
public:
void addFile(InputFile *File);
+ void addCombinedLTOObject();
std::vector<ObjFile *> ObjectFiles;
+ std::vector<BitcodeFile *> BitcodeFiles;
+ std::vector<InputFunction *> SyntheticFunctions;
+ std::vector<InputGlobal *> SyntheticGlobals;
- void reportDuplicate(Symbol *Existing, InputFile *NewFile);
void reportRemainingUndefines();
ArrayRef<Symbol *> getSymbols() const { return SymVector; }
Symbol *find(StringRef Name);
- Symbol *addDefined(InputFile *F, const WasmSymbol *Sym,
- const InputSegment *Segment = nullptr);
- Symbol *addUndefined(InputFile *F, const WasmSymbol *Sym);
- Symbol *addUndefinedFunction(StringRef Name, const WasmSignature *Type);
- Symbol *addDefinedGlobal(StringRef Name);
+ Symbol *addDefinedFunction(StringRef Name, uint32_t Flags, InputFile *File,
+ InputFunction *Function);
+ Symbol *addDefinedData(StringRef Name, uint32_t Flags, InputFile *File,
+ InputSegment *Segment, uint32_t Address,
+ uint32_t Size);
+ Symbol *addDefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File,
+ InputGlobal *G);
+
+ Symbol *addUndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File,
+ const WasmSignature *Signature);
+ Symbol *addUndefinedData(StringRef Name, uint32_t Flags, InputFile *File);
+ Symbol *addUndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File,
+ const WasmGlobalType *Type);
+
void addLazy(ArchiveFile *F, const Archive::Symbol *Sym);
+ bool addComdat(StringRef Name);
+
+ DefinedData *addSyntheticDataSymbol(StringRef Name, uint32_t Flags);
+ DefinedGlobal *addSyntheticGlobal(StringRef Name, uint32_t Flags,
+ InputGlobal *Global);
+ DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags,
+ InputFunction *Function);
+
private:
std::pair<Symbol *, bool> insert(StringRef Name);
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap;
std::vector<Symbol *> SymVector;
+
+ llvm::DenseSet<llvm::CachedHashStringRef> Comdats;
+
+ // For LTO.
+ std::unique_ptr<BitcodeCompiler> LTO;
};
extern SymbolTable *Symtab;
diff --git a/wasm/Symbols.cpp b/wasm/Symbols.cpp
index 6bf5459c2663..a11081cbcf77 100644
--- a/wasm/Symbols.cpp
+++ b/wasm/Symbols.cpp
@@ -8,92 +8,224 @@
//===----------------------------------------------------------------------===//
#include "Symbols.h"
-
#include "Config.h"
+#include "InputChunks.h"
#include "InputFiles.h"
-#include "InputSegment.h"
+#include "InputGlobal.h"
+#include "OutputSegment.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#define DEBUG_TYPE "lld"
using namespace llvm;
+using namespace llvm::wasm;
using namespace lld;
using namespace lld::wasm;
-uint32_t Symbol::getGlobalIndex() const {
- assert(!Sym->isFunction());
- return Sym->ElementIndex;
+DefinedFunction *WasmSym::CallCtors;
+DefinedData *WasmSym::DsoHandle;
+DefinedData *WasmSym::DataEnd;
+DefinedData *WasmSym::HeapBase;
+DefinedGlobal *WasmSym::StackPointer;
+
+WasmSymbolType Symbol::getWasmType() const {
+ if (isa<FunctionSymbol>(this))
+ return WASM_SYMBOL_TYPE_FUNCTION;
+ if (isa<DataSymbol>(this))
+ return WASM_SYMBOL_TYPE_DATA;
+ if (isa<GlobalSymbol>(this))
+ return WASM_SYMBOL_TYPE_GLOBAL;
+ if (isa<SectionSymbol>(this))
+ return WASM_SYMBOL_TYPE_SECTION;
+ llvm_unreachable("invalid symbol kind");
+}
+
+InputChunk *Symbol::getChunk() const {
+ if (auto *F = dyn_cast<DefinedFunction>(this))
+ return F->Function;
+ if (auto *D = dyn_cast<DefinedData>(this))
+ return D->Segment;
+ return nullptr;
+}
+
+bool Symbol::isLive() const {
+ if (auto *G = dyn_cast<DefinedGlobal>(this))
+ return G->Global->Live;
+ if (InputChunk *C = getChunk())
+ return C->Live;
+ return Referenced;
}
-uint32_t Symbol::getFunctionIndex() const {
- assert(Sym->isFunction());
- return Sym->ElementIndex;
+void Symbol::markLive() {
+ if (auto *G = dyn_cast<DefinedGlobal>(this))
+ G->Global->Live = true;
+ if (InputChunk *C = getChunk())
+ C->Live = true;
+ Referenced = true;
}
-const WasmSignature &Symbol::getFunctionType() const {
- assert(FunctionType != nullptr);
- return *FunctionType;
+uint32_t Symbol::getOutputSymbolIndex() const {
+ assert(OutputSymbolIndex != INVALID_INDEX);
+ return OutputSymbolIndex;
}
-uint32_t Symbol::getVirtualAddress() const {
- assert(isGlobal());
- DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n");
- if (isUndefined())
- return UINT32_MAX;
- if (VirtualAddress.hasValue())
- return VirtualAddress.getValue();
+void Symbol::setOutputSymbolIndex(uint32_t Index) {
+ LLVM_DEBUG(dbgs() << "setOutputSymbolIndex " << Name << " -> " << Index
+ << "\n");
+ assert(OutputSymbolIndex == INVALID_INDEX);
+ OutputSymbolIndex = Index;
+}
+
+bool Symbol::isWeak() const {
+ return (Flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK;
+}
- assert(Sym != nullptr);
- ObjFile *Obj = cast<ObjFile>(File);
- const WasmGlobal &Global =
- Obj->getWasmObj()->globals()[getGlobalIndex() - Obj->NumGlobalImports()];
- assert(Global.Type == llvm::wasm::WASM_TYPE_I32);
- assert(Segment);
- return Segment->translateVA(Global.InitExpr.Value.Int32);
+bool Symbol::isLocal() const {
+ return (Flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_LOCAL;
}
-uint32_t Symbol::getOutputIndex() const {
- if (isUndefined() && isWeak())
- return 0;
- return OutputIndex.getValue();
+bool Symbol::isHidden() const {
+ return (Flags & WASM_SYMBOL_VISIBILITY_MASK) == WASM_SYMBOL_VISIBILITY_HIDDEN;
}
-void Symbol::setVirtualAddress(uint32_t Value) {
- DEBUG(dbgs() << "setVirtualAddress " << Name << " -> " << Value << "\n");
- assert(!VirtualAddress.hasValue());
- VirtualAddress = Value;
+void Symbol::setHidden(bool IsHidden) {
+ LLVM_DEBUG(dbgs() << "setHidden: " << Name << " -> " << IsHidden << "\n");
+ Flags &= ~WASM_SYMBOL_VISIBILITY_MASK;
+ if (IsHidden)
+ Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN;
+ else
+ Flags |= WASM_SYMBOL_VISIBILITY_DEFAULT;
}
-void Symbol::setOutputIndex(uint32_t Index) {
- DEBUG(dbgs() << "setOutputIndex " << Name << " -> " << Index << "\n");
- assert(!OutputIndex.hasValue());
- OutputIndex = Index;
+bool Symbol::isExported() const {
+ if (!isDefined() || isLocal())
+ return false;
+
+ if (ForceExport || Config->ExportAll)
+ return true;
+
+ return !isHidden();
}
-void Symbol::setTableIndex(uint32_t Index) {
- DEBUG(dbgs() << "setTableIndex " << Name << " -> " << Index << "\n");
- assert(!TableIndex.hasValue());
+uint32_t FunctionSymbol::getFunctionIndex() const {
+ if (auto *F = dyn_cast<DefinedFunction>(this))
+ return F->Function->getFunctionIndex();
+ assert(FunctionIndex != INVALID_INDEX);
+ return FunctionIndex;
+}
+
+void FunctionSymbol::setFunctionIndex(uint32_t Index) {
+ LLVM_DEBUG(dbgs() << "setFunctionIndex " << Name << " -> " << Index << "\n");
+ assert(FunctionIndex == INVALID_INDEX);
+ FunctionIndex = Index;
+}
+
+bool FunctionSymbol::hasFunctionIndex() const {
+ if (auto *F = dyn_cast<DefinedFunction>(this))
+ return F->Function->hasFunctionIndex();
+ return FunctionIndex != INVALID_INDEX;
+}
+
+uint32_t FunctionSymbol::getTableIndex() const {
+ if (auto *F = dyn_cast<DefinedFunction>(this))
+ return F->Function->getTableIndex();
+ assert(TableIndex != INVALID_INDEX);
+ return TableIndex;
+}
+
+bool FunctionSymbol::hasTableIndex() const {
+ if (auto *F = dyn_cast<DefinedFunction>(this))
+ return F->Function->hasTableIndex();
+ return TableIndex != INVALID_INDEX;
+}
+
+void FunctionSymbol::setTableIndex(uint32_t Index) {
+ // For imports, we set the table index here on the Symbol; for defined
+ // functions we set the index on the InputFunction so that we don't export
+ // the same thing twice (keeps the table size down).
+ if (auto *F = dyn_cast<DefinedFunction>(this)) {
+ F->Function->setTableIndex(Index);
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "setTableIndex " << Name << " -> " << Index << "\n");
+ assert(TableIndex == INVALID_INDEX);
TableIndex = Index;
}
-void Symbol::update(Kind K, InputFile *F, const WasmSymbol *WasmSym,
- const InputSegment *Seg, const WasmSignature *Sig) {
- SymbolKind = K;
- File = F;
- Sym = WasmSym;
- Segment = Seg;
- FunctionType = Sig;
+DefinedFunction::DefinedFunction(StringRef Name, uint32_t Flags, InputFile *F,
+ InputFunction *Function)
+ : FunctionSymbol(Name, DefinedFunctionKind, Flags, F,
+ Function ? &Function->Signature : nullptr),
+ Function(Function) {}
+
+uint32_t DefinedData::getVirtualAddress() const {
+ LLVM_DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n");
+ if (Segment)
+ return Segment->OutputSeg->StartVA + Segment->OutputSegmentOffset + Offset;
+ return Offset;
+}
+
+void DefinedData::setVirtualAddress(uint32_t Value) {
+ LLVM_DEBUG(dbgs() << "setVirtualAddress " << Name << " -> " << Value << "\n");
+ assert(!Segment);
+ Offset = Value;
+}
+
+uint32_t DefinedData::getOutputSegmentOffset() const {
+ LLVM_DEBUG(dbgs() << "getOutputSegmentOffset: " << getName() << "\n");
+ return Segment->OutputSegmentOffset + Offset;
+}
+
+uint32_t DefinedData::getOutputSegmentIndex() const {
+ LLVM_DEBUG(dbgs() << "getOutputSegmentIndex: " << getName() << "\n");
+ return Segment->OutputSeg->Index;
+}
+
+uint32_t GlobalSymbol::getGlobalIndex() const {
+ if (auto *F = dyn_cast<DefinedGlobal>(this))
+ return F->Global->getGlobalIndex();
+ assert(GlobalIndex != INVALID_INDEX);
+ return GlobalIndex;
+}
+
+void GlobalSymbol::setGlobalIndex(uint32_t Index) {
+ LLVM_DEBUG(dbgs() << "setGlobalIndex " << Name << " -> " << Index << "\n");
+ assert(GlobalIndex == INVALID_INDEX);
+ GlobalIndex = Index;
+}
+
+bool GlobalSymbol::hasGlobalIndex() const {
+ if (auto *F = dyn_cast<DefinedGlobal>(this))
+ return F->Global->hasGlobalIndex();
+ return GlobalIndex != INVALID_INDEX;
}
-bool Symbol::isWeak() const { return Sym && Sym->isWeak(); }
+DefinedGlobal::DefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File,
+ InputGlobal *Global)
+ : GlobalSymbol(Name, DefinedGlobalKind, Flags, File,
+ Global ? &Global->getType() : nullptr),
+ Global(Global) {}
+
+uint32_t SectionSymbol::getOutputSectionIndex() const {
+ LLVM_DEBUG(dbgs() << "getOutputSectionIndex: " << getName() << "\n");
+ assert(OutputSectionIndex != INVALID_INDEX);
+ return OutputSectionIndex;
+}
+
+void SectionSymbol::setOutputSectionIndex(uint32_t Index) {
+ LLVM_DEBUG(dbgs() << "setOutputSectionIndex: " << getName() << " -> " << Index
+ << "\n");
+ assert(Index != INVALID_INDEX);
+ OutputSectionIndex = Index;
+}
-bool Symbol::isHidden() const { return Sym && Sym->isHidden(); }
+void LazySymbol::fetch() { cast<ArchiveFile>(File)->addMember(&ArchiveSymbol); }
std::string lld::toString(const wasm::Symbol &Sym) {
if (Config->Demangle)
if (Optional<std::string> S = demangleItanium(Sym.getName()))
- return "`" + *S + "'";
+ return *S;
return Sym.getName();
}
@@ -101,14 +233,20 @@ std::string lld::toString(wasm::Symbol::Kind Kind) {
switch (Kind) {
case wasm::Symbol::DefinedFunctionKind:
return "DefinedFunction";
+ case wasm::Symbol::DefinedDataKind:
+ return "DefinedData";
case wasm::Symbol::DefinedGlobalKind:
return "DefinedGlobal";
case wasm::Symbol::UndefinedFunctionKind:
return "UndefinedFunction";
+ case wasm::Symbol::UndefinedDataKind:
+ return "UndefinedData";
case wasm::Symbol::UndefinedGlobalKind:
return "UndefinedGlobal";
case wasm::Symbol::LazyKind:
return "LazyKind";
+ case wasm::Symbol::SectionKind:
+ return "SectionKind";
}
- llvm_unreachable("Invalid symbol kind!");
+ llvm_unreachable("invalid symbol kind");
}
diff --git a/wasm/Symbols.h b/wasm/Symbols.h
index 8194bcaca383..815cc97d22d1 100644
--- a/wasm/Symbols.h
+++ b/wasm/Symbols.h
@@ -10,53 +10,59 @@
#ifndef LLD_WASM_SYMBOLS_H
#define LLD_WASM_SYMBOLS_H
+#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
using llvm::object::Archive;
using llvm::object::WasmSymbol;
-using llvm::wasm::WasmExport;
-using llvm::wasm::WasmImport;
+using llvm::wasm::WasmGlobal;
+using llvm::wasm::WasmGlobalType;
using llvm::wasm::WasmSignature;
+using llvm::wasm::WasmSymbolType;
namespace lld {
namespace wasm {
class InputFile;
+class InputChunk;
class InputSegment;
+class InputFunction;
+class InputGlobal;
+class InputSection;
+#define INVALID_INDEX UINT32_MAX
+
+// The base class for real symbol classes.
class Symbol {
public:
enum Kind {
DefinedFunctionKind,
+ DefinedDataKind,
DefinedGlobalKind,
-
- LazyKind,
+ SectionKind,
UndefinedFunctionKind,
+ UndefinedDataKind,
UndefinedGlobalKind,
-
- LastDefinedKind = DefinedGlobalKind,
- InvalidKind,
+ LazyKind,
};
- Symbol(StringRef Name, bool IsLocal)
- : WrittenToSymtab(0), WrittenToNameSec(0), IsLocal(IsLocal), Name(Name) {}
+ Kind kind() const { return SymbolKind; }
- Kind getKind() const { return SymbolKind; }
+ bool isDefined() const {
+ return SymbolKind == DefinedFunctionKind || SymbolKind == DefinedDataKind ||
+ SymbolKind == DefinedGlobalKind || SymbolKind == SectionKind;
+ }
- bool isLazy() const { return SymbolKind == LazyKind; }
- bool isDefined() const { return SymbolKind <= LastDefinedKind; }
bool isUndefined() const {
- return SymbolKind == UndefinedGlobalKind ||
- SymbolKind == UndefinedFunctionKind;
+ return SymbolKind == UndefinedFunctionKind ||
+ SymbolKind == UndefinedDataKind || SymbolKind == UndefinedGlobalKind;
}
- bool isFunction() const {
- return SymbolKind == DefinedFunctionKind ||
- SymbolKind == UndefinedFunctionKind;
- }
- bool isGlobal() const { return !isFunction(); }
- bool isLocal() const { return IsLocal; }
+
+ bool isLazy() const { return SymbolKind == LazyKind; }
+
+ bool isLocal() const;
bool isWeak() const;
bool isHidden() const;
@@ -66,57 +72,278 @@ public:
// Returns the file from which this symbol was created.
InputFile *getFile() const { return File; }
- uint32_t getGlobalIndex() const;
- uint32_t getFunctionIndex() const;
+ InputChunk *getChunk() const;
- bool hasFunctionType() const { return FunctionType != nullptr; }
- const WasmSignature &getFunctionType() const;
- uint32_t getOutputIndex() const;
- uint32_t getTableIndex() const { return TableIndex.getValue(); }
+ // Indicates that the section or import for this symbol will be included in
+ // the final image.
+ bool isLive() const;
- // Returns the virtual address of a defined global.
- // Only works for globals, not functions.
- uint32_t getVirtualAddress() const;
+ // Marks the symbol's InputChunk as Live, so that it will be included in the
+ // final image.
+ void markLive();
+
+ void setHidden(bool IsHidden);
- // Set the output index of the symbol (in the function or global index
- // space of the output object.
- void setOutputIndex(uint32_t Index);
+ // Get/set the index in the output symbol table. This is only used for
+ // relocatable output.
+ uint32_t getOutputSymbolIndex() const;
+ void setOutputSymbolIndex(uint32_t Index);
- // Returns true if a table index has been set for this symbol
- bool hasTableIndex() const { return TableIndex.hasValue(); }
+ WasmSymbolType getWasmType() const;
+ bool isExported() const;
- // Set the table index of the symbol
+ // True if this symbol was referenced by a regular (non-bitcode) object.
+ unsigned IsUsedInRegularObj : 1;
+ unsigned ForceExport : 1;
+
+protected:
+ Symbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F)
+ : IsUsedInRegularObj(false), ForceExport(false), Name(Name),
+ SymbolKind(K), Flags(Flags), File(F), Referenced(!Config->GcSections) {}
+
+ StringRef Name;
+ Kind SymbolKind;
+ uint32_t Flags;
+ InputFile *File;
+ uint32_t OutputSymbolIndex = INVALID_INDEX;
+ bool Referenced;
+};
+
+class FunctionSymbol : public Symbol {
+public:
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedFunctionKind ||
+ S->kind() == UndefinedFunctionKind;
+ }
+
+ // Get/set the table index
void setTableIndex(uint32_t Index);
+ uint32_t getTableIndex() const;
+ bool hasTableIndex() const;
+
+ // Get/set the function index
+ uint32_t getFunctionIndex() const;
+ void setFunctionIndex(uint32_t Index);
+ bool hasFunctionIndex() const;
+ const WasmSignature *FunctionType;
+
+protected:
+ FunctionSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F,
+ const WasmSignature *Type)
+ : Symbol(Name, K, Flags, F), FunctionType(Type) {}
+
+ uint32_t TableIndex = INVALID_INDEX;
+ uint32_t FunctionIndex = INVALID_INDEX;
+};
+
+class DefinedFunction : public FunctionSymbol {
+public:
+ DefinedFunction(StringRef Name, uint32_t Flags, InputFile *F,
+ InputFunction *Function);
+
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedFunctionKind;
+ }
+
+ InputFunction *Function;
+};
+
+class UndefinedFunction : public FunctionSymbol {
+public:
+ UndefinedFunction(StringRef Name, uint32_t Flags, InputFile *File = nullptr,
+ const WasmSignature *Type = nullptr)
+ : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type) {}
+
+ static bool classof(const Symbol *S) {
+ return S->kind() == UndefinedFunctionKind;
+ }
+};
+
+class SectionSymbol : public Symbol {
+public:
+ static bool classof(const Symbol *S) { return S->kind() == SectionKind; }
+
+ SectionSymbol(StringRef Name, uint32_t Flags, const InputSection *S,
+ InputFile *F = nullptr)
+ : Symbol(Name, SectionKind, Flags, F), Section(S) {}
+
+ const InputSection *Section;
+
+ uint32_t getOutputSectionIndex() const;
+ void setOutputSectionIndex(uint32_t Index);
+
+protected:
+ uint32_t OutputSectionIndex = INVALID_INDEX;
+};
+
+class DataSymbol : public Symbol {
+public:
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedDataKind || S->kind() == UndefinedDataKind;
+ }
+
+protected:
+ DataSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F)
+ : Symbol(Name, K, Flags, F) {}
+};
+
+class DefinedData : public DataSymbol {
+public:
+ // Constructor for regular data symbols originating from input files.
+ DefinedData(StringRef Name, uint32_t Flags, InputFile *F,
+ InputSegment *Segment, uint32_t Offset, uint32_t Size)
+ : DataSymbol(Name, DefinedDataKind, Flags, F), Segment(Segment),
+ Offset(Offset), Size(Size) {}
+
+ // Constructor for linker synthetic data symbols.
+ DefinedData(StringRef Name, uint32_t Flags)
+ : DataSymbol(Name, DefinedDataKind, Flags, nullptr) {}
+
+ static bool classof(const Symbol *S) { return S->kind() == DefinedDataKind; }
+
+ // Returns the output virtual address of a defined data symbol.
+ uint32_t getVirtualAddress() const;
void setVirtualAddress(uint32_t VA);
- void update(Kind K, InputFile *F = nullptr, const WasmSymbol *Sym = nullptr,
- const InputSegment *Segment = nullptr,
- const WasmSignature *Sig = nullptr);
+ // Returns the offset of a defined data symbol within its OutputSegment.
+ uint32_t getOutputSegmentOffset() const;
+ uint32_t getOutputSegmentIndex() const;
+ uint32_t getSize() const { return Size; }
+
+ InputSegment *Segment = nullptr;
+
+protected:
+ uint32_t Offset = 0;
+ uint32_t Size = 0;
+};
+
+class UndefinedData : public DataSymbol {
+public:
+ UndefinedData(StringRef Name, uint32_t Flags, InputFile *File = nullptr)
+ : DataSymbol(Name, UndefinedDataKind, Flags, File) {}
+ static bool classof(const Symbol *S) {
+ return S->kind() == UndefinedDataKind;
+ }
+};
- void setArchiveSymbol(const Archive::Symbol &Sym) { ArchiveSymbol = Sym; }
- const Archive::Symbol &getArchiveSymbol() { return ArchiveSymbol; }
+class GlobalSymbol : public Symbol {
+public:
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedGlobalKind || S->kind() == UndefinedGlobalKind;
+ }
- // This bit is used by Writer::writeNameSection() to prevent
- // symbols from being written to the symbol table more than once.
- unsigned WrittenToSymtab : 1;
- unsigned WrittenToNameSec : 1;
+ const WasmGlobalType *getGlobalType() const { return GlobalType; }
+
+ // Get/set the global index
+ uint32_t getGlobalIndex() const;
+ void setGlobalIndex(uint32_t Index);
+ bool hasGlobalIndex() const;
protected:
- unsigned IsLocal : 1;
+ GlobalSymbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F,
+ const WasmGlobalType *GlobalType)
+ : Symbol(Name, K, Flags, F), GlobalType(GlobalType) {}
- StringRef Name;
- Archive::Symbol ArchiveSymbol = {nullptr, 0, 0};
- Kind SymbolKind = InvalidKind;
- InputFile *File = nullptr;
- const WasmSymbol *Sym = nullptr;
- const InputSegment *Segment = nullptr;
- llvm::Optional<uint32_t> OutputIndex;
- llvm::Optional<uint32_t> TableIndex;
- llvm::Optional<uint32_t> VirtualAddress;
- const WasmSignature *FunctionType;
+ // Explicit global type, needed for undefined or synthetic globals only.
+ // For regular defined globals this information comes from the InputGlobal.
+ const WasmGlobalType *GlobalType;
+ uint32_t GlobalIndex = INVALID_INDEX;
};
+class DefinedGlobal : public GlobalSymbol {
+public:
+ DefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File,
+ InputGlobal *Global);
+
+ static bool classof(const Symbol *S) {
+ return S->kind() == DefinedGlobalKind;
+ }
+
+ InputGlobal *Global;
+};
+
+class UndefinedGlobal : public GlobalSymbol {
+public:
+ UndefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File = nullptr,
+ const WasmGlobalType *Type = nullptr)
+ : GlobalSymbol(Name, UndefinedGlobalKind, Flags, File, Type) {}
+
+ static bool classof(const Symbol *S) {
+ return S->kind() == UndefinedGlobalKind;
+ }
+};
+
+class LazySymbol : public Symbol {
+public:
+ LazySymbol(StringRef Name, InputFile *File, const Archive::Symbol &Sym)
+ : Symbol(Name, LazyKind, 0, File), ArchiveSymbol(Sym) {}
+
+ static bool classof(const Symbol *S) { return S->kind() == LazyKind; }
+ void fetch();
+
+private:
+ Archive::Symbol ArchiveSymbol;
+};
+
+// linker-generated symbols
+struct WasmSym {
+ // __stack_pointer
+ // Global that holds the address of the top of the explicit value stack in
+ // linear memory.
+ static DefinedGlobal *StackPointer;
+
+ // __data_end
+ // Symbol marking the end of the data and bss.
+ static DefinedData *DataEnd;
+
+ // __heap_base
+ // Symbol marking the end of the data, bss and explicit stack. Any linear
+ // memory following this address is not used by the linked code and can
+ // therefore be used as a backing store for brk()/malloc() implementations.
+ static DefinedData *HeapBase;
+
+ // __wasm_call_ctors
+ // Function that directly calls all ctors in priority order.
+ static DefinedFunction *CallCtors;
+
+ // __dso_handle
+ // Symbol used in calls to __cxa_atexit to determine current DLL
+ static DefinedData *DsoHandle;
+};
+
+// A buffer class that is large enough to hold any Symbol-derived
+// object. We allocate memory using this class and instantiate a symbol
+// using placement new.
+union SymbolUnion {
+ alignas(DefinedFunction) char A[sizeof(DefinedFunction)];
+ alignas(DefinedData) char B[sizeof(DefinedData)];
+ alignas(DefinedGlobal) char C[sizeof(DefinedGlobal)];
+ alignas(LazySymbol) char D[sizeof(LazySymbol)];
+ alignas(UndefinedFunction) char E[sizeof(UndefinedFunction)];
+ alignas(UndefinedData) char F[sizeof(UndefinedData)];
+ alignas(UndefinedGlobal) char G[sizeof(UndefinedGlobal)];
+ alignas(SectionSymbol) char I[sizeof(SectionSymbol)];
+};
+
+template <typename T, typename... ArgT>
+T *replaceSymbol(Symbol *S, ArgT &&... Arg) {
+ static_assert(std::is_trivially_destructible<T>(),
+ "Symbol types must be trivially destructible");
+ static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
+ static_assert(alignof(T) <= alignof(SymbolUnion),
+ "SymbolUnion not aligned enough");
+ assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
+ "Not a Symbol");
+ // Snapshot *S first: the placement new below resets the sticky flags.
+ Symbol SymCopy = *S;
+
+ T *S2 = new (S) T(std::forward<ArgT>(Arg)...);
+ S2->IsUsedInRegularObj = SymCopy.IsUsedInRegularObj;
+ S2->ForceExport = SymCopy.ForceExport;
+ return S2;
+}
+
} // namespace wasm
// Returns a symbol name for an error message.
diff --git a/wasm/Writer.cpp b/wasm/Writer.cpp
index e7dd49d52213..37ad32452a91 100644
--- a/wasm/Writer.cpp
+++ b/wasm/Writer.cpp
@@ -8,21 +8,28 @@
//===----------------------------------------------------------------------===//
#include "Writer.h"
-
#include "Config.h"
+#include "InputChunks.h"
+#include "InputGlobal.h"
#include "OutputSections.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
+#include "lld/Common/Strings.h"
#include "lld/Common/Threads.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/Object/WasmTraits.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include <cstdarg>
+#include <map>
#define DEBUG_TYPE "lld"
@@ -32,31 +39,16 @@ using namespace lld;
using namespace lld::wasm;
static constexpr int kStackAlignment = 16;
+static constexpr int kInitialTableOffset = 1;
+static constexpr const char *kFunctionTableName = "__indirect_function_table";
namespace {
-// Traits for using WasmSignature in a DenseMap.
-struct WasmSignatureDenseMapInfo {
- static WasmSignature getEmptyKey() {
- WasmSignature Sig;
- Sig.ReturnType = 1;
- return Sig;
- }
- static WasmSignature getTombstoneKey() {
- WasmSignature Sig;
- Sig.ReturnType = 2;
- return Sig;
- }
- static unsigned getHashValue(const WasmSignature &Sig) {
- uintptr_t Value = 0;
- Value += DenseMapInfo<int32_t>::getHashValue(Sig.ReturnType);
- for (int32_t Param : Sig.ParamTypes)
- Value += DenseMapInfo<int32_t>::getHashValue(Param);
- return Value;
- }
- static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) {
- return LHS == RHS;
- }
+// An init entry to be written to either the synthetic init func or the
+// linking metadata.
+struct WasmInitEntry {
+ const FunctionSymbol *Sym;
+ uint32_t Priority;
};
// The writer writes a SymbolTable result to a file.
@@ -67,17 +59,22 @@ public:
private:
void openFile();
- uint32_t getTypeIndex(const WasmSignature &Sig);
- void assignSymbolIndexes();
+ uint32_t lookupType(const WasmSignature &Sig);
+ uint32_t registerType(const WasmSignature &Sig);
+
+ void createCtorFunction();
+ void calculateInitFunctions();
+ void assignIndexes();
void calculateImports();
- void calculateOffsets();
+ void calculateExports();
+ void calculateCustomSections();
+ void assignSymtab();
void calculateTypes();
void createOutputSegments();
void layoutMemory();
void createHeader();
void createSections();
- SyntheticSection *createSyntheticSection(uint32_t Type,
- std::string Name = "");
+ SyntheticSection *createSyntheticSection(uint32_t Type, StringRef Name = "");
// Builtin sections
void createTypeSection();
@@ -88,9 +85,9 @@ private:
void createImportSection();
void createMemorySection();
void createElemSection();
- void createStartSection();
void createCodeSection();
void createDataSection();
+ void createCustomSections();
// Custom sections
void createRelocSections();
@@ -101,17 +98,24 @@ private:
void writeSections();
uint64_t FileSize = 0;
- uint32_t DataSize = 0;
- uint32_t NumFunctions = 0;
uint32_t NumMemoryPages = 0;
- uint32_t InitialTableOffset = 0;
+ uint32_t MaxMemoryPages = 0;
std::vector<const WasmSignature *> Types;
- DenseMap<WasmSignature, int32_t, WasmSignatureDenseMapInfo> TypeIndices;
- std::vector<const Symbol *> FunctionImports;
- std::vector<const Symbol *> GlobalImports;
- std::vector<const Symbol *> DefinedGlobals;
- std::vector<const Symbol *> IndirectFunctions;
+ DenseMap<WasmSignature, int32_t> TypeIndices;
+ std::vector<const Symbol *> ImportedSymbols;
+ unsigned NumImportedFunctions = 0;
+ unsigned NumImportedGlobals = 0;
+ std::vector<WasmExport> Exports;
+ std::vector<const DefinedData *> DefinedFakeGlobals;
+ std::vector<InputGlobal *> InputGlobals;
+ std::vector<InputFunction *> InputFunctions;
+ std::vector<const FunctionSymbol *> IndirectFunctions;
+ std::vector<const Symbol *> SymtabEntries;
+ std::vector<WasmInitEntry> InitFunctions;
+
+ llvm::StringMap<std::vector<InputSection *>> CustomSectionMapping;
+ llvm::StringMap<SectionSymbol *> CustomSectionSymbols;
// Elements that are used to construct the final output
std::string Header;
@@ -125,20 +129,12 @@ private:
} // anonymous namespace
-static void debugPrint(const char *fmt, ...) {
- if (!errorHandler().Verbose)
- return;
- fprintf(stderr, "lld: ");
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
-}
-
void Writer::createImportSection() {
- uint32_t NumImports = FunctionImports.size() + GlobalImports.size();
+ uint32_t NumImports = ImportedSymbols.size();
if (Config->ImportMemory)
++NumImports;
+ if (Config->ImportTable)
+ ++NumImports;
if (NumImports == 0)
return;
@@ -148,16 +144,6 @@ void Writer::createImportSection() {
writeUleb128(OS, NumImports, "import count");
- for (const Symbol *Sym : FunctionImports) {
- WasmImport Import;
- Import.Module = "env";
- Import.Field = Sym->getName();
- Import.Kind = WASM_EXTERNAL_FUNCTION;
- assert(TypeIndices.count(Sym->getFunctionType()) > 0);
- Import.SigIndex = TypeIndices.lookup(Sym->getFunctionType());
- writeImport(OS, Import);
- }
-
if (Config->ImportMemory) {
WasmImport Import;
Import.Module = "env";
@@ -165,16 +151,36 @@ void Writer::createImportSection() {
Import.Kind = WASM_EXTERNAL_MEMORY;
Import.Memory.Flags = 0;
Import.Memory.Initial = NumMemoryPages;
+ if (MaxMemoryPages != 0) {
+ Import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
+ Import.Memory.Maximum = MaxMemoryPages;
+ }
writeImport(OS, Import);
}
- for (const Symbol *Sym : GlobalImports) {
+ if (Config->ImportTable) {
+ uint32_t TableSize = kInitialTableOffset + IndirectFunctions.size();
+ WasmImport Import;
+ Import.Module = "env";
+ Import.Field = kFunctionTableName;
+ Import.Kind = WASM_EXTERNAL_TABLE;
+ Import.Table.ElemType = WASM_TYPE_ANYFUNC;
+ Import.Table.Limits = {WASM_LIMITS_FLAG_HAS_MAX, TableSize, TableSize};
+ writeImport(OS, Import);
+ }
+
+ for (const Symbol *Sym : ImportedSymbols) {
WasmImport Import;
Import.Module = "env";
Import.Field = Sym->getName();
- Import.Kind = WASM_EXTERNAL_GLOBAL;
- Import.Global.Mutable = false;
- Import.Global.Type = WASM_TYPE_I32;
+ if (auto *FunctionSym = dyn_cast<FunctionSymbol>(Sym)) {
+ Import.Kind = WASM_EXTERNAL_FUNCTION;
+ Import.SigIndex = lookupType(*FunctionSym->FunctionType);
+ } else {
+ auto *GlobalSym = cast<GlobalSymbol>(Sym);
+ Import.Kind = WASM_EXTERNAL_GLOBAL;
+ Import.Global = *GlobalSym->getGlobalType();
+ }
writeImport(OS, Import);
}
}
@@ -188,16 +194,15 @@ void Writer::createTypeSection() {
}
void Writer::createFunctionSection() {
- if (!NumFunctions)
+ if (InputFunctions.empty())
return;
SyntheticSection *Section = createSyntheticSection(WASM_SEC_FUNCTION);
raw_ostream &OS = Section->getStream();
- writeUleb128(OS, NumFunctions, "function count");
- for (ObjFile *File : Symtab->ObjectFiles)
- for (uint32_t Sig : File->getWasmObj()->functionTypes())
- writeUleb128(OS, File->relocateTypeIndex(Sig), "sig index");
+ writeUleb128(OS, InputFunctions.size(), "function count");
+ for (const InputFunction *Func : InputFunctions)
+ writeUleb128(OS, lookupType(Func->Signature), "sig index");
}
void Writer::createMemorySection() {
@@ -207,23 +212,29 @@ void Writer::createMemorySection() {
SyntheticSection *Section = createSyntheticSection(WASM_SEC_MEMORY);
raw_ostream &OS = Section->getStream();
+ bool HasMax = MaxMemoryPages != 0;
writeUleb128(OS, 1, "memory count");
- writeUleb128(OS, 0, "memory limits flags");
+ writeUleb128(OS, HasMax ? static_cast<unsigned>(WASM_LIMITS_FLAG_HAS_MAX) : 0,
+ "memory limits flags");
writeUleb128(OS, NumMemoryPages, "initial pages");
+ if (HasMax)
+ writeUleb128(OS, MaxMemoryPages, "max pages");
}
void Writer::createGlobalSection() {
- if (DefinedGlobals.empty())
+ unsigned NumGlobals = InputGlobals.size() + DefinedFakeGlobals.size();
+ if (NumGlobals == 0)
return;
SyntheticSection *Section = createSyntheticSection(WASM_SEC_GLOBAL);
raw_ostream &OS = Section->getStream();
- writeUleb128(OS, DefinedGlobals.size(), "global count");
- for (const Symbol *Sym : DefinedGlobals) {
+ writeUleb128(OS, NumGlobals, "global count");
+ for (const InputGlobal *G : InputGlobals)
+ writeGlobal(OS, G->Global);
+ for (const DefinedData *Sym : DefinedFakeGlobals) {
WasmGlobal Global;
- Global.Type = WASM_TYPE_I32;
- Global.Mutable = Sym == Config->StackPointerSymbol;
+ Global.Type = {WASM_TYPE_I32, false};
Global.InitExpr.Opcode = WASM_OPCODE_I32_CONST;
Global.InitExpr.Value.Int32 = Sym->getVirtualAddress();
writeGlobal(OS, Global);
@@ -231,88 +242,73 @@ void Writer::createGlobalSection() {
}
void Writer::createTableSection() {
- // Always output a table section, even if there are no indirect calls.
- // There are two reasons for this:
+ if (Config->ImportTable)
+ return;
+
+ // Always output a table section (or table import), even if there are no
+ // indirect calls. There are two reasons for this:
// 1. For executables it is useful to have an empty table slot at 0
// which can be filled with a null function call handler.
// 2. If we don't do this, any program that contains a call_indirect but
// no address-taken function will fail at validation time since it is
// a validation error to include a call_indirect instruction if there
// is not table.
- uint32_t TableSize = InitialTableOffset + IndirectFunctions.size();
+ uint32_t TableSize = kInitialTableOffset + IndirectFunctions.size();
SyntheticSection *Section = createSyntheticSection(WASM_SEC_TABLE);
raw_ostream &OS = Section->getStream();
writeUleb128(OS, 1, "table count");
- writeSleb128(OS, WASM_TYPE_ANYFUNC, "table type");
- writeUleb128(OS, WASM_LIMITS_FLAG_HAS_MAX, "table flags");
- writeUleb128(OS, TableSize, "table initial size");
- writeUleb128(OS, TableSize, "table max size");
+ WasmLimits Limits = {WASM_LIMITS_FLAG_HAS_MAX, TableSize, TableSize};
+ writeTableType(OS, WasmTable{WASM_TYPE_ANYFUNC, Limits});
}
void Writer::createExportSection() {
- bool ExportMemory = !Config->Relocatable && !Config->ImportMemory;
- Symbol *EntrySym = Symtab->find(Config->Entry);
- bool ExportEntry = !Config->Relocatable && EntrySym && EntrySym->isDefined();
- bool ExportHidden = Config->EmitRelocs;
-
- uint32_t NumExports = ExportMemory ? 1 : 0;
-
- std::vector<const Symbol *> SymbolExports;
- if (ExportEntry)
- SymbolExports.emplace_back(EntrySym);
-
- for (const Symbol *Sym : Symtab->getSymbols()) {
- if (Sym->isUndefined() || Sym->isGlobal())
- continue;
- if (Sym->isHidden() && !ExportHidden)
- continue;
- if (ExportEntry && Sym == EntrySym)
- continue;
- SymbolExports.emplace_back(Sym);
- }
-
- for (const Symbol *Sym : DefinedGlobals) {
- // Can't export the SP right now because it mutable and mutable globals
- // connot be exported.
- if (Sym == Config->StackPointerSymbol)
- continue;
- SymbolExports.emplace_back(Sym);
- }
-
- NumExports += SymbolExports.size();
- if (!NumExports)
+ if (!Exports.size())
return;
SyntheticSection *Section = createSyntheticSection(WASM_SEC_EXPORT);
raw_ostream &OS = Section->getStream();
- writeUleb128(OS, NumExports, "export count");
+ writeUleb128(OS, Exports.size(), "export count");
+ for (const WasmExport &Export : Exports)
+ writeExport(OS, Export);
+}
- if (ExportMemory) {
- WasmExport MemoryExport;
- MemoryExport.Name = "memory";
- MemoryExport.Kind = WASM_EXTERNAL_MEMORY;
- MemoryExport.Index = 0;
- writeExport(OS, MemoryExport);
+void Writer::calculateCustomSections() {
+ log("calculateCustomSections");
+ bool StripDebug = Config->StripDebug || Config->StripAll;
+ for (ObjFile *File : Symtab->ObjectFiles) {
+ for (InputSection *Section : File->CustomSections) {
+ StringRef Name = Section->getName();
+ // These custom sections are known to the linker and synthesized rather
+ // than blindly copied
+ if (Name == "linking" || Name == "name" || Name.startswith("reloc."))
+ continue;
+ // .. or it is a debug section
+ if (StripDebug && Name.startswith(".debug_"))
+ continue;
+ CustomSectionMapping[Name].push_back(Section);
+ }
}
+}
- for (const Symbol *Sym : SymbolExports) {
- log("Export: " + Sym->getName());
- WasmExport Export;
- Export.Name = Sym->getName();
- Export.Index = Sym->getOutputIndex();
- if (Sym->isFunction())
- Export.Kind = WASM_EXTERNAL_FUNCTION;
- else
- Export.Kind = WASM_EXTERNAL_GLOBAL;
- writeExport(OS, Export);
+void Writer::createCustomSections() {
+ log("createCustomSections");
+ for (auto &Pair : CustomSectionMapping) {
+ StringRef Name = Pair.first();
+
+ auto P = CustomSectionSymbols.find(Name);
+ if (P != CustomSectionSymbols.end()) {
+ uint32_t SectionIndex = OutputSections.size();
+ P->second->setOutputSectionIndex(SectionIndex);
+ }
+
+ LLVM_DEBUG(dbgs() << "createCustomSection: " << Name << "\n");
+ OutputSections.push_back(make<CustomSection>(Name, Pair.second));
}
}
-void Writer::createStartSection() {}
-
void Writer::createElemSection() {
if (IndirectFunctions.empty())
return;
@@ -324,25 +320,25 @@ void Writer::createElemSection() {
writeUleb128(OS, 0, "table index");
WasmInitExpr InitExpr;
InitExpr.Opcode = WASM_OPCODE_I32_CONST;
- InitExpr.Value.Int32 = InitialTableOffset;
+ InitExpr.Value.Int32 = kInitialTableOffset;
writeInitExpr(OS, InitExpr);
writeUleb128(OS, IndirectFunctions.size(), "elem count");
- uint32_t TableIndex = InitialTableOffset;
- for (const Symbol *Sym : IndirectFunctions) {
+ uint32_t TableIndex = kInitialTableOffset;
+ for (const FunctionSymbol *Sym : IndirectFunctions) {
assert(Sym->getTableIndex() == TableIndex);
- writeUleb128(OS, Sym->getOutputIndex(), "function index");
+ writeUleb128(OS, Sym->getFunctionIndex(), "function index");
++TableIndex;
}
}
void Writer::createCodeSection() {
- if (!NumFunctions)
+ if (InputFunctions.empty())
return;
log("createCodeSection");
- auto Section = make<CodeSection>(NumFunctions, Symtab->ObjectFiles);
+ auto Section = make<CodeSection>(InputFunctions);
OutputSections.push_back(Section);
}
@@ -361,28 +357,68 @@ void Writer::createRelocSections() {
log("createRelocSections");
// Don't use iterator here since we are adding to OutputSection
size_t OrigSize = OutputSections.size();
- for (size_t i = 0; i < OrigSize; i++) {
- OutputSection *S = OutputSections[i];
- const char *name;
- uint32_t Count = S->numRelocations();
+ for (size_t I = 0; I < OrigSize; I++) {
+ OutputSection *OSec = OutputSections[I];
+ uint32_t Count = OSec->numRelocations();
if (!Count)
continue;
- if (S->Type == WASM_SEC_DATA)
- name = "reloc.DATA";
- else if (S->Type == WASM_SEC_CODE)
- name = "reloc.CODE";
+ StringRef Name;
+ if (OSec->Type == WASM_SEC_DATA)
+ Name = "reloc.DATA";
+ else if (OSec->Type == WASM_SEC_CODE)
+ Name = "reloc.CODE";
+ else if (OSec->Type == WASM_SEC_CUSTOM)
+ Name = Saver.save("reloc." + OSec->Name);
else
- llvm_unreachable("relocations only supported for code and data");
+ llvm_unreachable(
+ "relocations only supported for code, data, or custom sections");
- SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, name);
+ SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, Name);
raw_ostream &OS = Section->getStream();
- writeUleb128(OS, S->Type, "reloc section");
+ writeUleb128(OS, I, "reloc section");
writeUleb128(OS, Count, "reloc count");
- S->writeRelocations(OS);
+ OSec->writeRelocations(OS);
}
}
+static uint32_t getWasmFlags(const Symbol *Sym) {
+ uint32_t Flags = 0;
+ if (Sym->isLocal())
+ Flags |= WASM_SYMBOL_BINDING_LOCAL;
+ if (Sym->isWeak())
+ Flags |= WASM_SYMBOL_BINDING_WEAK;
+ if (Sym->isHidden())
+ Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN;
+ if (Sym->isUndefined())
+ Flags |= WASM_SYMBOL_UNDEFINED;
+ return Flags;
+}
+
+// Some synthetic sections (e.g. "name" and "linking") have subsections.
+// Just like the synthetic sections themselves these need to be created before
+// they can be written out (since they are preceded by their length). This
+// class is used to create subsections and then write them into the stream
+// of the parent section.
+class SubSection {
+public:
+ explicit SubSection(uint32_t Type) : Type(Type) {}
+
+ void writeTo(raw_ostream &To) {
+ OS.flush();
+ writeUleb128(To, Type, "subsection type");
+ writeUleb128(To, Body.size(), "subsection size");
+ To.write(Body.data(), Body.size());
+ }
+
+private:
+ uint32_t Type;
+ std::string Body;
+
+public:
+ raw_string_ostream OS{Body};
+};
+
// Create the custom "linking" section containing linker metadata.
// This is only created when relocatable output is requested.
void Writer::createLinkingSection() {
@@ -390,82 +426,145 @@ void Writer::createLinkingSection() {
createSyntheticSection(WASM_SEC_CUSTOM, "linking");
raw_ostream &OS = Section->getStream();
- SubSection DataSizeSubSection(WASM_DATA_SIZE);
- writeUleb128(DataSizeSubSection.getStream(), DataSize, "data size");
- DataSizeSubSection.finalizeContents();
- DataSizeSubSection.writeToStream(OS);
+ writeUleb128(OS, WasmMetadataVersion, "Version");
+
+ if (!SymtabEntries.empty()) {
+ SubSection Sub(WASM_SYMBOL_TABLE);
+ writeUleb128(Sub.OS, SymtabEntries.size(), "num symbols");
+
+ for (const Symbol *Sym : SymtabEntries) {
+ assert(Sym->isDefined() || Sym->isUndefined());
+ WasmSymbolType Kind = Sym->getWasmType();
+ uint32_t Flags = getWasmFlags(Sym);
+
+ writeU8(Sub.OS, Kind, "sym kind");
+ writeUleb128(Sub.OS, Flags, "sym flags");
+
+ if (auto *F = dyn_cast<FunctionSymbol>(Sym)) {
+ writeUleb128(Sub.OS, F->getFunctionIndex(), "index");
+ if (Sym->isDefined())
+ writeStr(Sub.OS, Sym->getName(), "sym name");
+ } else if (auto *G = dyn_cast<GlobalSymbol>(Sym)) {
+ writeUleb128(Sub.OS, G->getGlobalIndex(), "index");
+ if (Sym->isDefined())
+ writeStr(Sub.OS, Sym->getName(), "sym name");
+ } else if (isa<DataSymbol>(Sym)) {
+ writeStr(Sub.OS, Sym->getName(), "sym name");
+ if (auto *DataSym = dyn_cast<DefinedData>(Sym)) {
+ writeUleb128(Sub.OS, DataSym->getOutputSegmentIndex(), "index");
+ writeUleb128(Sub.OS, DataSym->getOutputSegmentOffset(),
+ "data offset");
+ writeUleb128(Sub.OS, DataSym->getSize(), "data size");
+ }
+ } else {
+ auto *S = cast<SectionSymbol>(Sym);
+ writeUleb128(Sub.OS, S->getOutputSectionIndex(), "sym section index");
+ }
+ }
- if (!Config->Relocatable)
- return;
+ Sub.writeTo(OS);
+ }
if (Segments.size()) {
- SubSection SubSection(WASM_SEGMENT_INFO);
- writeUleb128(SubSection.getStream(), Segments.size(), "num data segments");
+ SubSection Sub(WASM_SEGMENT_INFO);
+ writeUleb128(Sub.OS, Segments.size(), "num data segments");
for (const OutputSegment *S : Segments) {
- writeStr(SubSection.getStream(), S->Name, "segment name");
- writeUleb128(SubSection.getStream(), S->Alignment, "alignment");
- writeUleb128(SubSection.getStream(), 0, "flags");
+ writeStr(Sub.OS, S->Name, "segment name");
+ writeUleb128(Sub.OS, S->Alignment, "alignment");
+ writeUleb128(Sub.OS, 0, "flags");
}
- SubSection.finalizeContents();
- SubSection.writeToStream(OS);
+ Sub.writeTo(OS);
}
- std::vector<WasmInitFunc> InitFunctions;
- for (ObjFile *File : Symtab->ObjectFiles) {
- const WasmLinkingData &L = File->getWasmObj()->linkingData();
- InitFunctions.reserve(InitFunctions.size() + L.InitFunctions.size());
- for (const WasmInitFunc &F : L.InitFunctions)
- InitFunctions.emplace_back(WasmInitFunc{
- F.Priority, File->relocateFunctionIndex(F.FunctionIndex)});
+ if (!InitFunctions.empty()) {
+ SubSection Sub(WASM_INIT_FUNCS);
+ writeUleb128(Sub.OS, InitFunctions.size(), "num init functions");
+ for (const WasmInitEntry &F : InitFunctions) {
+ writeUleb128(Sub.OS, F.Priority, "priority");
+ writeUleb128(Sub.OS, F.Sym->getOutputSymbolIndex(), "function index");
+ }
+ Sub.writeTo(OS);
}
- if (!InitFunctions.empty()) {
- SubSection SubSection(WASM_INIT_FUNCS);
- writeUleb128(SubSection.getStream(), InitFunctions.size(),
- "num init functionsw");
- for (const WasmInitFunc &F : InitFunctions) {
- writeUleb128(SubSection.getStream(), F.Priority, "priority");
- writeUleb128(SubSection.getStream(), F.FunctionIndex, "function index");
+ struct ComdatEntry {
+ unsigned Kind;
+ uint32_t Index;
+ };
+ std::map<StringRef, std::vector<ComdatEntry>> Comdats;
+
+ for (const InputFunction *F : InputFunctions) {
+ StringRef Comdat = F->getComdatName();
+ if (!Comdat.empty())
+ Comdats[Comdat].emplace_back(
+ ComdatEntry{WASM_COMDAT_FUNCTION, F->getFunctionIndex()});
+ }
+ for (uint32_t I = 0; I < Segments.size(); ++I) {
+ const auto &InputSegments = Segments[I]->InputSegments;
+ if (InputSegments.empty())
+ continue;
+ StringRef Comdat = InputSegments[0]->getComdatName();
+#ifndef NDEBUG
+ for (const InputSegment *IS : InputSegments)
+ assert(IS->getComdatName() == Comdat);
+#endif
+ if (!Comdat.empty())
+ Comdats[Comdat].emplace_back(ComdatEntry{WASM_COMDAT_DATA, I});
+ }
+
+ if (!Comdats.empty()) {
+ SubSection Sub(WASM_COMDAT_INFO);
+ writeUleb128(Sub.OS, Comdats.size(), "num comdats");
+ for (const auto &C : Comdats) {
+ writeStr(Sub.OS, C.first, "comdat name");
+ writeUleb128(Sub.OS, 0, "comdat flags"); // flags for future use
+ writeUleb128(Sub.OS, C.second.size(), "num entries");
+ for (const ComdatEntry &Entry : C.second) {
+ writeU8(Sub.OS, Entry.Kind, "entry kind");
+ writeUleb128(Sub.OS, Entry.Index, "entry index");
+ }
}
- SubSection.finalizeContents();
- SubSection.writeToStream(OS);
+ Sub.writeTo(OS);
}
}
// Create the custom "name" section containing debug symbol names.
void Writer::createNameSection() {
- // Create an array of all function sorted by function index space
- std::vector<const Symbol *> Names;
+ unsigned NumNames = NumImportedFunctions;
+ for (const InputFunction *F : InputFunctions)
+ if (!F->getName().empty() || !F->getDebugName().empty())
+ ++NumNames;
- for (ObjFile *File : Symtab->ObjectFiles) {
- Names.reserve(Names.size() + File->getSymbols().size());
- for (Symbol *S : File->getSymbols()) {
- if (!S->isFunction() || S->isWeak() || S->WrittenToNameSec)
- continue;
- S->WrittenToNameSec = true;
- Names.emplace_back(S);
- }
- }
+ if (NumNames == 0)
+ return;
SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, "name");
- std::sort(Names.begin(), Names.end(), [](const Symbol *A, const Symbol *B) {
- return A->getOutputIndex() < B->getOutputIndex();
- });
-
- SubSection FunctionSubsection(WASM_NAMES_FUNCTION);
- raw_ostream &OS = FunctionSubsection.getStream();
- writeUleb128(OS, Names.size(), "name count");
-
- // We have to iterate through the inputs twice so that all the imports
- // appear first before any of the local function names.
- for (const Symbol *S : Names) {
- writeUleb128(OS, S->getOutputIndex(), "func index");
- writeStr(OS, S->getName(), "symbol name");
+ SubSection Sub(WASM_NAMES_FUNCTION);
+ writeUleb128(Sub.OS, NumNames, "name count");
+
+ // Names must appear in function index order. As it happens ImportedSymbols
+ // and InputFunctions are numbered in order with imported functions coming
+ // first.
+ for (const Symbol *S : ImportedSymbols) {
+ if (auto *F = dyn_cast<FunctionSymbol>(S)) {
+ writeUleb128(Sub.OS, F->getFunctionIndex(), "func index");
+ Optional<std::string> Name = demangleItanium(F->getName());
+ writeStr(Sub.OS, Name ? StringRef(*Name) : F->getName(), "symbol name");
+ }
+ }
+ for (const InputFunction *F : InputFunctions) {
+ if (!F->getName().empty()) {
+ writeUleb128(Sub.OS, F->getFunctionIndex(), "func index");
+ if (!F->getDebugName().empty()) {
+ writeStr(Sub.OS, F->getDebugName(), "symbol name");
+ } else {
+ Optional<std::string> Name = demangleItanium(F->getName());
+ writeStr(Sub.OS, Name ? StringRef(*Name) : F->getName(), "symbol name");
+ }
+ }
}
- FunctionSubsection.finalizeContents();
- FunctionSubsection.writeToStream(Section->getStream());
+ Sub.writeTo(Section->getStream());
}
void Writer::writeHeader() {
@@ -479,48 +578,98 @@ void Writer::writeSections() {
// Fix the memory layout of the output binary. This assigns memory offsets
// to each of the input data sections as well as the explicit stack region.
+// The default memory layout is as follows, from low to high.
+//
+// - initialized data (starting at Config->GlobalBase)
+// - BSS data (not currently implemented in llvm)
+// - explicit stack (Config->ZStackSize)
+// - heap start / unallocated
+//
+// The --stack-first option means that the stack is placed before any static
+// data. This can be useful since it means that stack overflow traps immediately
+// rather than overwriting global data, but also increases code size since all
+// static data loads and stores require larger offsets.
void Writer::layoutMemory() {
+ createOutputSegments();
+
uint32_t MemoryPtr = 0;
- if (!Config->Relocatable) {
+
+ auto PlaceStack = [&]() {
+ if (Config->Relocatable)
+ return;
+ MemoryPtr = alignTo(MemoryPtr, kStackAlignment);
+ if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment))
+ error("stack size must be " + Twine(kStackAlignment) + "-byte aligned");
+ log("mem: stack size = " + Twine(Config->ZStackSize));
+ log("mem: stack base = " + Twine(MemoryPtr));
+ MemoryPtr += Config->ZStackSize;
+ WasmSym::StackPointer->Global->Global.InitExpr.Value.Int32 = MemoryPtr;
+ log("mem: stack top = " + Twine(MemoryPtr));
+ };
+
+ if (Config->StackFirst) {
+ PlaceStack();
+ } else {
MemoryPtr = Config->GlobalBase;
- debugPrint("mem: global base = %d\n", Config->GlobalBase);
+ log("mem: global base = " + Twine(Config->GlobalBase));
}
- createOutputSegments();
+ uint32_t DataStart = MemoryPtr;
+
+ // Arbitrarily set __dso_handle to point to the start of the data
+ // segments.
+ if (WasmSym::DsoHandle)
+ WasmSym::DsoHandle->setVirtualAddress(DataStart);
- // Static data comes first
for (OutputSegment *Seg : Segments) {
MemoryPtr = alignTo(MemoryPtr, Seg->Alignment);
Seg->StartVA = MemoryPtr;
- debugPrint("mem: %-10s offset=%-8d size=%-4d align=%d\n",
- Seg->Name.str().c_str(), MemoryPtr, Seg->Size, Seg->Alignment);
+ log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", Seg->Name,
+ MemoryPtr, Seg->Size, Seg->Alignment));
MemoryPtr += Seg->Size;
}
- DataSize = MemoryPtr;
- if (!Config->Relocatable)
- DataSize -= Config->GlobalBase;
- debugPrint("mem: static data = %d\n", DataSize);
+ // TODO: Add .bss space here.
+ if (WasmSym::DataEnd)
+ WasmSym::DataEnd->setVirtualAddress(MemoryPtr);
+
+ log("mem: static data = " + Twine(MemoryPtr - DataStart));
- // Stack comes after static data
+ if (!Config->StackFirst)
+ PlaceStack();
+
+ // Set `__heap_base` to directly follow the end of the stack or global data.
+ // The fact that this comes last means that a malloc/brk implementation
+ // can grow the heap at runtime.
if (!Config->Relocatable) {
- MemoryPtr = alignTo(MemoryPtr, kStackAlignment);
- if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment))
- error("stack size must be " + Twine(kStackAlignment) + "-byte aligned");
- debugPrint("mem: stack size = %d\n", Config->ZStackSize);
- debugPrint("mem: stack base = %d\n", MemoryPtr);
- MemoryPtr += Config->ZStackSize;
- Config->StackPointerSymbol->setVirtualAddress(MemoryPtr);
- debugPrint("mem: stack top = %d\n", MemoryPtr);
+ WasmSym::HeapBase->setVirtualAddress(MemoryPtr);
+ log("mem: heap base = " + Twine(MemoryPtr));
}
+ if (Config->InitialMemory != 0) {
+ if (Config->InitialMemory != alignTo(Config->InitialMemory, WasmPageSize))
+ error("initial memory must be " + Twine(WasmPageSize) + "-byte aligned");
+ if (MemoryPtr > Config->InitialMemory)
+ error("initial memory too small, " + Twine(MemoryPtr) + " bytes needed");
+ else
+ MemoryPtr = Config->InitialMemory;
+ }
uint32_t MemSize = alignTo(MemoryPtr, WasmPageSize);
NumMemoryPages = MemSize / WasmPageSize;
- debugPrint("mem: total pages = %d\n", NumMemoryPages);
+ log("mem: total pages = " + Twine(NumMemoryPages));
+
+ if (Config->MaxMemory != 0) {
+ if (Config->MaxMemory != alignTo(Config->MaxMemory, WasmPageSize))
+ error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned");
+ if (MemoryPtr > Config->MaxMemory)
+ error("maximum memory too small, " + Twine(MemoryPtr) + " bytes needed");
+ MaxMemoryPages = Config->MaxMemory / WasmPageSize;
+ log("mem: max pages = " + Twine(MaxMemoryPages));
+ }
}
SyntheticSection *Writer::createSyntheticSection(uint32_t Type,
- std::string Name) {
+ StringRef Name) {
auto Sec = make<SyntheticSection>(Type, Name);
log("createSection: " + toString(*Sec));
OutputSections.push_back(Sec);
@@ -536,15 +685,16 @@ void Writer::createSections() {
createMemorySection();
createGlobalSection();
createExportSection();
- createStartSection();
createElemSection();
createCodeSection();
createDataSection();
+ createCustomSections();
// Custom sections
- if (Config->EmitRelocs)
+ if (Config->Relocatable) {
+ createLinkingSection();
createRelocSections();
- createLinkingSection();
+ }
if (!Config->StripDebug && !Config->StripAll)
createNameSection();
@@ -555,149 +705,336 @@ void Writer::createSections() {
}
}
-void Writer::calculateOffsets() {
- for (ObjFile *File : Symtab->ObjectFiles) {
- const WasmObjectFile *WasmFile = File->getWasmObj();
-
- // Function Index
- File->FunctionIndexOffset =
- FunctionImports.size() - File->NumFunctionImports() + NumFunctions;
- NumFunctions += WasmFile->functions().size();
+void Writer::calculateImports() {
+ for (Symbol *Sym : Symtab->getSymbols()) {
+ if (!Sym->isUndefined())
+ continue;
+ if (isa<DataSymbol>(Sym))
+ continue;
+ if (Sym->isWeak() && !Config->Relocatable)
+ continue;
+ if (!Sym->isLive())
+ continue;
+ if (!Sym->IsUsedInRegularObj)
+ continue;
- // Memory
- if (WasmFile->memories().size() > 1)
- fatal(File->getName() + ": contains more than one memory");
+ LLVM_DEBUG(dbgs() << "import: " << Sym->getName() << "\n");
+ ImportedSymbols.emplace_back(Sym);
+ if (auto *F = dyn_cast<FunctionSymbol>(Sym))
+ F->setFunctionIndex(NumImportedFunctions++);
+ else
+ cast<GlobalSymbol>(Sym)->setGlobalIndex(NumImportedGlobals++);
}
}
-void Writer::calculateImports() {
+void Writer::calculateExports() {
+ if (Config->Relocatable)
+ return;
+
+ if (!Config->Relocatable && !Config->ImportMemory)
+ Exports.push_back(WasmExport{"memory", WASM_EXTERNAL_MEMORY, 0});
+
+ if (!Config->Relocatable && Config->ExportTable)
+ Exports.push_back(WasmExport{kFunctionTableName, WASM_EXTERNAL_TABLE, 0});
+
+ unsigned FakeGlobalIndex = NumImportedGlobals + InputGlobals.size();
+
for (Symbol *Sym : Symtab->getSymbols()) {
- if (!Sym->isUndefined() || Sym->isWeak())
+ if (!Sym->isExported())
+ continue;
+ if (!Sym->isLive())
continue;
- if (Sym->isFunction()) {
- Sym->setOutputIndex(FunctionImports.size());
- FunctionImports.push_back(Sym);
+ StringRef Name = Sym->getName();
+ WasmExport Export;
+ if (auto *F = dyn_cast<DefinedFunction>(Sym)) {
+ Export = {Name, WASM_EXTERNAL_FUNCTION, F->getFunctionIndex()};
+ } else if (auto *G = dyn_cast<DefinedGlobal>(Sym)) {
+ // TODO(sbc): Remove this check once the mutable global proposal is
+ // implemented in all major browsers.
+ // See: https://github.com/WebAssembly/mutable-global
+ if (G->getGlobalType()->Mutable) {
+ // Only the __stack_pointer should ever be created as mutable.
+ assert(G == WasmSym::StackPointer);
+ continue;
+ }
+ Export = {Name, WASM_EXTERNAL_GLOBAL, G->getGlobalIndex()};
} else {
- Sym->setOutputIndex(GlobalImports.size());
- GlobalImports.push_back(Sym);
+ auto *D = cast<DefinedData>(Sym);
+ DefinedFakeGlobals.emplace_back(D);
+ Export = {Name, WASM_EXTERNAL_GLOBAL, FakeGlobalIndex++};
}
+
+ LLVM_DEBUG(dbgs() << "Export: " << Name << "\n");
+ Exports.push_back(Export);
+ }
+}
+
+void Writer::assignSymtab() {
+ if (!Config->Relocatable)
+ return;
+
+ StringMap<uint32_t> SectionSymbolIndices;
+
+ unsigned SymbolIndex = SymtabEntries.size();
+ for (ObjFile *File : Symtab->ObjectFiles) {
+ LLVM_DEBUG(dbgs() << "Symtab entries: " << File->getName() << "\n");
+ for (Symbol *Sym : File->getSymbols()) {
+ if (Sym->getFile() != File)
+ continue;
+
+ if (auto *S = dyn_cast<SectionSymbol>(Sym)) {
+ StringRef Name = S->getName();
+ if (CustomSectionMapping.count(Name) == 0)
+ continue;
+
+ auto SSI = SectionSymbolIndices.find(Name);
+ if (SSI != SectionSymbolIndices.end()) {
+ Sym->setOutputSymbolIndex(SSI->second);
+ continue;
+ }
+
+ SectionSymbolIndices[Name] = SymbolIndex;
+ CustomSectionSymbols[Name] = cast<SectionSymbol>(Sym);
+
+ Sym->markLive();
+ }
+
+ // (Since this is relocatable output, GC is not performed so symbols must
+ // be live.)
+ assert(Sym->isLive());
+ Sym->setOutputSymbolIndex(SymbolIndex++);
+ SymtabEntries.emplace_back(Sym);
+ }
+ }
+
+ // For the moment, relocatable output doesn't contain any synthetic functions,
+ // so no need to look through the Symtab for symbols not referenced by
+ // Symtab->ObjectFiles.
+}
+
+uint32_t Writer::lookupType(const WasmSignature &Sig) {
+ auto It = TypeIndices.find(Sig);
+ if (It == TypeIndices.end()) {
+ error("type not found: " + toString(Sig));
+ return 0;
}
+ return It->second;
}
-uint32_t Writer::getTypeIndex(const WasmSignature &Sig) {
+uint32_t Writer::registerType(const WasmSignature &Sig) {
auto Pair = TypeIndices.insert(std::make_pair(Sig, Types.size()));
- if (Pair.second)
+ if (Pair.second) {
+ LLVM_DEBUG(dbgs() << "type " << toString(Sig) << "\n");
Types.push_back(&Sig);
+ }
return Pair.first->second;
}
void Writer::calculateTypes() {
+ // The output type section is the union of the following sets:
+ // 1. Any signature used in the TYPE relocation
+ // 2. The signatures of all imported functions
+ // 3. The signatures of all defined functions
+
for (ObjFile *File : Symtab->ObjectFiles) {
- File->TypeMap.reserve(File->getWasmObj()->types().size());
- for (const WasmSignature &Sig : File->getWasmObj()->types())
- File->TypeMap.push_back(getTypeIndex(Sig));
+ ArrayRef<WasmSignature> Types = File->getWasmObj()->types();
+ for (uint32_t I = 0; I < Types.size(); I++)
+ if (File->TypeIsUsed[I])
+ File->TypeMap[I] = registerType(Types[I]);
}
-}
-void Writer::assignSymbolIndexes() {
- uint32_t GlobalIndex = GlobalImports.size();
+ for (const Symbol *Sym : ImportedSymbols)
+ if (auto *F = dyn_cast<FunctionSymbol>(Sym))
+ registerType(*F->FunctionType);
- if (Config->StackPointerSymbol) {
- DefinedGlobals.emplace_back(Config->StackPointerSymbol);
- Config->StackPointerSymbol->setOutputIndex(GlobalIndex++);
- }
+ for (const InputFunction *F : InputFunctions)
+ registerType(F->Signature);
+}
- if (Config->EmitRelocs)
- DefinedGlobals.reserve(Symtab->getSymbols().size());
+void Writer::assignIndexes() {
+ uint32_t FunctionIndex = NumImportedFunctions + InputFunctions.size();
+ auto AddDefinedFunction = [&](InputFunction *Func) {
+ if (!Func->Live)
+ return;
+ InputFunctions.emplace_back(Func);
+ Func->setFunctionIndex(FunctionIndex++);
+ };
- uint32_t TableIndex = InitialTableOffset;
+ for (InputFunction *Func : Symtab->SyntheticFunctions)
+ AddDefinedFunction(Func);
for (ObjFile *File : Symtab->ObjectFiles) {
- DEBUG(dbgs() << "assignSymbolIndexes: " << File->getName() << "\n");
-
- for (Symbol *Sym : File->getSymbols()) {
- // Assign indexes for symbols defined with this file.
- if (!Sym->isDefined() || File != Sym->getFile())
- continue;
- if (Sym->isFunction()) {
- auto *Obj = cast<ObjFile>(Sym->getFile());
- Sym->setOutputIndex(Obj->FunctionIndexOffset +
- Sym->getFunctionIndex());
- } else if (Config->EmitRelocs) {
- DefinedGlobals.emplace_back(Sym);
- Sym->setOutputIndex(GlobalIndex++);
- }
- }
+ LLVM_DEBUG(dbgs() << "Functions: " << File->getName() << "\n");
+ for (InputFunction *Func : File->Functions)
+ AddDefinedFunction(Func);
+ }
- for (Symbol *Sym : File->getTableSymbols()) {
- if (!Sym->hasTableIndex()) {
+ uint32_t TableIndex = kInitialTableOffset;
+ auto HandleRelocs = [&](InputChunk *Chunk) {
+ if (!Chunk->Live)
+ return;
+ ObjFile *File = Chunk->File;
+ ArrayRef<WasmSignature> Types = File->getWasmObj()->types();
+ for (const WasmRelocation &Reloc : Chunk->getRelocations()) {
+ if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32 ||
+ Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB) {
+ FunctionSymbol *Sym = File->getFunctionSymbol(Reloc.Index);
+ if (Sym->hasTableIndex() || !Sym->hasFunctionIndex())
+ continue;
Sym->setTableIndex(TableIndex++);
IndirectFunctions.emplace_back(Sym);
+ } else if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) {
+ // Mark target type as live
+ File->TypeMap[Reloc.Index] = registerType(Types[Reloc.Index]);
+ File->TypeIsUsed[Reloc.Index] = true;
}
}
+ };
+
+ for (ObjFile *File : Symtab->ObjectFiles) {
+ LLVM_DEBUG(dbgs() << "Handle relocs: " << File->getName() << "\n");
+ for (InputChunk *Chunk : File->Functions)
+ HandleRelocs(Chunk);
+ for (InputChunk *Chunk : File->Segments)
+ HandleRelocs(Chunk);
+ for (auto &P : File->CustomSections)
+ HandleRelocs(P);
+ }
+
+ uint32_t GlobalIndex = NumImportedGlobals + InputGlobals.size();
+ auto AddDefinedGlobal = [&](InputGlobal *Global) {
+ if (Global->Live) {
+ LLVM_DEBUG(dbgs() << "AddDefinedGlobal: " << GlobalIndex << "\n");
+ Global->setGlobalIndex(GlobalIndex++);
+ InputGlobals.push_back(Global);
+ }
+ };
+
+ for (InputGlobal *Global : Symtab->SyntheticGlobals)
+ AddDefinedGlobal(Global);
+
+ for (ObjFile *File : Symtab->ObjectFiles) {
+ LLVM_DEBUG(dbgs() << "Globals: " << File->getName() << "\n");
+ for (InputGlobal *Global : File->Globals)
+ AddDefinedGlobal(Global);
}
}
static StringRef getOutputDataSegmentName(StringRef Name) {
- if (Config->Relocatable)
+ if (!Config->MergeDataSegments)
return Name;
-
- for (StringRef V :
- {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.",
- ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.",
- ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) {
- StringRef Prefix = V.drop_back();
- if (Name.startswith(V) || Name == Prefix)
- return Prefix;
- }
-
+ if (Name.startswith(".text."))
+ return ".text";
+ if (Name.startswith(".data."))
+ return ".data";
+ if (Name.startswith(".bss."))
+ return ".bss";
return Name;
}
void Writer::createOutputSegments() {
for (ObjFile *File : Symtab->ObjectFiles) {
for (InputSegment *Segment : File->Segments) {
+ if (!Segment->Live)
+ continue;
StringRef Name = getOutputDataSegmentName(Segment->getName());
OutputSegment *&S = SegmentMap[Name];
if (S == nullptr) {
- DEBUG(dbgs() << "new segment: " << Name << "\n");
- S = make<OutputSegment>(Name);
+ LLVM_DEBUG(dbgs() << "new segment: " << Name << "\n");
+ S = make<OutputSegment>(Name, Segments.size());
Segments.push_back(S);
}
S->addInputSegment(Segment);
- DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n");
+ LLVM_DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n");
}
}
}
+static const int OPCODE_CALL = 0x10;
+static const int OPCODE_END = 0xb;
+
+// Create synthetic "__wasm_call_ctors" function based on ctor functions
+// in input object.
+void Writer::createCtorFunction() {
+ // First write the body's contents to a string.
+ std::string BodyContent;
+ {
+ raw_string_ostream OS(BodyContent);
+ writeUleb128(OS, 0, "num locals");
+ for (const WasmInitEntry &F : InitFunctions) {
+ writeU8(OS, OPCODE_CALL, "CALL");
+ writeUleb128(OS, F.Sym->getFunctionIndex(), "function index");
+ }
+ writeU8(OS, OPCODE_END, "END");
+ }
+
+ // Once we know the size of the body we can create the final function body
+ std::string FunctionBody;
+ {
+ raw_string_ostream OS(FunctionBody);
+ writeUleb128(OS, BodyContent.size(), "function size");
+ OS << BodyContent;
+ }
+
+ ArrayRef<uint8_t> Body = toArrayRef(Saver.save(FunctionBody));
+ cast<SyntheticFunction>(WasmSym::CallCtors->Function)->setBody(Body);
+}
+
+// Populate InitFunctions vector with init functions from all input objects.
+// This is then used either when creating the output linking section or to
+// synthesize the "__wasm_call_ctors" function.
+void Writer::calculateInitFunctions() {
+ for (ObjFile *File : Symtab->ObjectFiles) {
+ const WasmLinkingData &L = File->getWasmObj()->linkingData();
+ for (const WasmInitFunc &F : L.InitFunctions) {
+ FunctionSymbol *Sym = File->getFunctionSymbol(F.Symbol);
+ if (*Sym->FunctionType != WasmSignature{{}, WASM_TYPE_NORESULT})
+ error("invalid signature for init func: " + toString(*Sym));
+ InitFunctions.emplace_back(WasmInitEntry{Sym, F.Priority});
+ }
+ }
+
+ // Sort in order of priority (lowest first) so that they are called
+ // in the correct order.
+ std::stable_sort(InitFunctions.begin(), InitFunctions.end(),
+ [](const WasmInitEntry &L, const WasmInitEntry &R) {
+ return L.Priority < R.Priority;
+ });
+}
+
void Writer::run() {
- if (!Config->Relocatable)
- InitialTableOffset = 1;
+ if (Config->Relocatable)
+ Config->GlobalBase = 0;
- log("-- calculateTypes");
- calculateTypes();
log("-- calculateImports");
calculateImports();
- log("-- calculateOffsets");
- calculateOffsets();
+ log("-- assignIndexes");
+ assignIndexes();
+ log("-- calculateInitFunctions");
+ calculateInitFunctions();
+ if (!Config->Relocatable)
+ createCtorFunction();
+ log("-- calculateTypes");
+ calculateTypes();
+ log("-- layoutMemory");
+ layoutMemory();
+ log("-- calculateExports");
+ calculateExports();
+ log("-- calculateCustomSections");
+ calculateCustomSections();
+ log("-- assignSymtab");
+ assignSymtab();
if (errorHandler().Verbose) {
- log("Defined Functions: " + Twine(NumFunctions));
- log("Defined Globals : " + Twine(DefinedGlobals.size()));
- log("Function Imports : " + Twine(FunctionImports.size()));
- log("Global Imports : " + Twine(GlobalImports.size()));
- log("Total Imports : " +
- Twine(FunctionImports.size() + GlobalImports.size()));
+ log("Defined Functions: " + Twine(InputFunctions.size()));
+ log("Defined Globals : " + Twine(InputGlobals.size()));
+ log("Function Imports : " + Twine(NumImportedFunctions));
+ log("Global Imports : " + Twine(NumImportedGlobals));
for (ObjFile *File : Symtab->ObjectFiles)
File->dumpInfo();
}
- log("-- assignSymbolIndexes");
- assignSymbolIndexes();
- log("-- layoutMemory");
- layoutMemory();
-
createHeader();
log("-- createSections");
createSections();
@@ -721,7 +1058,6 @@ void Writer::run() {
// Open a result file.
void Writer::openFile() {
log("writing: " + Config->OutputFile);
- ::remove(Config->OutputFile.str().c_str());
Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
FileOutputBuffer::create(Config->OutputFile, FileSize,
diff --git a/wasm/WriterUtils.cpp b/wasm/WriterUtils.cpp
index 5bdf0d2e3f65..201529edeaa6 100644
--- a/wasm/WriterUtils.cpp
+++ b/wasm/WriterUtils.cpp
@@ -8,12 +8,9 @@
//===----------------------------------------------------------------------===//
#include "WriterUtils.h"
-
#include "lld/Common/ErrorHandler.h"
-
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#define DEBUG_TYPE "lld"
@@ -22,7 +19,7 @@ using namespace llvm;
using namespace llvm::wasm;
using namespace lld::wasm;
-static const char *valueTypeToString(int32_t Type) {
+static const char *valueTypeToString(uint8_t Type) {
switch (Type) {
case WASM_TYPE_I32:
return "i32";
@@ -39,61 +36,57 @@ static const char *valueTypeToString(int32_t Type) {
namespace lld {
-void wasm::debugWrite(uint64_t offset, Twine msg) {
- DEBUG(dbgs() << format(" | %08" PRIx64 ": ", offset) << msg << "\n");
+void wasm::debugWrite(uint64_t Offset, const Twine &Msg) {
+ LLVM_DEBUG(dbgs() << format(" | %08lld: ", Offset) << Msg << "\n");
}
-void wasm::writeUleb128(raw_ostream &OS, uint32_t Number, const char *msg) {
- if (msg)
- debugWrite(OS.tell(), msg + formatv(" [{0:x}]", Number));
+void wasm::writeUleb128(raw_ostream &OS, uint32_t Number, const Twine &Msg) {
+ debugWrite(OS.tell(), Msg + "[" + utohexstr(Number) + "]");
encodeULEB128(Number, OS);
}
-void wasm::writeSleb128(raw_ostream &OS, int32_t Number, const char *msg) {
- if (msg)
- debugWrite(OS.tell(), msg + formatv(" [{0:x}]", Number));
+void wasm::writeSleb128(raw_ostream &OS, int32_t Number, const Twine &Msg) {
+ debugWrite(OS.tell(), Msg + "[" + utohexstr(Number) + "]");
encodeSLEB128(Number, OS);
}
-void wasm::writeBytes(raw_ostream &OS, const char *bytes, size_t count,
- const char *msg) {
- if (msg)
- debugWrite(OS.tell(), msg + formatv(" [data[{0}]]", count));
- OS.write(bytes, count);
+void wasm::writeBytes(raw_ostream &OS, const char *Bytes, size_t Count,
+ const Twine &Msg) {
+ debugWrite(OS.tell(), Msg + " [data[" + Twine(Count) + "]]");
+ OS.write(Bytes, Count);
}
-void wasm::writeStr(raw_ostream &OS, const StringRef String, const char *msg) {
- if (msg)
- debugWrite(OS.tell(),
- msg + formatv(" [str[{0}]: {1}]", String.size(), String));
- writeUleb128(OS, String.size(), nullptr);
- writeBytes(OS, String.data(), String.size());
+void wasm::writeStr(raw_ostream &OS, StringRef String, const Twine &Msg) {
+ debugWrite(OS.tell(),
+ Msg + " [str[" + Twine(String.size()) + "]: " + String + "]");
+ encodeULEB128(String.size(), OS);
+ OS.write(String.data(), String.size());
}
-void wasm::writeU8(raw_ostream &OS, uint8_t byte, const char *msg) {
- OS << byte;
+void wasm::writeU8(raw_ostream &OS, uint8_t Byte, const Twine &Msg) {
+ debugWrite(OS.tell(), Msg + " [0x" + utohexstr(Byte) + "]");
+ OS << Byte;
}
-void wasm::writeU32(raw_ostream &OS, uint32_t Number, const char *msg) {
- debugWrite(OS.tell(), msg + formatv("[{0:x}]", Number));
- support::endian::Writer<support::little>(OS).write(Number);
+void wasm::writeU32(raw_ostream &OS, uint32_t Number, const Twine &Msg) {
+ debugWrite(OS.tell(), Msg + "[0x" + utohexstr(Number) + "]");
+ support::endian::write(OS, Number, support::little);
}
-void wasm::writeValueType(raw_ostream &OS, int32_t Type, const char *msg) {
- debugWrite(OS.tell(), msg + formatv("[type: {0}]", valueTypeToString(Type)));
- writeSleb128(OS, Type, nullptr);
+void wasm::writeValueType(raw_ostream &OS, uint8_t Type, const Twine &Msg) {
+ writeU8(OS, Type, Msg + "[type: " + valueTypeToString(Type) + "]");
}
void wasm::writeSig(raw_ostream &OS, const WasmSignature &Sig) {
- writeSleb128(OS, WASM_TYPE_FUNC, "signature type");
- writeUleb128(OS, Sig.ParamTypes.size(), "param count");
- for (int32_t ParamType : Sig.ParamTypes) {
+ writeU8(OS, WASM_TYPE_FUNC, "signature type");
+ writeUleb128(OS, Sig.ParamTypes.size(), "param Count");
+ for (uint8_t ParamType : Sig.ParamTypes) {
writeValueType(OS, ParamType, "param type");
}
if (Sig.ReturnType == WASM_TYPE_NORESULT) {
- writeUleb128(OS, 0, "result count");
+ writeUleb128(OS, 0, "result Count");
} else {
- writeUleb128(OS, 1, "result count");
+ writeUleb128(OS, 1, "result Count");
writeValueType(OS, Sig.ReturnType, "result type");
}
}
@@ -117,18 +110,27 @@ void wasm::writeInitExpr(raw_ostream &OS, const WasmInitExpr &InitExpr) {
}
void wasm::writeLimits(raw_ostream &OS, const WasmLimits &Limits) {
- writeUleb128(OS, Limits.Flags, "limits flags");
+ writeU8(OS, Limits.Flags, "limits flags");
writeUleb128(OS, Limits.Initial, "limits initial");
if (Limits.Flags & WASM_LIMITS_FLAG_HAS_MAX)
writeUleb128(OS, Limits.Maximum, "limits max");
}
+void wasm::writeGlobalType(raw_ostream &OS, const WasmGlobalType &Type) {
+ writeValueType(OS, Type.Type, "global type");
+ writeU8(OS, Type.Mutable, "global mutable");
+}
+
void wasm::writeGlobal(raw_ostream &OS, const WasmGlobal &Global) {
- writeValueType(OS, Global.Type, "global type");
- writeUleb128(OS, Global.Mutable, "global mutable");
+ writeGlobalType(OS, Global.Type);
writeInitExpr(OS, Global.InitExpr);
}
+void wasm::writeTableType(raw_ostream &OS, const llvm::wasm::WasmTable &Type) {
+ writeU8(OS, WASM_TYPE_ANYFUNC, "table type");
+ writeLimits(OS, Type.Limits);
+}
+
void wasm::writeImport(raw_ostream &OS, const WasmImport &Import) {
writeStr(OS, Import.Module, "import module name");
writeStr(OS, Import.Field, "import field name");
@@ -138,12 +140,14 @@ void wasm::writeImport(raw_ostream &OS, const WasmImport &Import) {
writeUleb128(OS, Import.SigIndex, "import sig index");
break;
case WASM_EXTERNAL_GLOBAL:
- writeValueType(OS, Import.Global.Type, "import global type");
- writeUleb128(OS, Import.Global.Mutable, "import global mutable");
+ writeGlobalType(OS, Import.Global);
break;
case WASM_EXTERNAL_MEMORY:
writeLimits(OS, Import.Memory);
break;
+ case WASM_EXTERNAL_TABLE:
+ writeTableType(OS, Import.Table);
+ break;
default:
fatal("unsupported import type: " + Twine(Import.Kind));
}
@@ -162,27 +166,13 @@ void wasm::writeExport(raw_ostream &OS, const WasmExport &Export) {
case WASM_EXTERNAL_MEMORY:
writeUleb128(OS, Export.Index, "memory index");
break;
- default:
- fatal("unsupported export type: " + Twine(Export.Kind));
- }
-}
-
-void wasm::writeReloc(raw_ostream &OS, const OutputRelocation &Reloc) {
- writeUleb128(OS, Reloc.Reloc.Type, "reloc type");
- writeUleb128(OS, Reloc.Reloc.Offset, "reloc offset");
- writeUleb128(OS, Reloc.NewIndex, "reloc index");
-
- switch (Reloc.Reloc.Type) {
- case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case R_WEBASSEMBLY_MEMORY_ADDR_I32:
- writeUleb128(OS, Reloc.Reloc.Addend, "reloc addend");
+ case WASM_EXTERNAL_TABLE:
+ writeUleb128(OS, Export.Index, "table index");
break;
default:
- break;
+ fatal("unsupported export type: " + Twine(Export.Kind));
}
}
-
} // namespace lld
std::string lld::toString(ValType Type) {
@@ -195,6 +185,8 @@ std::string lld::toString(ValType Type) {
return "F32";
case ValType::F64:
return "F64";
+ case ValType::EXCEPT_REF:
+ return "except_ref";
}
llvm_unreachable("Invalid wasm::ValType");
}
@@ -213,3 +205,8 @@ std::string lld::toString(const WasmSignature &Sig) {
S += toString(static_cast<ValType>(Sig.ReturnType));
return S.str();
}
+
+std::string lld::toString(const WasmGlobalType &Sig) {
+ return (Sig.Mutable ? "var " : "const ") +
+ toString(static_cast<ValType>(Sig.Type));
+}
diff --git a/wasm/WriterUtils.h b/wasm/WriterUtils.h
index c1ed90793f78..74d727b24b40 100644
--- a/wasm/WriterUtils.h
+++ b/wasm/WriterUtils.h
@@ -10,49 +10,32 @@
#ifndef LLD_WASM_WRITERUTILS_H
#define LLD_WASM_WRITERUTILS_H
+#include "lld/Common/LLVM.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/raw_ostream.h"
using llvm::raw_ostream;
-// Needed for WasmSignatureDenseMapInfo
-inline bool operator==(const llvm::wasm::WasmSignature &LHS,
- const llvm::wasm::WasmSignature &RHS) {
- return LHS.ReturnType == RHS.ReturnType && LHS.ParamTypes == RHS.ParamTypes;
-}
-
-inline bool operator!=(const llvm::wasm::WasmSignature &LHS,
- const llvm::wasm::WasmSignature &RHS) {
- return !(LHS == RHS);
-}
-
namespace lld {
namespace wasm {
-struct OutputRelocation {
- llvm::wasm::WasmRelocation Reloc;
- uint32_t NewIndex;
- uint32_t Value;
-};
-
-void debugWrite(uint64_t offset, llvm::Twine msg);
+void debugWrite(uint64_t Offset, const Twine &Msg);
-void writeUleb128(raw_ostream &OS, uint32_t Number, const char *msg);
+void writeUleb128(raw_ostream &OS, uint32_t Number, const Twine &Msg);
-void writeSleb128(raw_ostream &OS, int32_t Number, const char *msg);
+void writeSleb128(raw_ostream &OS, int32_t Number, const Twine &Msg);
-void writeBytes(raw_ostream &OS, const char *bytes, size_t count,
- const char *msg = nullptr);
+void writeBytes(raw_ostream &OS, const char *Bytes, size_t count,
+ const Twine &Msg);
-void writeStr(raw_ostream &OS, const llvm::StringRef String,
- const char *msg = nullptr);
+void writeStr(raw_ostream &OS, StringRef String, const Twine &Msg);
-void writeU8(raw_ostream &OS, uint8_t byte, const char *msg);
+void writeU8(raw_ostream &OS, uint8_t byte, const Twine &Msg);
-void writeU32(raw_ostream &OS, uint32_t Number, const char *msg);
+void writeU32(raw_ostream &OS, uint32_t Number, const Twine &Msg);
-void writeValueType(raw_ostream &OS, int32_t Type, const char *msg);
+void writeValueType(raw_ostream &OS, uint8_t Type, const Twine &Msg);
void writeSig(raw_ostream &OS, const llvm::wasm::WasmSignature &Sig);
@@ -60,18 +43,21 @@ void writeInitExpr(raw_ostream &OS, const llvm::wasm::WasmInitExpr &InitExpr);
void writeLimits(raw_ostream &OS, const llvm::wasm::WasmLimits &Limits);
+void writeGlobalType(raw_ostream &OS, const llvm::wasm::WasmGlobalType &Type);
+
void writeGlobal(raw_ostream &OS, const llvm::wasm::WasmGlobal &Global);
+void writeTableType(raw_ostream &OS, const llvm::wasm::WasmTable &Type);
+
void writeImport(raw_ostream &OS, const llvm::wasm::WasmImport &Import);
void writeExport(raw_ostream &OS, const llvm::wasm::WasmExport &Export);
-void writeReloc(raw_ostream &OS, const OutputRelocation &Reloc);
-
} // namespace wasm
-std::string toString(const llvm::wasm::ValType Type);
+std::string toString(llvm::wasm::ValType Type);
std::string toString(const llvm::wasm::WasmSignature &Sig);
+std::string toString(const llvm::wasm::WasmGlobalType &Sig);
} // namespace lld