diff options
Diffstat (limited to 'wasm/Writer.cpp')
| -rw-r--r-- | wasm/Writer.cpp | 954 |
1 files changed, 645 insertions, 309 deletions
diff --git a/wasm/Writer.cpp b/wasm/Writer.cpp index e7dd49d522134..37ad32452a91f 100644 --- a/wasm/Writer.cpp +++ b/wasm/Writer.cpp @@ -8,21 +8,28 @@ //===----------------------------------------------------------------------===// #include "Writer.h" - #include "Config.h" +#include "InputChunks.h" +#include "InputGlobal.h" #include "OutputSections.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Strings.h" #include "lld/Common/Threads.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Object/WasmTraits.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/LEB128.h" #include <cstdarg> +#include <map> #define DEBUG_TYPE "lld" @@ -32,31 +39,16 @@ using namespace lld; using namespace lld::wasm; static constexpr int kStackAlignment = 16; +static constexpr int kInitialTableOffset = 1; +static constexpr const char *kFunctionTableName = "__indirect_function_table"; namespace { -// Traits for using WasmSignature in a DenseMap. -struct WasmSignatureDenseMapInfo { - static WasmSignature getEmptyKey() { - WasmSignature Sig; - Sig.ReturnType = 1; - return Sig; - } - static WasmSignature getTombstoneKey() { - WasmSignature Sig; - Sig.ReturnType = 2; - return Sig; - } - static unsigned getHashValue(const WasmSignature &Sig) { - uintptr_t Value = 0; - Value += DenseMapInfo<int32_t>::getHashValue(Sig.ReturnType); - for (int32_t Param : Sig.ParamTypes) - Value += DenseMapInfo<int32_t>::getHashValue(Param); - return Value; - } - static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) { - return LHS == RHS; - } +// An init entry to be written to either the synthetic init func or the +// linking metadata. +struct WasmInitEntry { + const FunctionSymbol *Sym; + uint32_t Priority; }; // The writer writes a SymbolTable result to a file. @@ -67,17 +59,22 @@ public: private: void openFile(); - uint32_t getTypeIndex(const WasmSignature &Sig); - void assignSymbolIndexes(); + uint32_t lookupType(const WasmSignature &Sig); + uint32_t registerType(const WasmSignature &Sig); + + void createCtorFunction(); + void calculateInitFunctions(); + void assignIndexes(); void calculateImports(); - void calculateOffsets(); + void calculateExports(); + void calculateCustomSections(); + void assignSymtab(); void calculateTypes(); void createOutputSegments(); void layoutMemory(); void createHeader(); void createSections(); - SyntheticSection *createSyntheticSection(uint32_t Type, - std::string Name = ""); + SyntheticSection *createSyntheticSection(uint32_t Type, StringRef Name = ""); // Builtin sections void createTypeSection(); @@ -88,9 +85,9 @@ private: void createImportSection(); void createMemorySection(); void createElemSection(); - void createStartSection(); void createCodeSection(); void createDataSection(); + void createCustomSections(); // Custom sections void createRelocSections(); @@ -101,17 +98,24 @@ private: void writeSections(); uint64_t FileSize = 0; - uint32_t DataSize = 0; - uint32_t NumFunctions = 0; uint32_t NumMemoryPages = 0; - uint32_t InitialTableOffset = 0; + uint32_t MaxMemoryPages = 0; std::vector<const WasmSignature *> Types; - DenseMap<WasmSignature, int32_t, WasmSignatureDenseMapInfo> TypeIndices; - std::vector<const Symbol *> FunctionImports; - std::vector<const Symbol *> GlobalImports; - std::vector<const Symbol *> DefinedGlobals; - std::vector<const Symbol *> IndirectFunctions; + DenseMap<WasmSignature, int32_t> TypeIndices; + std::vector<const Symbol *> ImportedSymbols; + unsigned NumImportedFunctions = 0; + unsigned NumImportedGlobals = 0; + std::vector<WasmExport> Exports; + std::vector<const DefinedData *> DefinedFakeGlobals; + std::vector<InputGlobal *> InputGlobals; + std::vector<InputFunction *> InputFunctions; + std::vector<const FunctionSymbol *> IndirectFunctions; + std::vector<const Symbol *> SymtabEntries; + std::vector<WasmInitEntry> InitFunctions; + + llvm::StringMap<std::vector<InputSection *>> CustomSectionMapping; + llvm::StringMap<SectionSymbol *> CustomSectionSymbols; // Elements that are used to construct the final output std::string Header; @@ -125,20 +129,12 @@ private: } // anonymous namespace -static void debugPrint(const char *fmt, ...) { - if (!errorHandler().Verbose) - return; - fprintf(stderr, "lld: "); - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - void Writer::createImportSection() { - uint32_t NumImports = FunctionImports.size() + GlobalImports.size(); + uint32_t NumImports = ImportedSymbols.size(); if (Config->ImportMemory) ++NumImports; + if (Config->ImportTable) + ++NumImports; if (NumImports == 0) return; @@ -148,16 +144,6 @@ void Writer::createImportSection() { writeUleb128(OS, NumImports, "import count"); - for (const Symbol *Sym : FunctionImports) { - WasmImport Import; - Import.Module = "env"; - Import.Field = Sym->getName(); - Import.Kind = WASM_EXTERNAL_FUNCTION; - assert(TypeIndices.count(Sym->getFunctionType()) > 0); - Import.SigIndex = TypeIndices.lookup(Sym->getFunctionType()); - writeImport(OS, Import); - } - if (Config->ImportMemory) { WasmImport Import; Import.Module = "env"; @@ -165,16 +151,36 @@ void Writer::createImportSection() { Import.Kind = WASM_EXTERNAL_MEMORY; Import.Memory.Flags = 0; Import.Memory.Initial = NumMemoryPages; + if (MaxMemoryPages != 0) { + Import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX; + Import.Memory.Maximum = MaxMemoryPages; + } writeImport(OS, Import); } - for (const Symbol *Sym : GlobalImports) { + if (Config->ImportTable) { + uint32_t TableSize = kInitialTableOffset + IndirectFunctions.size(); + WasmImport Import; + Import.Module = "env"; + Import.Field = kFunctionTableName; + Import.Kind = WASM_EXTERNAL_TABLE; + Import.Table.ElemType = WASM_TYPE_ANYFUNC; + Import.Table.Limits = {WASM_LIMITS_FLAG_HAS_MAX, TableSize, TableSize}; + writeImport(OS, Import); + } + + for (const Symbol *Sym : ImportedSymbols) { WasmImport Import; Import.Module = "env"; Import.Field = Sym->getName(); - Import.Kind = WASM_EXTERNAL_GLOBAL; - Import.Global.Mutable = false; - Import.Global.Type = WASM_TYPE_I32; + if (auto *FunctionSym = dyn_cast<FunctionSymbol>(Sym)) { + Import.Kind = WASM_EXTERNAL_FUNCTION; + Import.SigIndex = lookupType(*FunctionSym->FunctionType); + } else { + auto *GlobalSym = cast<GlobalSymbol>(Sym); + Import.Kind = WASM_EXTERNAL_GLOBAL; + Import.Global = *GlobalSym->getGlobalType(); + } writeImport(OS, Import); } } @@ -188,16 +194,15 @@ void Writer::createTypeSection() { } void Writer::createFunctionSection() { - if (!NumFunctions) + if (InputFunctions.empty()) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_FUNCTION); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, NumFunctions, "function count"); - for (ObjFile *File : Symtab->ObjectFiles) - for (uint32_t Sig : File->getWasmObj()->functionTypes()) - writeUleb128(OS, File->relocateTypeIndex(Sig), "sig index"); + writeUleb128(OS, InputFunctions.size(), "function count"); + for (const InputFunction *Func : InputFunctions) + writeUleb128(OS, lookupType(Func->Signature), "sig index"); } void Writer::createMemorySection() { @@ -207,23 +212,29 @@ void Writer::createMemorySection() { SyntheticSection *Section = createSyntheticSection(WASM_SEC_MEMORY); raw_ostream &OS = Section->getStream(); + bool HasMax = MaxMemoryPages != 0; writeUleb128(OS, 1, "memory count"); - writeUleb128(OS, 0, "memory limits flags"); + writeUleb128(OS, HasMax ? static_cast<unsigned>(WASM_LIMITS_FLAG_HAS_MAX) : 0, + "memory limits flags"); writeUleb128(OS, NumMemoryPages, "initial pages"); + if (HasMax) + writeUleb128(OS, MaxMemoryPages, "max pages"); } void Writer::createGlobalSection() { - if (DefinedGlobals.empty()) + unsigned NumGlobals = InputGlobals.size() + DefinedFakeGlobals.size(); + if (NumGlobals == 0) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_GLOBAL); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, DefinedGlobals.size(), "global count"); - for (const Symbol *Sym : DefinedGlobals) { + writeUleb128(OS, NumGlobals, "global count"); + for (const InputGlobal *G : InputGlobals) + writeGlobal(OS, G->Global); + for (const DefinedData *Sym : DefinedFakeGlobals) { WasmGlobal Global; - Global.Type = WASM_TYPE_I32; - Global.Mutable = Sym == Config->StackPointerSymbol; + Global.Type = {WASM_TYPE_I32, false}; Global.InitExpr.Opcode = WASM_OPCODE_I32_CONST; Global.InitExpr.Value.Int32 = Sym->getVirtualAddress(); writeGlobal(OS, Global); @@ -231,88 +242,73 @@ void Writer::createGlobalSection() { } void Writer::createTableSection() { - // Always output a table section, even if there are no indirect calls. - // There are two reasons for this: + if (Config->ImportTable) + return; + + // Always output a table section (or table import), even if there are no + // indirect calls. There are two reasons for this: // 1. For executables it is useful to have an empty table slot at 0 // which can be filled with a null function call handler. // 2. If we don't do this, any program that contains a call_indirect but // no address-taken function will fail at validation time since it is // a validation error to include a call_indirect instruction if there // is not table. - uint32_t TableSize = InitialTableOffset + IndirectFunctions.size(); + uint32_t TableSize = kInitialTableOffset + IndirectFunctions.size(); SyntheticSection *Section = createSyntheticSection(WASM_SEC_TABLE); raw_ostream &OS = Section->getStream(); writeUleb128(OS, 1, "table count"); - writeSleb128(OS, WASM_TYPE_ANYFUNC, "table type"); - writeUleb128(OS, WASM_LIMITS_FLAG_HAS_MAX, "table flags"); - writeUleb128(OS, TableSize, "table initial size"); - writeUleb128(OS, TableSize, "table max size"); + WasmLimits Limits = {WASM_LIMITS_FLAG_HAS_MAX, TableSize, TableSize}; + writeTableType(OS, WasmTable{WASM_TYPE_ANYFUNC, Limits}); } void Writer::createExportSection() { - bool ExportMemory = !Config->Relocatable && !Config->ImportMemory; - Symbol *EntrySym = Symtab->find(Config->Entry); - bool ExportEntry = !Config->Relocatable && EntrySym && EntrySym->isDefined(); - bool ExportHidden = Config->EmitRelocs; - - uint32_t NumExports = ExportMemory ? 1 : 0; - - std::vector<const Symbol *> SymbolExports; - if (ExportEntry) - SymbolExports.emplace_back(EntrySym); - - for (const Symbol *Sym : Symtab->getSymbols()) { - if (Sym->isUndefined() || Sym->isGlobal()) - continue; - if (Sym->isHidden() && !ExportHidden) - continue; - if (ExportEntry && Sym == EntrySym) - continue; - SymbolExports.emplace_back(Sym); - } - - for (const Symbol *Sym : DefinedGlobals) { - // Can't export the SP right now because it mutable and mutable globals - // connot be exported. - if (Sym == Config->StackPointerSymbol) - continue; - SymbolExports.emplace_back(Sym); - } - - NumExports += SymbolExports.size(); - if (!NumExports) + if (!Exports.size()) return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_EXPORT); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, NumExports, "export count"); + writeUleb128(OS, Exports.size(), "export count"); + for (const WasmExport &Export : Exports) + writeExport(OS, Export); +} - if (ExportMemory) { - WasmExport MemoryExport; - MemoryExport.Name = "memory"; - MemoryExport.Kind = WASM_EXTERNAL_MEMORY; - MemoryExport.Index = 0; - writeExport(OS, MemoryExport); +void Writer::calculateCustomSections() { + log("calculateCustomSections"); + bool StripDebug = Config->StripDebug || Config->StripAll; + for (ObjFile *File : Symtab->ObjectFiles) { + for (InputSection *Section : File->CustomSections) { + StringRef Name = Section->getName(); + // These custom sections are known the linker and synthesized rather than + // blindly copied + if (Name == "linking" || Name == "name" || Name.startswith("reloc.")) + continue; + // .. or it is a debug section + if (StripDebug && Name.startswith(".debug_")) + continue; + CustomSectionMapping[Name].push_back(Section); + } } +} - for (const Symbol *Sym : SymbolExports) { - log("Export: " + Sym->getName()); - WasmExport Export; - Export.Name = Sym->getName(); - Export.Index = Sym->getOutputIndex(); - if (Sym->isFunction()) - Export.Kind = WASM_EXTERNAL_FUNCTION; - else - Export.Kind = WASM_EXTERNAL_GLOBAL; - writeExport(OS, Export); +void Writer::createCustomSections() { + log("createCustomSections"); + for (auto &Pair : CustomSectionMapping) { + StringRef Name = Pair.first(); + + auto P = CustomSectionSymbols.find(Name); + if (P != CustomSectionSymbols.end()) { + uint32_t SectionIndex = OutputSections.size(); + P->second->setOutputSectionIndex(SectionIndex); + } + + LLVM_DEBUG(dbgs() << "createCustomSection: " << Name << "\n"); + OutputSections.push_back(make<CustomSection>(Name, Pair.second)); } } -void Writer::createStartSection() {} - void Writer::createElemSection() { if (IndirectFunctions.empty()) return; @@ -324,25 +320,25 @@ void Writer::createElemSection() { writeUleb128(OS, 0, "table index"); WasmInitExpr InitExpr; InitExpr.Opcode = WASM_OPCODE_I32_CONST; - InitExpr.Value.Int32 = InitialTableOffset; + InitExpr.Value.Int32 = kInitialTableOffset; writeInitExpr(OS, InitExpr); writeUleb128(OS, IndirectFunctions.size(), "elem count"); - uint32_t TableIndex = InitialTableOffset; - for (const Symbol *Sym : IndirectFunctions) { + uint32_t TableIndex = kInitialTableOffset; + for (const FunctionSymbol *Sym : IndirectFunctions) { assert(Sym->getTableIndex() == TableIndex); - writeUleb128(OS, Sym->getOutputIndex(), "function index"); + writeUleb128(OS, Sym->getFunctionIndex(), "function index"); ++TableIndex; } } void Writer::createCodeSection() { - if (!NumFunctions) + if (InputFunctions.empty()) return; log("createCodeSection"); - auto Section = make<CodeSection>(NumFunctions, Symtab->ObjectFiles); + auto Section = make<CodeSection>(InputFunctions); OutputSections.push_back(Section); } @@ -361,28 +357,68 @@ void Writer::createRelocSections() { log("createRelocSections"); // Don't use iterator here since we are adding to OutputSection size_t OrigSize = OutputSections.size(); - for (size_t i = 0; i < OrigSize; i++) { - OutputSection *S = OutputSections[i]; - const char *name; - uint32_t Count = S->numRelocations(); + for (size_t I = 0; I < OrigSize; I++) { + OutputSection *OSec = OutputSections[I]; + uint32_t Count = OSec->numRelocations(); if (!Count) continue; - if (S->Type == WASM_SEC_DATA) - name = "reloc.DATA"; - else if (S->Type == WASM_SEC_CODE) - name = "reloc.CODE"; + StringRef Name; + if (OSec->Type == WASM_SEC_DATA) + Name = "reloc.DATA"; + else if (OSec->Type == WASM_SEC_CODE) + Name = "reloc.CODE"; + else if (OSec->Type == WASM_SEC_CUSTOM) + Name = Saver.save("reloc." + OSec->Name); else - llvm_unreachable("relocations only supported for code and data"); + llvm_unreachable( + "relocations only supported for code, data, or custom sections"); - SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, name); + SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, Name); raw_ostream &OS = Section->getStream(); - writeUleb128(OS, S->Type, "reloc section"); + writeUleb128(OS, I, "reloc section"); writeUleb128(OS, Count, "reloc count"); - S->writeRelocations(OS); + OSec->writeRelocations(OS); } } +static uint32_t getWasmFlags(const Symbol *Sym) { + uint32_t Flags = 0; + if (Sym->isLocal()) + Flags |= WASM_SYMBOL_BINDING_LOCAL; + if (Sym->isWeak()) + Flags |= WASM_SYMBOL_BINDING_WEAK; + if (Sym->isHidden()) + Flags |= WASM_SYMBOL_VISIBILITY_HIDDEN; + if (Sym->isUndefined()) + Flags |= WASM_SYMBOL_UNDEFINED; + return Flags; +} + +// Some synthetic sections (e.g. "name" and "linking") have subsections. +// Just like the synthetic sections themselves these need to be created before +// they can be written out (since they are preceded by their length). This +// class is used to create subsections and then write them into the stream +// of the parent section. +class SubSection { +public: + explicit SubSection(uint32_t Type) : Type(Type) {} + + void writeTo(raw_ostream &To) { + OS.flush(); + writeUleb128(To, Type, "subsection type"); + writeUleb128(To, Body.size(), "subsection size"); + To.write(Body.data(), Body.size()); + } + +private: + uint32_t Type; + std::string Body; + +public: + raw_string_ostream OS{Body}; +}; + // Create the custom "linking" section containing linker metadata. // This is only created when relocatable output is requested. void Writer::createLinkingSection() { @@ -390,82 +426,145 @@ void Writer::createLinkingSection() { createSyntheticSection(WASM_SEC_CUSTOM, "linking"); raw_ostream &OS = Section->getStream(); - SubSection DataSizeSubSection(WASM_DATA_SIZE); - writeUleb128(DataSizeSubSection.getStream(), DataSize, "data size"); - DataSizeSubSection.finalizeContents(); - DataSizeSubSection.writeToStream(OS); + writeUleb128(OS, WasmMetadataVersion, "Version"); + + if (!SymtabEntries.empty()) { + SubSection Sub(WASM_SYMBOL_TABLE); + writeUleb128(Sub.OS, SymtabEntries.size(), "num symbols"); + + for (const Symbol *Sym : SymtabEntries) { + assert(Sym->isDefined() || Sym->isUndefined()); + WasmSymbolType Kind = Sym->getWasmType(); + uint32_t Flags = getWasmFlags(Sym); + + writeU8(Sub.OS, Kind, "sym kind"); + writeUleb128(Sub.OS, Flags, "sym flags"); + + if (auto *F = dyn_cast<FunctionSymbol>(Sym)) { + writeUleb128(Sub.OS, F->getFunctionIndex(), "index"); + if (Sym->isDefined()) + writeStr(Sub.OS, Sym->getName(), "sym name"); + } else if (auto *G = dyn_cast<GlobalSymbol>(Sym)) { + writeUleb128(Sub.OS, G->getGlobalIndex(), "index"); + if (Sym->isDefined()) + writeStr(Sub.OS, Sym->getName(), "sym name"); + } else if (isa<DataSymbol>(Sym)) { + writeStr(Sub.OS, Sym->getName(), "sym name"); + if (auto *DataSym = dyn_cast<DefinedData>(Sym)) { + writeUleb128(Sub.OS, DataSym->getOutputSegmentIndex(), "index"); + writeUleb128(Sub.OS, DataSym->getOutputSegmentOffset(), + "data offset"); + writeUleb128(Sub.OS, DataSym->getSize(), "data size"); + } + } else { + auto *S = cast<SectionSymbol>(Sym); + writeUleb128(Sub.OS, S->getOutputSectionIndex(), "sym section index"); + } + } - if (!Config->Relocatable) - return; + Sub.writeTo(OS); + } if (Segments.size()) { - SubSection SubSection(WASM_SEGMENT_INFO); - writeUleb128(SubSection.getStream(), Segments.size(), "num data segments"); + SubSection Sub(WASM_SEGMENT_INFO); + writeUleb128(Sub.OS, Segments.size(), "num data segments"); for (const OutputSegment *S : Segments) { - writeStr(SubSection.getStream(), S->Name, "segment name"); - writeUleb128(SubSection.getStream(), S->Alignment, "alignment"); - writeUleb128(SubSection.getStream(), 0, "flags"); + writeStr(Sub.OS, S->Name, "segment name"); + writeUleb128(Sub.OS, S->Alignment, "alignment"); + writeUleb128(Sub.OS, 0, "flags"); } - SubSection.finalizeContents(); - SubSection.writeToStream(OS); + Sub.writeTo(OS); } - std::vector<WasmInitFunc> InitFunctions; - for (ObjFile *File : Symtab->ObjectFiles) { - const WasmLinkingData &L = File->getWasmObj()->linkingData(); - InitFunctions.reserve(InitFunctions.size() + L.InitFunctions.size()); - for (const WasmInitFunc &F : L.InitFunctions) - InitFunctions.emplace_back(WasmInitFunc{ - F.Priority, File->relocateFunctionIndex(F.FunctionIndex)}); + if (!InitFunctions.empty()) { + SubSection Sub(WASM_INIT_FUNCS); + writeUleb128(Sub.OS, InitFunctions.size(), "num init functions"); + for (const WasmInitEntry &F : InitFunctions) { + writeUleb128(Sub.OS, F.Priority, "priority"); + writeUleb128(Sub.OS, F.Sym->getOutputSymbolIndex(), "function index"); + } + Sub.writeTo(OS); } - if (!InitFunctions.empty()) { - SubSection SubSection(WASM_INIT_FUNCS); - writeUleb128(SubSection.getStream(), InitFunctions.size(), - "num init functionsw"); - for (const WasmInitFunc &F : InitFunctions) { - writeUleb128(SubSection.getStream(), F.Priority, "priority"); - writeUleb128(SubSection.getStream(), F.FunctionIndex, "function index"); + struct ComdatEntry { + unsigned Kind; + uint32_t Index; + }; + std::map<StringRef, std::vector<ComdatEntry>> Comdats; + + for (const InputFunction *F : InputFunctions) { + StringRef Comdat = F->getComdatName(); + if (!Comdat.empty()) + Comdats[Comdat].emplace_back( + ComdatEntry{WASM_COMDAT_FUNCTION, F->getFunctionIndex()}); + } + for (uint32_t I = 0; I < Segments.size(); ++I) { + const auto &InputSegments = Segments[I]->InputSegments; + if (InputSegments.empty()) + continue; + StringRef Comdat = InputSegments[0]->getComdatName(); +#ifndef NDEBUG + for (const InputSegment *IS : InputSegments) + assert(IS->getComdatName() == Comdat); +#endif + if (!Comdat.empty()) + Comdats[Comdat].emplace_back(ComdatEntry{WASM_COMDAT_DATA, I}); + } + + if (!Comdats.empty()) { + SubSection Sub(WASM_COMDAT_INFO); + writeUleb128(Sub.OS, Comdats.size(), "num comdats"); + for (const auto &C : Comdats) { + writeStr(Sub.OS, C.first, "comdat name"); + writeUleb128(Sub.OS, 0, "comdat flags"); // flags for future use + writeUleb128(Sub.OS, C.second.size(), "num entries"); + for (const ComdatEntry &Entry : C.second) { + writeU8(Sub.OS, Entry.Kind, "entry kind"); + writeUleb128(Sub.OS, Entry.Index, "entry index"); + } } - SubSection.finalizeContents(); - SubSection.writeToStream(OS); + Sub.writeTo(OS); } } // Create the custom "name" section containing debug symbol names. void Writer::createNameSection() { - // Create an array of all function sorted by function index space - std::vector<const Symbol *> Names; + unsigned NumNames = NumImportedFunctions; + for (const InputFunction *F : InputFunctions) + if (!F->getName().empty() || !F->getDebugName().empty()) + ++NumNames; - for (ObjFile *File : Symtab->ObjectFiles) { - Names.reserve(Names.size() + File->getSymbols().size()); - for (Symbol *S : File->getSymbols()) { - if (!S->isFunction() || S->isWeak() || S->WrittenToNameSec) - continue; - S->WrittenToNameSec = true; - Names.emplace_back(S); - } - } + if (NumNames == 0) + return; SyntheticSection *Section = createSyntheticSection(WASM_SEC_CUSTOM, "name"); - std::sort(Names.begin(), Names.end(), [](const Symbol *A, const Symbol *B) { - return A->getOutputIndex() < B->getOutputIndex(); - }); - - SubSection FunctionSubsection(WASM_NAMES_FUNCTION); - raw_ostream &OS = FunctionSubsection.getStream(); - writeUleb128(OS, Names.size(), "name count"); - - // We have to iterate through the inputs twice so that all the imports - // appear first before any of the local function names. - for (const Symbol *S : Names) { - writeUleb128(OS, S->getOutputIndex(), "func index"); - writeStr(OS, S->getName(), "symbol name"); + SubSection Sub(WASM_NAMES_FUNCTION); + writeUleb128(Sub.OS, NumNames, "name count"); + + // Names must appear in function index order. As it happens ImportedSymbols + // and InputFunctions are numbered in order with imported functions coming + // first. + for (const Symbol *S : ImportedSymbols) { + if (auto *F = dyn_cast<FunctionSymbol>(S)) { + writeUleb128(Sub.OS, F->getFunctionIndex(), "func index"); + Optional<std::string> Name = demangleItanium(F->getName()); + writeStr(Sub.OS, Name ? StringRef(*Name) : F->getName(), "symbol name"); + } + } + for (const InputFunction *F : InputFunctions) { + if (!F->getName().empty()) { + writeUleb128(Sub.OS, F->getFunctionIndex(), "func index"); + if (!F->getDebugName().empty()) { + writeStr(Sub.OS, F->getDebugName(), "symbol name"); + } else { + Optional<std::string> Name = demangleItanium(F->getName()); + writeStr(Sub.OS, Name ? StringRef(*Name) : F->getName(), "symbol name"); + } + } } - FunctionSubsection.finalizeContents(); - FunctionSubsection.writeToStream(Section->getStream()); + Sub.writeTo(Section->getStream()); } void Writer::writeHeader() { @@ -479,48 +578,98 @@ void Writer::writeSections() { // Fix the memory layout of the output binary. This assigns memory offsets // to each of the input data sections as well as the explicit stack region. +// The default memory layout is as follows, from low to high. +// +// - initialized data (starting at Config->GlobalBase) +// - BSS data (not currently implemented in llvm) +// - explicit stack (Config->ZStackSize) +// - heap start / unallocated +// +// The --stack-first option means that stack is placed before any static data. +// This can be useful since it means that stack overflow traps immediately rather +// than overwriting global data, but also increases code size since all static +// data loads and stores requires larger offsets. void Writer::layoutMemory() { + createOutputSegments(); + uint32_t MemoryPtr = 0; - if (!Config->Relocatable) { + + auto PlaceStack = [&]() { + if (Config->Relocatable) + return; + MemoryPtr = alignTo(MemoryPtr, kStackAlignment); + if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment)) + error("stack size must be " + Twine(kStackAlignment) + "-byte aligned"); + log("mem: stack size = " + Twine(Config->ZStackSize)); + log("mem: stack base = " + Twine(MemoryPtr)); + MemoryPtr += Config->ZStackSize; + WasmSym::StackPointer->Global->Global.InitExpr.Value.Int32 = MemoryPtr; + log("mem: stack top = " + Twine(MemoryPtr)); + }; + + if (Config->StackFirst) { + PlaceStack(); + } else { MemoryPtr = Config->GlobalBase; - debugPrint("mem: global base = %d\n", Config->GlobalBase); + log("mem: global base = " + Twine(Config->GlobalBase)); } - createOutputSegments(); + uint32_t DataStart = MemoryPtr; + + // Arbitrarily set __dso_handle handle to point to the start of the data + // segments. + if (WasmSym::DsoHandle) + WasmSym::DsoHandle->setVirtualAddress(DataStart); - // Static data comes first for (OutputSegment *Seg : Segments) { MemoryPtr = alignTo(MemoryPtr, Seg->Alignment); Seg->StartVA = MemoryPtr; - debugPrint("mem: %-10s offset=%-8d size=%-4d align=%d\n", - Seg->Name.str().c_str(), MemoryPtr, Seg->Size, Seg->Alignment); + log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", Seg->Name, + MemoryPtr, Seg->Size, Seg->Alignment)); MemoryPtr += Seg->Size; } - DataSize = MemoryPtr; - if (!Config->Relocatable) - DataSize -= Config->GlobalBase; - debugPrint("mem: static data = %d\n", DataSize); + // TODO: Add .bss space here. + if (WasmSym::DataEnd) + WasmSym::DataEnd->setVirtualAddress(MemoryPtr); + + log("mem: static data = " + Twine(MemoryPtr - DataStart)); - // Stack comes after static data + if (!Config->StackFirst) + PlaceStack(); + + // Set `__heap_base` to directly follow the end of the stack or global data. + // The fact that this comes last means that a malloc/brk implementation + // can grow the heap at runtime. if (!Config->Relocatable) { - MemoryPtr = alignTo(MemoryPtr, kStackAlignment); - if (Config->ZStackSize != alignTo(Config->ZStackSize, kStackAlignment)) - error("stack size must be " + Twine(kStackAlignment) + "-byte aligned"); - debugPrint("mem: stack size = %d\n", Config->ZStackSize); - debugPrint("mem: stack base = %d\n", MemoryPtr); - MemoryPtr += Config->ZStackSize; - Config->StackPointerSymbol->setVirtualAddress(MemoryPtr); - debugPrint("mem: stack top = %d\n", MemoryPtr); + WasmSym::HeapBase->setVirtualAddress(MemoryPtr); + log("mem: heap base = " + Twine(MemoryPtr)); } + if (Config->InitialMemory != 0) { + if (Config->InitialMemory != alignTo(Config->InitialMemory, WasmPageSize)) + error("initial memory must be " + Twine(WasmPageSize) + "-byte aligned"); + if (MemoryPtr > Config->InitialMemory) + error("initial memory too small, " + Twine(MemoryPtr) + " bytes needed"); + else + MemoryPtr = Config->InitialMemory; + } uint32_t MemSize = alignTo(MemoryPtr, WasmPageSize); NumMemoryPages = MemSize / WasmPageSize; - debugPrint("mem: total pages = %d\n", NumMemoryPages); + log("mem: total pages = " + Twine(NumMemoryPages)); + + if (Config->MaxMemory != 0) { + if (Config->MaxMemory != alignTo(Config->MaxMemory, WasmPageSize)) + error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned"); + if (MemoryPtr > Config->MaxMemory) + error("maximum memory too small, " + Twine(MemoryPtr) + " bytes needed"); + MaxMemoryPages = Config->MaxMemory / WasmPageSize; + log("mem: max pages = " + Twine(MaxMemoryPages)); + } } SyntheticSection *Writer::createSyntheticSection(uint32_t Type, - std::string Name) { + StringRef Name) { auto Sec = make<SyntheticSection>(Type, Name); log("createSection: " + toString(*Sec)); OutputSections.push_back(Sec); @@ -536,15 +685,16 @@ void Writer::createSections() { createMemorySection(); createGlobalSection(); createExportSection(); - createStartSection(); createElemSection(); createCodeSection(); createDataSection(); + createCustomSections(); // Custom sections - if (Config->EmitRelocs) + if (Config->Relocatable) { + createLinkingSection(); createRelocSections(); - createLinkingSection(); + } if (!Config->StripDebug && !Config->StripAll) createNameSection(); @@ -555,149 +705,336 @@ void Writer::createSections() { } } -void Writer::calculateOffsets() { - for (ObjFile *File : Symtab->ObjectFiles) { - const WasmObjectFile *WasmFile = File->getWasmObj(); - - // Function Index - File->FunctionIndexOffset = - FunctionImports.size() - File->NumFunctionImports() + NumFunctions; - NumFunctions += WasmFile->functions().size(); +void Writer::calculateImports() { + for (Symbol *Sym : Symtab->getSymbols()) { + if (!Sym->isUndefined()) + continue; + if (isa<DataSymbol>(Sym)) + continue; + if (Sym->isWeak() && !Config->Relocatable) + continue; + if (!Sym->isLive()) + continue; + if (!Sym->IsUsedInRegularObj) + continue; - // Memory - if (WasmFile->memories().size() > 1) - fatal(File->getName() + ": contains more than one memory"); + LLVM_DEBUG(dbgs() << "import: " << Sym->getName() << "\n"); + ImportedSymbols.emplace_back(Sym); + if (auto *F = dyn_cast<FunctionSymbol>(Sym)) + F->setFunctionIndex(NumImportedFunctions++); + else + cast<GlobalSymbol>(Sym)->setGlobalIndex(NumImportedGlobals++); } } -void Writer::calculateImports() { +void Writer::calculateExports() { + if (Config->Relocatable) + return; + + if (!Config->Relocatable && !Config->ImportMemory) + Exports.push_back(WasmExport{"memory", WASM_EXTERNAL_MEMORY, 0}); + + if (!Config->Relocatable && Config->ExportTable) + Exports.push_back(WasmExport{kFunctionTableName, WASM_EXTERNAL_TABLE, 0}); + + unsigned FakeGlobalIndex = NumImportedGlobals + InputGlobals.size(); + for (Symbol *Sym : Symtab->getSymbols()) { - if (!Sym->isUndefined() || Sym->isWeak()) + if (!Sym->isExported()) + continue; + if (!Sym->isLive()) continue; - if (Sym->isFunction()) { - Sym->setOutputIndex(FunctionImports.size()); - FunctionImports.push_back(Sym); + StringRef Name = Sym->getName(); + WasmExport Export; + if (auto *F = dyn_cast<DefinedFunction>(Sym)) { + Export = {Name, WASM_EXTERNAL_FUNCTION, F->getFunctionIndex()}; + } else if (auto *G = dyn_cast<DefinedGlobal>(Sym)) { + // TODO(sbc): Remove this check once to mutable global proposal is + // implement in all major browsers. + // See: https://github.com/WebAssembly/mutable-global + if (G->getGlobalType()->Mutable) { + // Only the __stack_pointer should ever be create as mutable. + assert(G == WasmSym::StackPointer); + continue; + } + Export = {Name, WASM_EXTERNAL_GLOBAL, G->getGlobalIndex()}; } else { - Sym->setOutputIndex(GlobalImports.size()); - GlobalImports.push_back(Sym); + auto *D = cast<DefinedData>(Sym); + DefinedFakeGlobals.emplace_back(D); + Export = {Name, WASM_EXTERNAL_GLOBAL, FakeGlobalIndex++}; } + + LLVM_DEBUG(dbgs() << "Export: " << Name << "\n"); + Exports.push_back(Export); + } +} + +void Writer::assignSymtab() { + if (!Config->Relocatable) + return; + + StringMap<uint32_t> SectionSymbolIndices; + + unsigned SymbolIndex = SymtabEntries.size(); + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Symtab entries: " << File->getName() << "\n"); + for (Symbol *Sym : File->getSymbols()) { + if (Sym->getFile() != File) + continue; + + if (auto *S = dyn_cast<SectionSymbol>(Sym)) { + StringRef Name = S->getName(); + if (CustomSectionMapping.count(Name) == 0) + continue; + + auto SSI = SectionSymbolIndices.find(Name); + if (SSI != SectionSymbolIndices.end()) { + Sym->setOutputSymbolIndex(SSI->second); + continue; + } + + SectionSymbolIndices[Name] = SymbolIndex; + CustomSectionSymbols[Name] = cast<SectionSymbol>(Sym); + + Sym->markLive(); + } + + // (Since this is relocatable output, GC is not performed so symbols must + // be live.) + assert(Sym->isLive()); + Sym->setOutputSymbolIndex(SymbolIndex++); + SymtabEntries.emplace_back(Sym); + } + } + + // For the moment, relocatable output doesn't contain any synthetic functions, + // so no need to look through the Symtab for symbols not referenced by + // Symtab->ObjectFiles. +} + +uint32_t Writer::lookupType(const WasmSignature &Sig) { + auto It = TypeIndices.find(Sig); + if (It == TypeIndices.end()) { + error("type not found: " + toString(Sig)); + return 0; } + return It->second; } -uint32_t Writer::getTypeIndex(const WasmSignature &Sig) { +uint32_t Writer::registerType(const WasmSignature &Sig) { auto Pair = TypeIndices.insert(std::make_pair(Sig, Types.size())); - if (Pair.second) + if (Pair.second) { + LLVM_DEBUG(dbgs() << "type " << toString(Sig) << "\n"); Types.push_back(&Sig); + } return Pair.first->second; } void Writer::calculateTypes() { + // The output type section is the union of the following sets: + // 1. Any signature used in the TYPE relocation + // 2. The signatures of all imported functions + // 3. The signatures of all defined functions + for (ObjFile *File : Symtab->ObjectFiles) { - File->TypeMap.reserve(File->getWasmObj()->types().size()); - for (const WasmSignature &Sig : File->getWasmObj()->types()) - File->TypeMap.push_back(getTypeIndex(Sig)); + ArrayRef<WasmSignature> Types = File->getWasmObj()->types(); + for (uint32_t I = 0; I < Types.size(); I++) + if (File->TypeIsUsed[I]) + File->TypeMap[I] = registerType(Types[I]); } -} -void Writer::assignSymbolIndexes() { - uint32_t GlobalIndex = GlobalImports.size(); + for (const Symbol *Sym : ImportedSymbols) + if (auto *F = dyn_cast<FunctionSymbol>(Sym)) + registerType(*F->FunctionType); - if (Config->StackPointerSymbol) { - DefinedGlobals.emplace_back(Config->StackPointerSymbol); - Config->StackPointerSymbol->setOutputIndex(GlobalIndex++); - } + for (const InputFunction *F : InputFunctions) + registerType(F->Signature); +} - if (Config->EmitRelocs) - DefinedGlobals.reserve(Symtab->getSymbols().size()); +void Writer::assignIndexes() { + uint32_t FunctionIndex = NumImportedFunctions + InputFunctions.size(); + auto AddDefinedFunction = [&](InputFunction *Func) { + if (!Func->Live) + return; + InputFunctions.emplace_back(Func); + Func->setFunctionIndex(FunctionIndex++); + }; - uint32_t TableIndex = InitialTableOffset; + for (InputFunction *Func : Symtab->SyntheticFunctions) + AddDefinedFunction(Func); for (ObjFile *File : Symtab->ObjectFiles) { - DEBUG(dbgs() << "assignSymbolIndexes: " << File->getName() << "\n"); - - for (Symbol *Sym : File->getSymbols()) { - // Assign indexes for symbols defined with this file. - if (!Sym->isDefined() || File != Sym->getFile()) - continue; - if (Sym->isFunction()) { - auto *Obj = cast<ObjFile>(Sym->getFile()); - Sym->setOutputIndex(Obj->FunctionIndexOffset + - Sym->getFunctionIndex()); - } else if (Config->EmitRelocs) { - DefinedGlobals.emplace_back(Sym); - Sym->setOutputIndex(GlobalIndex++); - } - } + LLVM_DEBUG(dbgs() << "Functions: " << File->getName() << "\n"); + for (InputFunction *Func : File->Functions) + AddDefinedFunction(Func); + } - for (Symbol *Sym : File->getTableSymbols()) { - if (!Sym->hasTableIndex()) { + uint32_t TableIndex = kInitialTableOffset; + auto HandleRelocs = [&](InputChunk *Chunk) { + if (!Chunk->Live) + return; + ObjFile *File = Chunk->File; + ArrayRef<WasmSignature> Types = File->getWasmObj()->types(); + for (const WasmRelocation &Reloc : Chunk->getRelocations()) { + if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32 || + Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB) { + FunctionSymbol *Sym = File->getFunctionSymbol(Reloc.Index); + if (Sym->hasTableIndex() || !Sym->hasFunctionIndex()) + continue; Sym->setTableIndex(TableIndex++); IndirectFunctions.emplace_back(Sym); + } else if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB) { + // Mark target type as live + File->TypeMap[Reloc.Index] = registerType(Types[Reloc.Index]); + File->TypeIsUsed[Reloc.Index] = true; } } + }; + + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Handle relocs: " << File->getName() << "\n"); + for (InputChunk *Chunk : File->Functions) + HandleRelocs(Chunk); + for (InputChunk *Chunk : File->Segments) + HandleRelocs(Chunk); + for (auto &P : File->CustomSections) + HandleRelocs(P); + } + + uint32_t GlobalIndex = NumImportedGlobals + InputGlobals.size(); + auto AddDefinedGlobal = [&](InputGlobal *Global) { + if (Global->Live) { + LLVM_DEBUG(dbgs() << "AddDefinedGlobal: " << GlobalIndex << "\n"); + Global->setGlobalIndex(GlobalIndex++); + InputGlobals.push_back(Global); + } + }; + + for (InputGlobal *Global : Symtab->SyntheticGlobals) + AddDefinedGlobal(Global); + + for (ObjFile *File : Symtab->ObjectFiles) { + LLVM_DEBUG(dbgs() << "Globals: " << File->getName() << "\n"); + for (InputGlobal *Global : File->Globals) + AddDefinedGlobal(Global); } } static StringRef getOutputDataSegmentName(StringRef Name) { - if (Config->Relocatable) + if (!Config->MergeDataSegments) return Name; - - for (StringRef V : - {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", - ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", - ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) { - StringRef Prefix = V.drop_back(); - if (Name.startswith(V) || Name == Prefix) - return Prefix; - } - + if (Name.startswith(".text.")) + return ".text"; + if (Name.startswith(".data.")) + return ".data"; + if (Name.startswith(".bss.")) + return ".bss"; return Name; } void Writer::createOutputSegments() { for (ObjFile *File : Symtab->ObjectFiles) { for (InputSegment *Segment : File->Segments) { + if (!Segment->Live) + continue; StringRef Name = getOutputDataSegmentName(Segment->getName()); OutputSegment *&S = SegmentMap[Name]; if (S == nullptr) { - DEBUG(dbgs() << "new segment: " << Name << "\n"); - S = make<OutputSegment>(Name); + LLVM_DEBUG(dbgs() << "new segment: " << Name << "\n"); + S = make<OutputSegment>(Name, Segments.size()); Segments.push_back(S); } S->addInputSegment(Segment); - DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); + LLVM_DEBUG(dbgs() << "added data: " << Name << ": " << S->Size << "\n"); } } } +static const int OPCODE_CALL = 0x10; +static const int OPCODE_END = 0xb; + +// Create synthetic "__wasm_call_ctors" function based on ctor functions +// in input object. +void Writer::createCtorFunction() { + // First write the body's contents to a string. + std::string BodyContent; + { + raw_string_ostream OS(BodyContent); + writeUleb128(OS, 0, "num locals"); + for (const WasmInitEntry &F : InitFunctions) { + writeU8(OS, OPCODE_CALL, "CALL"); + writeUleb128(OS, F.Sym->getFunctionIndex(), "function index"); + } + writeU8(OS, OPCODE_END, "END"); + } + + // Once we know the size of the body we can create the final function body + std::string FunctionBody; + { + raw_string_ostream OS(FunctionBody); + writeUleb128(OS, BodyContent.size(), "function size"); + OS << BodyContent; + } + + ArrayRef<uint8_t> Body = toArrayRef(Saver.save(FunctionBody)); + cast<SyntheticFunction>(WasmSym::CallCtors->Function)->setBody(Body); +} + +// Populate InitFunctions vector with init functions from all input objects. +// This is then used either when creating the output linking section or to +// synthesize the "__wasm_call_ctors" function. +void Writer::calculateInitFunctions() { + for (ObjFile *File : Symtab->ObjectFiles) { + const WasmLinkingData &L = File->getWasmObj()->linkingData(); + for (const WasmInitFunc &F : L.InitFunctions) { + FunctionSymbol *Sym = File->getFunctionSymbol(F.Symbol); + if (*Sym->FunctionType != WasmSignature{{}, WASM_TYPE_NORESULT}) + error("invalid signature for init func: " + toString(*Sym)); + InitFunctions.emplace_back(WasmInitEntry{Sym, F.Priority}); + } + } + + // Sort in order of priority (lowest first) so that they are called + // in the correct order. + std::stable_sort(InitFunctions.begin(), InitFunctions.end(), + [](const WasmInitEntry &L, const WasmInitEntry &R) { + return L.Priority < R.Priority; + }); +} + void Writer::run() { - if (!Config->Relocatable) - InitialTableOffset = 1; + if (Config->Relocatable) + Config->GlobalBase = 0; - log("-- calculateTypes"); - calculateTypes(); log("-- calculateImports"); calculateImports(); - log("-- calculateOffsets"); - calculateOffsets(); + log("-- assignIndexes"); + assignIndexes(); + log("-- calculateInitFunctions"); + calculateInitFunctions(); + if (!Config->Relocatable) + createCtorFunction(); + log("-- calculateTypes"); + calculateTypes(); + log("-- layoutMemory"); + layoutMemory(); + log("-- calculateExports"); + calculateExports(); + log("-- calculateCustomSections"); + calculateCustomSections(); + log("-- assignSymtab"); + assignSymtab(); if (errorHandler().Verbose) { - log("Defined Functions: " + Twine(NumFunctions)); - log("Defined Globals : " + Twine(DefinedGlobals.size())); - log("Function Imports : " + Twine(FunctionImports.size())); - log("Global Imports : " + Twine(GlobalImports.size())); - log("Total Imports : " + - Twine(FunctionImports.size() + GlobalImports.size())); + log("Defined Functions: " + Twine(InputFunctions.size())); + log("Defined Globals : " + Twine(InputGlobals.size())); + log("Function Imports : " + Twine(NumImportedFunctions)); + log("Global Imports : " + Twine(NumImportedGlobals)); for (ObjFile *File : Symtab->ObjectFiles) File->dumpInfo(); } - log("-- assignSymbolIndexes"); - assignSymbolIndexes(); - log("-- layoutMemory"); - layoutMemory(); - createHeader(); log("-- createSections"); createSections(); @@ -721,7 +1058,6 @@ void Writer::run() { // Open a result file. void Writer::openFile() { log("writing: " + Config->OutputFile); - ::remove(Config->OutputFile.str().c_str()); Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = FileOutputBuffer::create(Config->OutputFile, FileSize, |
