diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp')
-rw-r--r-- | lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp new file mode 100644 index 0000000000000..91150fa02ebca --- /dev/null +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -0,0 +1,466 @@ +//===-- ObjectFileWasm.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjectFileWasm.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/SectionLoadList.h" +#include "lldb/Target/Target.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/Log.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Format.h" + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::wasm; + +LLDB_PLUGIN_DEFINE(ObjectFileWasm) + +static const uint32_t kWasmHeaderSize = + sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); + +/// Checks whether the data buffer starts with a valid Wasm module header. +static bool ValidateModuleHeader(const DataBufferSP &data_sp) { + if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) + return false; + + if (llvm::identify_magic(toStringRef(data_sp->GetData())) != + llvm::file_magic::wasm_object) + return false; + + uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); + + uint32_t version = llvm::support::endian::read32le(Ptr); + return version == llvm::wasm::WasmVersion; +} + +static llvm::Optional<ConstString> +GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { + // A Wasm string is encoded as a vector of UTF-8 codes. + // Vectors are encoded with their u32 length followed by the element + // sequence. + uint64_t len = data.getULEB128(c); + if (!c) { + consumeError(c.takeError()); + return llvm::None; + } + + if (len >= (uint64_t(1) << 32)) { + return llvm::None; + } + + llvm::SmallVector<uint8_t, 32> str_storage; + data.getU8(c, str_storage, len); + if (!c) { + consumeError(c.takeError()); + return llvm::None; + } + + llvm::StringRef str = toStringRef(makeArrayRef(str_storage)); + return ConstString(str); +} + +char ObjectFileWasm::ID; + +void ObjectFileWasm::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFileWasm::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ConstString ObjectFileWasm::GetPluginNameStatic() { + static ConstString g_name("wasm"); + return g_name; +} + +ObjectFile * +ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp, + offset_t data_offset, const FileSpec *file, + offset_t file_offset, offset_t length) { + Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); + + if (!data_sp) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) { + LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s", + file->GetPath().c_str()); + return nullptr; + } + data_offset = 0; + } + + assert(data_sp); + if (!ValidateModuleHeader(data_sp)) { + LLDB_LOGF(log, + "Failed to create ObjectFileWasm instance: invalid Wasm header"); + return nullptr; + } + + // Update the data to contain the entire file if it doesn't contain it + // already. + if (data_sp->GetByteSize() < length) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) { + LLDB_LOGF(log, + "Failed to create ObjectFileWasm instance: cannot read file %s", + file->GetPath().c_str()); + return nullptr; + } + data_offset = 0; + } + + std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( + module_sp, data_sp, data_offset, file, file_offset, length)); + ArchSpec spec = objfile_up->GetArchitecture(); + if (spec && objfile_up->SetModulesArchitecture(spec)) { + LLDB_LOGF(log, + "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s", + static_cast<void *>(objfile_up.get()), + static_cast<void *>(objfile_up->GetModule().get()), + objfile_up->GetModule()->GetSpecificationDescription().c_str(), + file ? file->GetPath().c_str() : "<NULL>"); + return objfile_up.release(); + } + + LLDB_LOGF(log, "Failed to create ObjectFileWasm instance"); + return nullptr; +} + +ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, + DataBufferSP &data_sp, + const ProcessSP &process_sp, + addr_t header_addr) { + if (!ValidateModuleHeader(data_sp)) + return nullptr; + + std::unique_ptr<ObjectFileWasm> objfile_up( + new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); + ArchSpec spec = objfile_up->GetArchitecture(); + if (spec && objfile_up->SetModulesArchitecture(spec)) + return objfile_up.release(); + return nullptr; +} + +bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { + // Buffer sufficient to read a section header and find the pointer to the next + // section. + const uint32_t kBufferSize = 1024; + DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize); + + llvm::DataExtractor data = section_header_data.GetAsLLVM(); + llvm::DataExtractor::Cursor c(0); + + // Each section consists of: + // - a one-byte section id, + // - the u32 size of the contents, in bytes, + // - the actual contents. + uint8_t section_id = data.getU8(c); + uint64_t payload_len = data.getULEB128(c); + if (!c) + return !llvm::errorToBool(c.takeError()); + + if (payload_len >= (uint64_t(1) << 32)) + return false; + + if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { + // Custom sections have the id 0. Their contents consist of a name + // identifying the custom section, followed by an uninterpreted sequence + // of bytes. + lldb::offset_t prev_offset = c.tell(); + llvm::Optional<ConstString> sect_name = GetWasmString(data, c); + if (!sect_name) + return false; + + if (payload_len < c.tell() - prev_offset) + return false; + + uint32_t section_length = payload_len - (c.tell() - prev_offset); + m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, + section_id, *sect_name}); + *offset_ptr += (c.tell() + section_length); + } else if (section_id <= llvm::wasm::WASM_SEC_EVENT) { + m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), + static_cast<uint32_t>(payload_len), + section_id, ConstString()}); + *offset_ptr += (c.tell() + payload_len); + } else { + // Invalid section id. + return false; + } + return true; +} + +bool ObjectFileWasm::DecodeSections() { + lldb::offset_t offset = kWasmHeaderSize; + if (IsInMemory()) { + offset += m_memory_addr; + } + + while (DecodeNextSection(&offset)) + ; + return true; +} + +size_t ObjectFileWasm::GetModuleSpecifications( + const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, + offset_t file_offset, offset_t length, ModuleSpecList &specs) { + if (!ValidateModuleHeader(data_sp)) { + return 0; + } + + ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm")); + specs.Append(spec); + return 1; +} + +ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP &data_sp, + offset_t data_offset, const FileSpec *file, + offset_t offset, offset_t length) + : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), + m_arch("wasm32-unknown-unknown-wasm") { + m_data.SetAddressByteSize(4); +} + +ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, + lldb::DataBufferSP &header_data_sp, + const lldb::ProcessSP &process_sp, + lldb::addr_t header_addr) + : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), + m_arch("wasm32-unknown-unknown-wasm") {} + +bool ObjectFileWasm::ParseHeader() { + // We already parsed the header during initialization. + return true; +} + +Symtab *ObjectFileWasm::GetSymtab() { return nullptr; } + +void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { + if (m_sections_up) + return; + + m_sections_up = std::make_unique<SectionList>(); + + if (m_sect_infos.empty()) { + DecodeSections(); + } + + for (const section_info §_info : m_sect_infos) { + SectionType section_type = eSectionTypeOther; + ConstString section_name; + offset_t file_offset = sect_info.offset & 0xffffffff; + addr_t vm_addr = file_offset; + size_t vm_size = sect_info.size; + + if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { + section_type = eSectionTypeCode; + section_name = ConstString("code"); + + // A code address in DWARF for WebAssembly is the offset of an + // instruction relative within the Code section of the WebAssembly file. + // For this reason Section::GetFileAddress() must return zero for the + // Code section. + vm_addr = 0; + } else { + section_type = + llvm::StringSwitch<SectionType>(sect_info.name.GetStringRef()) + .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev) + .Case(".debug_addr", eSectionTypeDWARFDebugAddr) + .Case(".debug_aranges", eSectionTypeDWARFDebugAranges) + .Case(".debug_cu_index", eSectionTypeDWARFDebugCuIndex) + .Case(".debug_frame", eSectionTypeDWARFDebugFrame) + .Case(".debug_info", eSectionTypeDWARFDebugInfo) + .Case(".debug_line", eSectionTypeDWARFDebugLine) + .Case(".debug_line_str", eSectionTypeDWARFDebugLineStr) + .Case(".debug_loc", eSectionTypeDWARFDebugLoc) + .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists) + .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo) + .Case(".debug_macro", eSectionTypeDWARFDebugMacro) + .Case(".debug_names", eSectionTypeDWARFDebugNames) + .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames) + .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes) + .Case(".debug_ranges", eSectionTypeDWARFDebugRanges) + .Case(".debug_rnglists", eSectionTypeDWARFDebugRngLists) + .Case(".debug_str", eSectionTypeDWARFDebugStr) + .Case(".debug_str_offsets", eSectionTypeDWARFDebugStrOffsets) + .Case(".debug_types", eSectionTypeDWARFDebugTypes) + .Default(eSectionTypeOther); + if (section_type == eSectionTypeOther) + continue; + section_name = sect_info.name; + if (!IsInMemory()) { + vm_size = 0; + vm_addr = 0; + } + } + + SectionSP section_sp( + new Section(GetModule(), // Module to which this section belongs. + this, // ObjectFile to which this section belongs and + // should read section data from. + section_type, // Section ID. + section_name, // Section name. + section_type, // Section type. + vm_addr, // VM address. + vm_size, // VM size in bytes of this section. + file_offset, // Offset of this section in the file. + sect_info.size, // Size of the section as found in the file. + 0, // Alignment of the section + 0, // Flags for this section. + 1)); // Number of host bytes per target byte + m_sections_up->AddSection(section_sp); + unified_section_list.AddSection(section_sp); + } +} + +bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, + bool value_is_offset) { + /// In WebAssembly, linear memory is disjointed from code space. The VM can + /// load multiple instances of a module, which logically share the same code. + /// We represent a wasm32 code address with 64-bits, like: + /// 63 32 31 0 + /// +---------------+---------------+ + /// + module_id | offset | + /// +---------------+---------------+ + /// where the lower 32 bits represent a module offset (relative to the module + /// start not to the beginning of the code section) and the higher 32 bits + /// uniquely identify the module in the WebAssembly VM. + /// In other words, we assume that each WebAssembly module is loaded by the + /// engine at a 64-bit address that starts at the boundary of 4GB pages, like + /// 0x0000000400000000 for module_id == 4. + /// These 64-bit addresses will be used to request code ranges for a specific + /// module from the WebAssembly engine. + + assert(m_memory_addr == LLDB_INVALID_ADDRESS || + m_memory_addr == load_address); + + ModuleSP module_sp = GetModule(); + if (!module_sp) + return false; + + DecodeSections(); + + size_t num_loaded_sections = 0; + SectionList *section_list = GetSectionList(); + if (!section_list) + return false; + + const size_t num_sections = section_list->GetSize(); + for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { + SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); + if (target.SetSectionLoadAddress( + section_sp, load_address | section_sp->GetFileOffset())) { + ++num_loaded_sections; + } + } + + return num_loaded_sections > 0; +} + +DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { + DataExtractor data; + if (m_file) { + if (offset < GetByteSize()) { + size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset); + auto buffer_sp = MapFileData(m_file, size, offset); + return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); + } + } else { + ProcessSP process_sp(m_process_wp.lock()); + if (process_sp) { + auto data_up = std::make_unique<DataBufferHeap>(size, 0); + Status readmem_error; + size_t bytes_read = process_sp->ReadMemory( + offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error); + if (bytes_read > 0) { + DataBufferSP buffer_sp(data_up.release()); + data.SetData(buffer_sp, 0, buffer_sp->GetByteSize()); + } + } + } + + data.SetByteOrder(GetByteOrder()); + return data; +} + +llvm::Optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { + static ConstString g_sect_name_external_debug_info("external_debug_info"); + + for (const section_info §_info : m_sect_infos) { + if (g_sect_name_external_debug_info == sect_info.name) { + const uint32_t kBufferSize = 1024; + DataExtractor section_header_data = + ReadImageData(sect_info.offset, kBufferSize); + llvm::DataExtractor data = section_header_data.GetAsLLVM(); + llvm::DataExtractor::Cursor c(0); + llvm::Optional<ConstString> symbols_url = GetWasmString(data, c); + if (symbols_url) + return FileSpec(symbols_url->GetStringRef()); + } + } + return llvm::None; +} + +void ObjectFileWasm::Dump(Stream *s) { + ModuleSP module_sp(GetModule()); + if (!module_sp) + return; + + std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); + + llvm::raw_ostream &ostream = s->AsRawOstream(); + ostream << static_cast<void *>(this) << ": "; + s->Indent(); + ostream << "ObjectFileWasm, file = '"; + m_file.Dump(ostream); + ostream << "', arch = "; + ostream << GetArchitecture().GetArchitectureName() << "\n"; + + SectionList *sections = GetSectionList(); + if (sections) { + sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, + UINT32_MAX); + } + ostream << "\n"; + DumpSectionHeaders(ostream); + ostream << "\n"; +} + +void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, + const section_info_t &sh) { + ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " + << llvm::format_hex(sh.offset, 10) << " " + << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) + << "\n"; +} + +void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) { + ostream << "Section Headers\n"; + ostream << "IDX name addr size id\n"; + ostream << "==== ---------------- ---------- ---------- ------\n"; + + uint32_t idx = 0; + for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); + ++pos, ++idx) { + ostream << "[" << llvm::format_decimal(idx, 2) << "] "; + ObjectFileWasm::DumpSectionHeader(ostream, *pos); + } +} |