//===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // Project includes #include "MinidumpParser.h" #include "NtStructures.h" #include "RegisterContextMinidump_x86_32.h" // Other libraries and framework includes #include "lldb/Target/MemoryRegionInfo.h" #include "lldb/Utility/LLDBAssert.h" // C includes // C++ includes #include #include #include using namespace lldb_private; using namespace minidump; llvm::Optional MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) { if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) { return llvm::None; } return MinidumpParser(data_buf_sp); } MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp) : m_data_sp(data_buf_sp) {} llvm::ArrayRef MinidumpParser::GetData() { return llvm::ArrayRef(m_data_sp->GetBytes(), m_data_sp->GetByteSize()); } llvm::ArrayRef MinidumpParser::GetStream(MinidumpStreamType stream_type) { auto iter = m_directory_map.find(static_cast(stream_type)); if (iter == m_directory_map.end()) return {}; // check if there is enough data if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize()) return {}; return llvm::ArrayRef(m_data_sp->GetBytes() + iter->second.rva, iter->second.data_size); } llvm::Optional MinidumpParser::GetMinidumpString(uint32_t rva) { auto arr_ref = m_data_sp->GetData(); if (rva > arr_ref.size()) return llvm::None; arr_ref = arr_ref.drop_front(rva); return parseMinidumpString(arr_ref); } UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) { auto cv_record = GetData().slice(module->CV_record.rva, module->CV_record.data_size); // Read the CV record signature const llvm::support::ulittle32_t *signature = nullptr; Status error = consumeObject(cv_record, signature); if (error.Fail()) return UUID(); const CvSignature cv_signature = static_cast(static_cast(*signature)); if (cv_signature == CvSignature::Pdb70) { // PDB70 record const CvRecordPdb70 *pdb70_uuid = nullptr; Status error = consumeObject(cv_record, pdb70_uuid); if (!error.Fail()) return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid)); } else if (cv_signature == CvSignature::ElfBuildId) return UUID::fromData(cv_record); return UUID(); } llvm::ArrayRef MinidumpParser::GetThreads() { llvm::ArrayRef data = GetStream(MinidumpStreamType::ThreadList); if (data.size() == 0) return llvm::None; return MinidumpThread::ParseThreadList(data); } llvm::ArrayRef MinidumpParser::GetThreadContext(const MinidumpThread &td) { if (td.thread_context.rva + td.thread_context.data_size > GetData().size()) return {}; return GetData().slice(td.thread_context.rva, td.thread_context.data_size); } llvm::ArrayRef MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) { // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If // the minidump was captured with a 64-bit debugger, then the CONTEXT we just // grabbed from the mini_dump_thread is the one for the 64-bit "native" // process rather than the 32-bit "guest" process we care about. In this // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment // Block) of the 64-bit process. auto teb_mem = GetMemory(td.teb, sizeof(TEB64)); if (teb_mem.empty()) return {}; const TEB64 *wow64teb; Status error = consumeObject(teb_mem, wow64teb); if (error.Fail()) return {}; // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure // that includes the 32-bit CONTEXT (after a ULONG). See: // https://msdn.microsoft.com/en-us/library/ms681670.aspx auto context = GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32)); if (context.size() < sizeof(MinidumpContext_x86_32)) return {}; return context; // NOTE: We don't currently use the TEB for anything else. If we // need it in the future, the 32-bit TEB is located according to the address // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]). } const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() { llvm::ArrayRef data = GetStream(MinidumpStreamType::SystemInfo); if (data.size() == 0) return nullptr; return MinidumpSystemInfo::Parse(data); } ArchSpec MinidumpParser::GetArchitecture() { ArchSpec arch_spec; const MinidumpSystemInfo *system_info = GetSystemInfo(); if (!system_info) return arch_spec; // TODO what to do about big endiand flavors of arm ? // TODO set the arm subarch stuff if the minidump has info about it llvm::Triple triple; triple.setVendor(llvm::Triple::VendorType::UnknownVendor); const MinidumpCPUArchitecture arch = static_cast( static_cast(system_info->processor_arch)); switch (arch) { case MinidumpCPUArchitecture::X86: triple.setArch(llvm::Triple::ArchType::x86); break; case MinidumpCPUArchitecture::AMD64: triple.setArch(llvm::Triple::ArchType::x86_64); break; case MinidumpCPUArchitecture::ARM: triple.setArch(llvm::Triple::ArchType::arm); break; case MinidumpCPUArchitecture::ARM64: triple.setArch(llvm::Triple::ArchType::aarch64); break; default: triple.setArch(llvm::Triple::ArchType::UnknownArch); break; } const MinidumpOSPlatform os = static_cast( static_cast(system_info->platform_id)); // TODO add all of the OSes that Minidump/breakpad distinguishes? switch (os) { case MinidumpOSPlatform::Win32S: case MinidumpOSPlatform::Win32Windows: case MinidumpOSPlatform::Win32NT: case MinidumpOSPlatform::Win32CE: triple.setOS(llvm::Triple::OSType::Win32); break; case MinidumpOSPlatform::Linux: triple.setOS(llvm::Triple::OSType::Linux); break; case MinidumpOSPlatform::MacOSX: triple.setOS(llvm::Triple::OSType::MacOSX); break; case MinidumpOSPlatform::Android: triple.setOS(llvm::Triple::OSType::Linux); triple.setEnvironment(llvm::Triple::EnvironmentType::Android); break; default: triple.setOS(llvm::Triple::OSType::UnknownOS); break; } arch_spec.SetTriple(triple); return arch_spec; } const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() { llvm::ArrayRef data = GetStream(MinidumpStreamType::MiscInfo); if (data.size() == 0) return nullptr; return MinidumpMiscInfo::Parse(data); } llvm::Optional MinidumpParser::GetLinuxProcStatus() { llvm::ArrayRef data = GetStream(MinidumpStreamType::LinuxProcStatus); if (data.size() == 0) return llvm::None; return LinuxProcStatus::Parse(data); } llvm::Optional MinidumpParser::GetPid() { const MinidumpMiscInfo *misc_info = GetMiscInfo(); if (misc_info != nullptr) { return misc_info->GetPid(); } llvm::Optional proc_status = GetLinuxProcStatus(); if (proc_status.hasValue()) { return proc_status->GetPid(); } return llvm::None; } llvm::ArrayRef MinidumpParser::GetModuleList() { llvm::ArrayRef data = GetStream(MinidumpStreamType::ModuleList); if (data.size() == 0) return {}; return MinidumpModule::ParseModuleList(data); } std::vector MinidumpParser::GetFilteredModuleList() { llvm::ArrayRef modules = GetModuleList(); // map module_name -> pair(load_address, pointer to module struct in memory) llvm::StringMap> lowest_addr; std::vector filtered_modules; llvm::Optional name; std::string module_name; for (const auto &module : modules) { name = GetMinidumpString(module.module_name_rva); if (!name) continue; module_name = name.getValue(); auto iter = lowest_addr.end(); bool exists; std::tie(iter, exists) = lowest_addr.try_emplace( module_name, std::make_pair(module.base_of_image, &module)); if (exists && module.base_of_image < iter->second.first) iter->second = std::make_pair(module.base_of_image, &module); } filtered_modules.reserve(lowest_addr.size()); for (const auto &module : lowest_addr) { filtered_modules.push_back(module.second.second); } return filtered_modules; } const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() { llvm::ArrayRef data = GetStream(MinidumpStreamType::Exception); if (data.size() == 0) return nullptr; return MinidumpExceptionStream::Parse(data); } llvm::Optional MinidumpParser::FindMemoryRange(lldb::addr_t addr) { llvm::ArrayRef data = GetStream(MinidumpStreamType::MemoryList); llvm::ArrayRef data64 = GetStream(MinidumpStreamType::Memory64List); if (data.empty() && data64.empty()) return llvm::None; if (!data.empty()) { llvm::ArrayRef memory_list = MinidumpMemoryDescriptor::ParseMemoryList(data); if (memory_list.empty()) return llvm::None; for (const auto &memory_desc : memory_list) { const MinidumpLocationDescriptor &loc_desc = memory_desc.memory; const lldb::addr_t range_start = memory_desc.start_of_memory_range; const size_t range_size = loc_desc.data_size; if (loc_desc.rva + loc_desc.data_size > GetData().size()) return llvm::None; if (range_start <= addr && addr < range_start + range_size) { return minidump::Range(range_start, GetData().slice(loc_desc.rva, range_size)); } } } // Some Minidumps have a Memory64ListStream that captures all the heap memory // (full-memory Minidumps). We can't exactly use the same loop as above, // because the Minidump uses slightly different data structures to describe // those if (!data64.empty()) { llvm::ArrayRef memory64_list; uint64_t base_rva; std::tie(memory64_list, base_rva) = MinidumpMemoryDescriptor64::ParseMemory64List(data64); if (memory64_list.empty()) return llvm::None; for (const auto &memory_desc64 : memory64_list) { const lldb::addr_t range_start = memory_desc64.start_of_memory_range; const size_t range_size = memory_desc64.data_size; if (base_rva + range_size > GetData().size()) return llvm::None; if (range_start <= addr && addr < range_start + range_size) { return minidump::Range(range_start, GetData().slice(base_rva, range_size)); } base_rva += range_size; } } return llvm::None; } llvm::ArrayRef MinidumpParser::GetMemory(lldb::addr_t addr, size_t size) { // I don't have a sense of how frequently this is called or how many memory // ranges a Minidump typically has, so I'm not sure if searching for the // appropriate range linearly each time is stupid. Perhaps we should build // an index for faster lookups. llvm::Optional range = FindMemoryRange(addr); if (!range) return {}; // There's at least some overlap between the beginning of the desired range // (addr) and the current range. Figure out where the overlap begins and how // much overlap there is. const size_t offset = addr - range->start; if (addr < range->start || offset >= range->range_ref.size()) return {}; const size_t overlap = std::min(size, range->range_ref.size() - offset); return range->range_ref.slice(offset, overlap); } llvm::Optional MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) { MemoryRegionInfo info; llvm::ArrayRef data = GetStream(MinidumpStreamType::MemoryInfoList); if (data.empty()) return llvm::None; std::vector mem_info_list = MinidumpMemoryInfo::ParseMemoryInfoList(data); if (mem_info_list.empty()) return llvm::None; const auto yes = MemoryRegionInfo::eYes; const auto no = MemoryRegionInfo::eNo; const MinidumpMemoryInfo *next_entry = nullptr; for (const auto &entry : mem_info_list) { const auto head = entry->base_address; const auto tail = head + entry->region_size; if (head <= load_addr && load_addr < tail) { info.GetRange().SetRangeBase( (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree)) ? head : load_addr); info.GetRange().SetRangeEnd(tail); const uint32_t PageNoAccess = static_cast(MinidumpMemoryProtectionContants::PageNoAccess); info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no); const uint32_t PageWritable = static_cast(MinidumpMemoryProtectionContants::PageWritable); info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no); const uint32_t PageExecutable = static_cast( MinidumpMemoryProtectionContants::PageExecutable); info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no); const uint32_t MemFree = static_cast(MinidumpMemoryInfoState::MemFree); info.SetMapped((entry->state != MemFree) ? yes : no); return info; } else if (head > load_addr && (next_entry == nullptr || head < next_entry->base_address)) { // In case there is no region containing load_addr keep track of the // nearest region after load_addr so we can return the distance to it. next_entry = entry; } } // No containing region found. Create an unmapped region that extends to the // next region or LLDB_INVALID_ADDRESS info.GetRange().SetRangeBase(load_addr); info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address : LLDB_INVALID_ADDRESS); info.SetReadable(no); info.SetWritable(no); info.SetExecutable(no); info.SetMapped(no); // Note that the memory info list doesn't seem to contain ranges in kernel // space, so if you're walking a stack that has kernel frames, the stack may // appear truncated. return info; } Status MinidumpParser::Initialize() { Status error; lldbassert(m_directory_map.empty()); llvm::ArrayRef header_data(m_data_sp->GetBytes(), sizeof(MinidumpHeader)); const MinidumpHeader *header = MinidumpHeader::Parse(header_data); if (header == nullptr) { error.SetErrorString("invalid minidump: can't parse the header"); return error; } // A minidump without at least one stream is clearly ill-formed if (header->streams_count == 0) { error.SetErrorString("invalid minidump: no streams present"); return error; } struct FileRange { uint32_t offset = 0; uint32_t size = 0; FileRange(uint32_t offset, uint32_t size) : offset(offset), size(size) {} uint32_t end() const { return offset + size; } }; const uint32_t file_size = m_data_sp->GetByteSize(); // Build a global minidump file map, checking for: // - overlapping streams/data structures // - truncation (streams pointing past the end of file) std::vector minidump_map; // Add the minidump header to the file map if (sizeof(MinidumpHeader) > file_size) { error.SetErrorString("invalid minidump: truncated header"); return error; } minidump_map.emplace_back( 0, sizeof(MinidumpHeader) ); // Add the directory entries to the file map FileRange directory_range(header->stream_directory_rva, header->streams_count * sizeof(MinidumpDirectory)); if (directory_range.end() > file_size) { error.SetErrorString("invalid minidump: truncated streams directory"); return error; } minidump_map.push_back(directory_range); // Parse stream directory entries llvm::ArrayRef directory_data( m_data_sp->GetBytes() + directory_range.offset, directory_range.size); for (uint32_t i = 0; i < header->streams_count; ++i) { const MinidumpDirectory *directory_entry = nullptr; error = consumeObject(directory_data, directory_entry); if (error.Fail()) return error; if (directory_entry->stream_type == 0) { // Ignore dummy streams (technically ill-formed, but a number of // existing minidumps seem to contain such streams) if (directory_entry->location.data_size == 0) continue; error.SetErrorString("invalid minidump: bad stream type"); return error; } // Update the streams map, checking for duplicate stream types if (!m_directory_map .insert({directory_entry->stream_type, directory_entry->location}) .second) { error.SetErrorString("invalid minidump: duplicate stream type"); return error; } // Ignore the zero-length streams for layout checks if (directory_entry->location.data_size != 0) { minidump_map.emplace_back(directory_entry->location.rva, directory_entry->location.data_size); } } // Sort the file map ranges by start offset std::sort(minidump_map.begin(), minidump_map.end(), [](const FileRange &a, const FileRange &b) { return a.offset < b.offset; }); // Check for overlapping streams/data structures for (size_t i = 1; i < minidump_map.size(); ++i) { const auto &prev_range = minidump_map[i - 1]; if (prev_range.end() > minidump_map[i].offset) { error.SetErrorString("invalid minidump: overlapping streams"); return error; } } // Check for streams past the end of file const auto &last_range = minidump_map.back(); if (last_range.end() > file_size) { error.SetErrorString("invalid minidump: truncated stream"); return error; } return error; }