diff options
Diffstat (limited to 'lib/sanitizer_common/sanitizer_symbolizer.cc')
| -rw-r--r-- | lib/sanitizer_common/sanitizer_symbolizer.cc | 325 |
1 files changed, 262 insertions, 63 deletions
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc index 85eb0764f19c0..a1d95ae0e0b28 100644 --- a/lib/sanitizer_common/sanitizer_symbolizer.cc +++ b/lib/sanitizer_common/sanitizer_symbolizer.cc @@ -7,9 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This is a stub for LLVM-based symbolizer. // This file is shared between AddressSanitizer and ThreadSanitizer -// run-time libraries. See sanitizer.h for details. +// run-time libraries. See sanitizer_symbolizer.h for details. //===----------------------------------------------------------------------===// #include "sanitizer_common.h" @@ -19,18 +18,6 @@ namespace __sanitizer { -bool IsFullNameOfDWARFSection(const char *full_name, const char *short_name) { - // Skip "__DWARF," prefix. - if (0 == internal_strncmp(full_name, "__DWARF,", 8)) { - full_name += 8; - } - // Skip . and _ prefices. - while (*full_name == '.' || *full_name == '_') { - full_name++; - } - return 0 == internal_strcmp(full_name, short_name); -} - void AddressInfo::Clear() { InternalFree(module); InternalFree(function); @@ -38,28 +25,20 @@ void AddressInfo::Clear() { internal_memset(this, 0, sizeof(AddressInfo)); } -ModuleDIContext::ModuleDIContext(const char *module_name, uptr base_address) { +LoadedModule::LoadedModule(const char *module_name, uptr base_address) { full_name_ = internal_strdup(module_name); - short_name_ = internal_strrchr(module_name, '/'); - if (short_name_ == 0) { - short_name_ = full_name_; - } else { - short_name_++; - } base_address_ = base_address; n_ranges_ = 0; - mapped_addr_ = 0; - mapped_size_ = 0; } -void ModuleDIContext::addAddressRange(uptr beg, uptr end) { +void LoadedModule::addAddressRange(uptr beg, uptr end) { CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); ranges_[n_ranges_].beg = beg; ranges_[n_ranges_].end = end; n_ranges_++; } -bool ModuleDIContext::containsAddress(uptr address) const { +bool LoadedModule::containsAddress(uptr address) const { for (uptr i = 0; i < n_ranges_; i++) { if (ranges_[i].beg <= address && address < ranges_[i].end) return true; @@ -67,56 +46,256 @@ bool ModuleDIContext::containsAddress(uptr address) const { return false; } -void ModuleDIContext::getAddressInfo(AddressInfo *info) { - info->module = internal_strdup(full_name_); - info->module_offset = info->address - base_address_; - if (mapped_addr_ == 0) - CreateDIContext(); - // FIXME: Use the actual debug info context here. - info->function = 0; - info->file = 0; - info->line = 0; - info->column = 0; +// Extracts the prefix of "str" that consists of any characters not +// present in "delims" string, and copies this prefix to "result", allocating +// space for it. +// Returns a pointer to "str" after skipping extracted prefix and first +// delimiter char. +static const char *ExtractToken(const char *str, const char *delims, + char **result) { + uptr prefix_len = internal_strcspn(str, delims); + *result = (char*)InternalAlloc(prefix_len + 1); + internal_memcpy(*result, str, prefix_len); + (*result)[prefix_len] = '\0'; + const char *prefix_end = str + prefix_len; + if (*prefix_end != '\0') prefix_end++; + return prefix_end; +} + +// Same as ExtractToken, but converts extracted token to integer. +static const char *ExtractInt(const char *str, const char *delims, + int *result) { + char *buff; + const char *ret = ExtractToken(str, delims, &buff); + if (buff != 0) { + *result = (int)internal_atoll(buff); + } + InternalFree(buff); + return ret; } -void ModuleDIContext::CreateDIContext() { - mapped_addr_ = (uptr)MapFileToMemory(full_name_, &mapped_size_); - CHECK(mapped_addr_); - DWARFSection debug_info; - DWARFSection debug_abbrev; - DWARFSection debug_line; - DWARFSection debug_aranges; - DWARFSection debug_str; - FindDWARFSection(mapped_addr_, "debug_info", &debug_info); - FindDWARFSection(mapped_addr_, "debug_abbrev", &debug_abbrev); - FindDWARFSection(mapped_addr_, "debug_line", &debug_line); - FindDWARFSection(mapped_addr_, "debug_aranges", &debug_aranges); - FindDWARFSection(mapped_addr_, "debug_str", &debug_str); - // FIXME: Construct actual debug info context using mapped_addr, - // mapped_size and pointers to DWARF sections in memory. +static const char *ExtractUptr(const char *str, const char *delims, + uptr *result) { + char *buff; + const char *ret = ExtractToken(str, delims, &buff); + if (buff != 0) { + *result = (uptr)internal_atoll(buff); + } + InternalFree(buff); + return ret; } +// ExternalSymbolizer encapsulates communication between the tool and +// external symbolizer program, running in a different subprocess, +// For now we assume the following protocol: +// For each request of the form +// <module_name> <module_offset> +// passed to STDIN, external symbolizer prints to STDOUT response: +// <function_name> +// <file_name>:<line_number>:<column_number> +// <function_name> +// <file_name>:<line_number>:<column_number> +// ... +// <empty line> +class ExternalSymbolizer { + public: + ExternalSymbolizer(const char *path, int input_fd, int output_fd) + : path_(path), + input_fd_(input_fd), + output_fd_(output_fd), + times_restarted_(0) { + CHECK(path_); + CHECK_NE(input_fd_, kInvalidFd); + CHECK_NE(output_fd_, kInvalidFd); + } + + char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { + CHECK(module_name); + internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n", + is_data ? "DATA " : "", module_name, module_offset); + if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) + return 0; + if (!readFromSymbolizer(buffer_, kBufferSize)) + return 0; + return buffer_; + } + + bool Restart() { + if (times_restarted_ >= kMaxTimesRestarted) return false; + times_restarted_++; + internal_close(input_fd_); + internal_close(output_fd_); + return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); + } + + private: + bool readFromSymbolizer(char *buffer, uptr max_length) { + if (max_length == 0) + return true; + uptr read_len = 0; + while (true) { + uptr just_read = internal_read(input_fd_, buffer + read_len, + max_length - read_len); + // We can't read 0 bytes, as we don't expect external symbolizer to close + // its stdout. + if (just_read == 0 || just_read == (uptr)-1) { + Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); + return false; + } + read_len += just_read; + // Empty line marks the end of symbolizer output. + if (read_len >= 2 && buffer[read_len - 1] == '\n' && + buffer[read_len - 2] == '\n') { + break; + } + } + return true; + } + + bool writeToSymbolizer(const char *buffer, uptr length) { + if (length == 0) + return true; + uptr write_len = internal_write(output_fd_, buffer, length); + if (write_len == 0 || write_len == (uptr)-1) { + Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); + return false; + } + return true; + } + + const char *path_; + int input_fd_; + int output_fd_; + + static const uptr kBufferSize = 16 * 1024; + char buffer_[kBufferSize]; + + static const uptr kMaxTimesRestarted = 5; + uptr times_restarted_; +}; + +static LowLevelAllocator symbolizer_allocator; // Linker initialized. + class Symbolizer { public: uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { if (max_frames == 0) return 0; - AddressInfo *info = &frames[0]; - info->Clear(); - info->address = addr; - ModuleDIContext *module = FindModuleForAddress(addr); - if (module) { - module->getAddressInfo(info); + LoadedModule *module = FindModuleForAddress(addr); + if (module == 0) + return 0; + const char *module_name = module->full_name(); + uptr module_offset = addr - module->base_address(); + const char *str = SendCommand(false, module_name, module_offset); + if (str == 0) { + // External symbolizer was not initialized or failed. Fill only data + // about module name and offset. + AddressInfo *info = &frames[0]; + info->Clear(); + info->FillAddressAndModuleInfo(addr, module_name, module_offset); return 1; } - return 0; + uptr frame_id = 0; + for (frame_id = 0; frame_id < max_frames; frame_id++) { + AddressInfo *info = &frames[frame_id]; + char *function_name = 0; + str = ExtractToken(str, "\n", &function_name); + CHECK(function_name); + if (function_name[0] == '\0') { + // There are no more frames. + break; + } + info->Clear(); + info->FillAddressAndModuleInfo(addr, module_name, module_offset); + info->function = function_name; + // Parse <file>:<line>:<column> buffer. + char *file_line_info = 0; + str = ExtractToken(str, "\n", &file_line_info); + CHECK(file_line_info); + const char *line_info = ExtractToken(file_line_info, ":", &info->file); + line_info = ExtractInt(line_info, ":", &info->line); + line_info = ExtractInt(line_info, "", &info->column); + InternalFree(file_line_info); + + // Functions and filenames can be "??", in which case we write 0 + // to address info to mark that names are unknown. + if (0 == internal_strcmp(info->function, "??")) { + InternalFree(info->function); + info->function = 0; + } + if (0 == internal_strcmp(info->file, "??")) { + InternalFree(info->file); + info->file = 0; + } + } + if (frame_id == 0) { + // Make sure we return at least one frame. + AddressInfo *info = &frames[0]; + info->Clear(); + info->FillAddressAndModuleInfo(addr, module_name, module_offset); + frame_id = 1; + } + return frame_id; + } + + bool SymbolizeData(uptr addr, DataInfo *info) { + LoadedModule *module = FindModuleForAddress(addr); + if (module == 0) + return false; + const char *module_name = module->full_name(); + uptr module_offset = addr - module->base_address(); + internal_memset(info, 0, sizeof(*info)); + info->address = addr; + info->module = internal_strdup(module_name); + info->module_offset = module_offset; + const char *str = SendCommand(true, module_name, module_offset); + if (str == 0) + return true; + str = ExtractToken(str, "\n", &info->name); + str = ExtractUptr(str, " ", &info->start); + str = ExtractUptr(str, "\n", &info->size); + info->start += module->base_address(); + return true; + } + + bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { + int input_fd, output_fd; + if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) + return false; + void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); + external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, + input_fd, output_fd); + return true; } private: - ModuleDIContext *FindModuleForAddress(uptr address) { + char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { + if (external_symbolizer_ == 0) { + ReportExternalSymbolizerError( + "WARNING: Trying to symbolize code, but external " + "symbolizer is not initialized!\n"); + return 0; + } + for (;;) { + char *reply = external_symbolizer_->SendCommand(is_data, module_name, + module_offset); + if (reply) + return reply; + // Try to restart symbolizer subprocess. If we don't succeed, forget + // about it and don't try to use it later. + if (!external_symbolizer_->Restart()) { + ReportExternalSymbolizerError( + "WARNING: Failed to use and restart external symbolizer!\n"); + external_symbolizer_ = 0; + return 0; + } + } + } + + LoadedModule *FindModuleForAddress(uptr address) { if (modules_ == 0) { - modules_ = (ModuleDIContext*)InternalAlloc( - kMaxNumberOfModuleContexts * sizeof(ModuleDIContext)); + modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( + kMaxNumberOfModuleContexts * sizeof(LoadedModule))); CHECK(modules_); n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts); CHECK_GT(n_modules_, 0); @@ -129,10 +308,22 @@ class Symbolizer { } return 0; } - static const uptr kMaxNumberOfModuleContexts = 4096; - // Array of module debug info contexts is leaked. - ModuleDIContext *modules_; + void ReportExternalSymbolizerError(const char *msg) { + // Don't use atomics here for now, as SymbolizeCode can't be called + // from multiple threads anyway. + static bool reported; + if (!reported) { + Report(msg); + reported = true; + } + } + + // 16K loaded modules should be enough for everyone. + static const uptr kMaxNumberOfModuleContexts = 1 << 14; + LoadedModule *modules_; // Array of module descriptions is leaked. uptr n_modules_; + + ExternalSymbolizer *external_symbolizer_; // Leaked. }; static Symbolizer symbolizer; // Linker initialized. @@ -141,4 +332,12 @@ uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { return symbolizer.SymbolizeCode(address, frames, max_frames); } +bool SymbolizeData(uptr address, DataInfo *info) { + return symbolizer.SymbolizeData(address, info); +} + +bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { + return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); +} + } // namespace __sanitizer |
