summary refs log tree commit diff
path: root/lib/sanitizer_common/sanitizer_symbolizer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sanitizer_common/sanitizer_symbolizer.cc')
-rw-r--r-- lib/sanitizer_common/sanitizer_symbolizer.cc 325
1 files changed, 262 insertions, 63 deletions
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc
index 85eb0764f19c0..a1d95ae0e0b28 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer.cc
@@ -7,9 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This is a stub for LLVM-based symbolizer.
// This file is shared between AddressSanitizer and ThreadSanitizer
-// run-time libraries. See sanitizer.h for details.
+// run-time libraries. See sanitizer_symbolizer.h for details.
//===----------------------------------------------------------------------===//
#include "sanitizer_common.h"
@@ -19,18 +18,6 @@
namespace __sanitizer {
-bool IsFullNameOfDWARFSection(const char *full_name, const char *short_name) {
- // Skip "__DWARF," prefix.
- if (0 == internal_strncmp(full_name, "__DWARF,", 8)) {
- full_name += 8;
- }
- // Skip . and _ prefices.
- while (*full_name == '.' || *full_name == '_') {
- full_name++;
- }
- return 0 == internal_strcmp(full_name, short_name);
-}
-
void AddressInfo::Clear() {
InternalFree(module);
InternalFree(function);
@@ -38,28 +25,20 @@ void AddressInfo::Clear() {
internal_memset(this, 0, sizeof(AddressInfo));
}
-ModuleDIContext::ModuleDIContext(const char *module_name, uptr base_address) {
+LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
full_name_ = internal_strdup(module_name);
- short_name_ = internal_strrchr(module_name, '/');
- if (short_name_ == 0) {
- short_name_ = full_name_;
- } else {
- short_name_++;
- }
base_address_ = base_address;
n_ranges_ = 0;
- mapped_addr_ = 0;
- mapped_size_ = 0;
}
-void ModuleDIContext::addAddressRange(uptr beg, uptr end) {
+void LoadedModule::addAddressRange(uptr beg, uptr end) {
CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
ranges_[n_ranges_].beg = beg;
ranges_[n_ranges_].end = end;
n_ranges_++;
}
-bool ModuleDIContext::containsAddress(uptr address) const {
+bool LoadedModule::containsAddress(uptr address) const {
for (uptr i = 0; i < n_ranges_; i++) {
if (ranges_[i].beg <= address && address < ranges_[i].end)
return true;
@@ -67,56 +46,256 @@ bool ModuleDIContext::containsAddress(uptr address) const {
return false;
}
-void ModuleDIContext::getAddressInfo(AddressInfo *info) {
- info->module = internal_strdup(full_name_);
- info->module_offset = info->address - base_address_;
- if (mapped_addr_ == 0)
- CreateDIContext();
- // FIXME: Use the actual debug info context here.
- info->function = 0;
- info->file = 0;
- info->line = 0;
- info->column = 0;
+// Extracts the prefix of "str" that consists of any characters not
+// present in "delims" string, and copies this prefix to "result", allocating
+// space for it.
+// Returns a pointer to "str" after skipping extracted prefix and first
+// delimiter char.
+static const char *ExtractToken(const char *str, const char *delims,
+ char **result) {
+ uptr prefix_len = internal_strcspn(str, delims);
+ *result = (char*)InternalAlloc(prefix_len + 1);
+ internal_memcpy(*result, str, prefix_len);
+ (*result)[prefix_len] = '\0';
+ const char *prefix_end = str + prefix_len;
+ if (*prefix_end != '\0') prefix_end++;
+ return prefix_end;
+}
+
+// Same as ExtractToken, but converts extracted token to integer.
+static const char *ExtractInt(const char *str, const char *delims,
+ int *result) {
+ char *buff;
+ const char *ret = ExtractToken(str, delims, &buff);
+ if (buff != 0) {
+ *result = (int)internal_atoll(buff);
+ }
+ InternalFree(buff);
+ return ret;
}
-void ModuleDIContext::CreateDIContext() {
- mapped_addr_ = (uptr)MapFileToMemory(full_name_, &mapped_size_);
- CHECK(mapped_addr_);
- DWARFSection debug_info;
- DWARFSection debug_abbrev;
- DWARFSection debug_line;
- DWARFSection debug_aranges;
- DWARFSection debug_str;
- FindDWARFSection(mapped_addr_, "debug_info", &debug_info);
- FindDWARFSection(mapped_addr_, "debug_abbrev", &debug_abbrev);
- FindDWARFSection(mapped_addr_, "debug_line", &debug_line);
- FindDWARFSection(mapped_addr_, "debug_aranges", &debug_aranges);
- FindDWARFSection(mapped_addr_, "debug_str", &debug_str);
- // FIXME: Construct actual debug info context using mapped_addr,
- // mapped_size and pointers to DWARF sections in memory.
+static const char *ExtractUptr(const char *str, const char *delims,
+ uptr *result) {
+ char *buff;
+ const char *ret = ExtractToken(str, delims, &buff);
+ if (buff != 0) {
+ *result = (uptr)internal_atoll(buff);
+ }
+ InternalFree(buff);
+ return ret;
}
+// ExternalSymbolizer encapsulates communication between the tool and
+// external symbolizer program, running in a different subprocess.
+// For now we assume the following protocol:
+// For each request of the form
+// <module_name> <module_offset>
+// passed to STDIN, external symbolizer prints to STDOUT response:
+// <function_name>
+// <file_name>:<line_number>:<column_number>
+// <function_name>
+// <file_name>:<line_number>:<column_number>
+// ...
+// <empty line>
+class ExternalSymbolizer {
+ public:
+ ExternalSymbolizer(const char *path, int input_fd, int output_fd)
+ : path_(path),
+ input_fd_(input_fd),
+ output_fd_(output_fd),
+ times_restarted_(0) {
+ CHECK(path_);
+ CHECK_NE(input_fd_, kInvalidFd);
+ CHECK_NE(output_fd_, kInvalidFd);
+ }
+
+ char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
+ CHECK(module_name);
+ internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n",
+ is_data ? "DATA " : "", module_name, module_offset);
+ if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
+ return 0;
+ if (!readFromSymbolizer(buffer_, kBufferSize))
+ return 0;
+ return buffer_;
+ }
+
+ bool Restart() {
+ if (times_restarted_ >= kMaxTimesRestarted) return false;
+ times_restarted_++;
+ internal_close(input_fd_);
+ internal_close(output_fd_);
+ return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
+ }
+
+ private:
+ bool readFromSymbolizer(char *buffer, uptr max_length) {
+ if (max_length == 0)
+ return true;
+ uptr read_len = 0;
+ while (true) {
+ uptr just_read = internal_read(input_fd_, buffer + read_len,
+ max_length - read_len);
+ // A read of 0 bytes is treated as a failure, because the external
+ // symbolizer is not expected to close its stdout.
+ if (just_read == 0 || just_read == (uptr)-1) {
+ Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
+ return false;
+ }
+ read_len += just_read;
+ // Empty line marks the end of symbolizer output.
+ if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
+ buffer[read_len - 2] == '\n') {
+ break;
+ }
+ }
+ return true;
+ }
+
+ bool writeToSymbolizer(const char *buffer, uptr length) {
+ if (length == 0)
+ return true;
+ uptr write_len = internal_write(output_fd_, buffer, length);
+ if (write_len == 0 || write_len == (uptr)-1) {
+ Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
+ return false;
+ }
+ return true;
+ }
+
+ const char *path_;
+ int input_fd_;
+ int output_fd_;
+
+ static const uptr kBufferSize = 16 * 1024;
+ char buffer_[kBufferSize];
+
+ static const uptr kMaxTimesRestarted = 5;
+ uptr times_restarted_;
+};
+
+static LowLevelAllocator symbolizer_allocator; // Linker initialized.
+
class Symbolizer {
public:
uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
if (max_frames == 0)
return 0;
- AddressInfo *info = &frames[0];
- info->Clear();
- info->address = addr;
- ModuleDIContext *module = FindModuleForAddress(addr);
- if (module) {
- module->getAddressInfo(info);
+ LoadedModule *module = FindModuleForAddress(addr);
+ if (module == 0)
+ return 0;
+ const char *module_name = module->full_name();
+ uptr module_offset = addr - module->base_address();
+ const char *str = SendCommand(false, module_name, module_offset);
+ if (str == 0) {
+ // External symbolizer was not initialized or failed. Fill only data
+ // about module name and offset.
+ AddressInfo *info = &frames[0];
+ info->Clear();
+ info->FillAddressAndModuleInfo(addr, module_name, module_offset);
return 1;
}
- return 0;
+ uptr frame_id = 0;
+ for (frame_id = 0; frame_id < max_frames; frame_id++) {
+ AddressInfo *info = &frames[frame_id];
+ char *function_name = 0;
+ str = ExtractToken(str, "\n", &function_name);
+ CHECK(function_name);
+ if (function_name[0] == '\0') {
+ // There are no more frames.
+ break;
+ }
+ info->Clear();
+ info->FillAddressAndModuleInfo(addr, module_name, module_offset);
+ info->function = function_name;
+ // Parse <file>:<line>:<column> buffer.
+ char *file_line_info = 0;
+ str = ExtractToken(str, "\n", &file_line_info);
+ CHECK(file_line_info);
+ const char *line_info = ExtractToken(file_line_info, ":", &info->file);
+ line_info = ExtractInt(line_info, ":", &info->line);
+ line_info = ExtractInt(line_info, "", &info->column);
+ InternalFree(file_line_info);
+
+ // Functions and filenames can be "??", in which case we write 0
+ // to address info to mark that names are unknown.
+ if (0 == internal_strcmp(info->function, "??")) {
+ InternalFree(info->function);
+ info->function = 0;
+ }
+ if (0 == internal_strcmp(info->file, "??")) {
+ InternalFree(info->file);
+ info->file = 0;
+ }
+ }
+ if (frame_id == 0) {
+ // Make sure we return at least one frame.
+ AddressInfo *info = &frames[0];
+ info->Clear();
+ info->FillAddressAndModuleInfo(addr, module_name, module_offset);
+ frame_id = 1;
+ }
+ return frame_id;
+ }
+
+ bool SymbolizeData(uptr addr, DataInfo *info) {
+ LoadedModule *module = FindModuleForAddress(addr);
+ if (module == 0)
+ return false;
+ const char *module_name = module->full_name();
+ uptr module_offset = addr - module->base_address();
+ internal_memset(info, 0, sizeof(*info));
+ info->address = addr;
+ info->module = internal_strdup(module_name);
+ info->module_offset = module_offset;
+ const char *str = SendCommand(true, module_name, module_offset);
+ if (str == 0)
+ return true;
+ str = ExtractToken(str, "\n", &info->name);
+ str = ExtractUptr(str, " ", &info->start);
+ str = ExtractUptr(str, "\n", &info->size);
+ info->start += module->base_address();
+ return true;
+ }
+
+ bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
+ int input_fd, output_fd;
+ if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
+ return false;
+ void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
+ external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
+ input_fd, output_fd);
+ return true;
}
private:
- ModuleDIContext *FindModuleForAddress(uptr address) {
+ char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
+ if (external_symbolizer_ == 0) {
+ ReportExternalSymbolizerError(
+ "WARNING: Trying to symbolize code, but external "
+ "symbolizer is not initialized!\n");
+ return 0;
+ }
+ for (;;) {
+ char *reply = external_symbolizer_->SendCommand(is_data, module_name,
+ module_offset);
+ if (reply)
+ return reply;
+ // Try to restart symbolizer subprocess. If we don't succeed, forget
+ // about it and don't try to use it later.
+ if (!external_symbolizer_->Restart()) {
+ ReportExternalSymbolizerError(
+ "WARNING: Failed to use and restart external symbolizer!\n");
+ external_symbolizer_ = 0;
+ return 0;
+ }
+ }
+ }
+
+ LoadedModule *FindModuleForAddress(uptr address) {
if (modules_ == 0) {
- modules_ = (ModuleDIContext*)InternalAlloc(
- kMaxNumberOfModuleContexts * sizeof(ModuleDIContext));
+ modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
+ kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
CHECK(modules_);
n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
CHECK_GT(n_modules_, 0);
@@ -129,10 +308,22 @@ class Symbolizer {
}
return 0;
}
- static const uptr kMaxNumberOfModuleContexts = 4096;
- // Array of module debug info contexts is leaked.
- ModuleDIContext *modules_;
+ void ReportExternalSymbolizerError(const char *msg) {
+ // Don't use atomics here for now, as SymbolizeCode can't be called
+ // from multiple threads anyway.
+ static bool reported;
+ if (!reported) {
+ Report(msg);
+ reported = true;
+ }
+ }
+
+ // 16K loaded modules should be enough for everyone.
+ static const uptr kMaxNumberOfModuleContexts = 1 << 14;
+ LoadedModule *modules_; // Array of module descriptions is leaked.
uptr n_modules_;
+
+ ExternalSymbolizer *external_symbolizer_; // Leaked.
};
static Symbolizer symbolizer; // Linker initialized.
@@ -141,4 +332,12 @@ uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
return symbolizer.SymbolizeCode(address, frames, max_frames);
}
+bool SymbolizeData(uptr address, DataInfo *info) {
+ return symbolizer.SymbolizeData(address, info);
+}
+
+bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
+ return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
+}
+
} // namespace __sanitizer