diff options
Diffstat (limited to 'lib/asan/scripts/asan_symbolize.py')
-rwxr-xr-x | lib/asan/scripts/asan_symbolize.py | 419 |
1 files changed, 324 insertions, 95 deletions
diff --git a/lib/asan/scripts/asan_symbolize.py b/lib/asan/scripts/asan_symbolize.py index e4897d0c7649..7b30bb55914e 100755 --- a/lib/asan/scripts/asan_symbolize.py +++ b/lib/asan/scripts/asan_symbolize.py @@ -7,121 +7,350 @@ # License. See LICENSE.TXT for details. # #===------------------------------------------------------------------------===# +import bisect import os import re -import sys -import string import subprocess +import sys -pipes = {} +llvm_symbolizer = None +symbolizers = {} filetypes = {} -DEBUG=False +vmaddrs = {} +DEBUG = False + +# FIXME: merge the code that calls fix_filename(). def fix_filename(file_name): for path_to_cut in sys.argv[1:]: - file_name = re.sub(".*" + path_to_cut, "", file_name) - file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name) - file_name = re.sub(".*crtstuff.c:0", "???:0", file_name) + file_name = re.sub('.*' + path_to_cut, '', file_name) + file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) + file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) return file_name -# TODO(glider): need some refactoring here -def symbolize_addr2line(line): - #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) - match = re.match('^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line) - if match: - frameno = match.group(2) - binary = match.group(3) - addr = match.group(4) - if not pipes.has_key(binary): - pipes[binary] = subprocess.Popen(["addr2line", "-f", "-e", binary], - stdin=subprocess.PIPE, stdout=subprocess.PIPE) - p = pipes[binary] +class Symbolizer(object): + def __init__(self): + pass + + def symbolize(self, addr, binary, offset): + """Symbolize the given address (pair of binary and offset). + + Overriden in subclasses. + Args: + addr: virtual address of an instruction. + binary: path to executable/shared object containing this instruction. + offset: instruction offset in the @binary. + Returns: + list of strings (one string for each inlined frame) describing + the code locations for this instruction (that is, function name, file + name, line and column numbers). + """ + return None + + +class LLVMSymbolizer(Symbolizer): + def __init__(self, symbolizer_path): + super(LLVMSymbolizer, self).__init__() + self.symbolizer_path = symbolizer_path + self.pipe = self.open_llvm_symbolizer() + + def open_llvm_symbolizer(self): + if not os.path.exists(self.symbolizer_path): + return None + cmd = [self.symbolizer_path, + '--use-symbol-table=true', + '--demangle=false', + '--functions=true', + '--inlining=true'] + if DEBUG: + print ' '.join(cmd) + return subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + if not self.pipe: + return None + result = [] try: - print >>p.stdin, addr - function_name = p.stdout.readline().rstrip() - file_name = p.stdout.readline().rstrip() - except: - function_name = "" - file_name = "" - file_name = fix_filename(file_name) + symbolizer_input = '%s %s' % (binary, offset) + if DEBUG: + print symbolizer_input + print >> self.pipe.stdin, symbolizer_input + while True: + function_name = self.pipe.stdout.readline().rstrip() + if not function_name: + break + file_name = self.pipe.stdout.readline().rstrip() + file_name = fix_filename(file_name) + if (not function_name.startswith('??') and + not file_name.startswith('??')): + # Append only valid frames. + result.append('%s in %s %s' % (addr, function_name, + file_name)) + except Exception: + result = [] + if not result: + result = None + return result - print match.group(1), "in", function_name, file_name - else: - print line.rstrip() - - -def get_macho_filetype(binary): - if not filetypes.has_key(binary): - otool_pipe = subprocess.Popen(["otool", "-Vh", binary], - stdin=subprocess.PIPE, stdout=subprocess.PIPE) - otool_line = "".join(otool_pipe.stdout.readlines()) - for t in ["DYLIB", "EXECUTE"]: - if t in otool_line: - filetypes[binary] = t - otool_pipe.stdin.close() - return filetypes[binary] - - -def symbolize_atos(line): - #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) - match = re.match('^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line) - if match: - #print line - prefix = match.group(1) - frameno = match.group(2) - orig_addr = match.group(3) - binary = match.group(4) - offset = match.group(5) - addr = orig_addr - load_addr = hex(int(orig_addr, 16) - int(offset, 16)) - filetype = get_macho_filetype(binary) - - if not pipes.has_key(binary): - # Guess which arch we're running. 10 = len("0x") + 8 hex digits. - if len(addr) > 10: - arch = "x86_64" - else: - arch = "i386" - if filetype == "DYLIB": - load_addr = "0x0" +def LLVMSymbolizerFactory(system): + symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') + if not symbolizer_path: + # Assume llvm-symbolizer is in PATH. + symbolizer_path = 'llvm-symbolizer' + return LLVMSymbolizer(symbolizer_path) + + +class Addr2LineSymbolizer(Symbolizer): + def __init__(self, binary): + super(Addr2LineSymbolizer, self).__init__() + self.binary = binary + self.pipe = self.open_addr2line() + + def open_addr2line(self): + cmd = ['addr2line', '-f', '-e', self.binary] if DEBUG: - print "atos -o %s -arch %s -l %s" % (binary, arch, load_addr) - cmd = ["atos", "-o", binary, "-arch", arch, "-l", load_addr] - pipes[binary] = subprocess.Popen(cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - p = pipes[binary] - if filetype == "DYLIB": - print >>p.stdin, "%s" % offset + print ' '.join(cmd) + return subprocess.Popen(cmd, + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + if self.binary != binary: + return None + try: + print >> self.pipe.stdin, offset + function_name = self.pipe.stdout.readline().rstrip() + file_name = self.pipe.stdout.readline().rstrip() + except Exception: + function_name = '' + file_name = '' + file_name = fix_filename(file_name) + return ['%s in %s %s' % (addr, function_name, file_name)] + + +class DarwinSymbolizer(Symbolizer): + def __init__(self, addr, binary): + super(DarwinSymbolizer, self).__init__() + self.binary = binary + # Guess which arch we're running. 10 = len('0x') + 8 hex digits. + if len(addr) > 10: + self.arch = 'x86_64' else: - print >>p.stdin, "%s" % addr - # TODO(glider): it's more efficient to make a batch atos run for each binary. - p.stdin.close() - atos_line = p.stdout.readline().rstrip() + self.arch = 'i386' + self.vmaddr = None + self.pipe = None + + def write_addr_to_pipe(self, offset): + print >> self.pipe.stdin, '0x%x' % int(offset, 16) + + def open_atos(self): + if DEBUG: + print 'atos -o %s -arch %s' % (self.binary, self.arch) + cmdline = ['atos', '-o', self.binary, '-arch', self.arch] + self.pipe = subprocess.Popen(cmdline, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + if self.binary != binary: + return None + self.open_atos() + self.write_addr_to_pipe(offset) + self.pipe.stdin.close() + atos_line = self.pipe.stdout.readline().rstrip() # A well-formed atos response looks like this: # foo(type1, type2) (in object.name) (filename.cc:80) match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) - #print "atos_line: ", atos_line + if DEBUG: + print 'atos_line: ', atos_line if match: function_name = match.group(1) - function_name = re.sub("\(.*?\)", "", function_name) + function_name = re.sub('\(.*?\)', '', function_name) file_name = fix_filename(match.group(3)) - print "%s%s in %s %s" % (prefix, addr, function_name, file_name) + return ['%s in %s %s' % (addr, function_name, file_name)] + else: + return ['%s in %s' % (addr, atos_line)] + + +# Chain several symbolizers so that if one symbolizer fails, we fall back +# to the next symbolizer in chain. +class ChainSymbolizer(Symbolizer): + def __init__(self, symbolizer_list): + super(ChainSymbolizer, self).__init__() + self.symbolizer_list = symbolizer_list + + def symbolize(self, addr, binary, offset): + """Overrides Symbolizer.symbolize.""" + for symbolizer in self.symbolizer_list: + if symbolizer: + result = symbolizer.symbolize(addr, binary, offset) + if result: + return result + return None + + def append_symbolizer(self, symbolizer): + self.symbolizer_list.append(symbolizer) + + +def BreakpadSymbolizerFactory(binary): + suffix = os.getenv('BREAKPAD_SUFFIX') + if suffix: + filename = binary + suffix + if os.access(filename, os.F_OK): + return BreakpadSymbolizer(filename) + return None + + +def SystemSymbolizerFactory(system, addr, binary): + if system == 'Darwin': + return DarwinSymbolizer(addr, binary) + elif system == 'Linux': + return Addr2LineSymbolizer(binary) + + +class BreakpadSymbolizer(Symbolizer): + def __init__(self, filename): + super(BreakpadSymbolizer, self).__init__() + self.filename = filename + lines = file(filename).readlines() + self.files = [] + self.symbols = {} + self.address_list = [] + self.addresses = {} + # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t + fragments = lines[0].rstrip().split() + self.arch = fragments[2] + self.debug_id = fragments[3] + self.binary = ' '.join(fragments[4:]) + self.parse_lines(lines[1:]) + + def parse_lines(self, lines): + cur_function_addr = '' + for line in lines: + fragments = line.split() + if fragments[0] == 'FILE': + assert int(fragments[1]) == len(self.files) + self.files.append(' '.join(fragments[2:])) + elif fragments[0] == 'PUBLIC': + self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:]) + elif fragments[0] in ['CFI', 'STACK']: + pass + elif fragments[0] == 'FUNC': + cur_function_addr = int(fragments[1], 16) + if not cur_function_addr in self.symbols.keys(): + self.symbols[cur_function_addr] = ' '.join(fragments[4:]) + else: + # Line starting with an address. + addr = int(fragments[0], 16) + self.address_list.append(addr) + # Tuple of symbol address, size, line, file number. + self.addresses[addr] = (cur_function_addr, + int(fragments[1], 16), + int(fragments[2]), + int(fragments[3])) + self.address_list.sort() + + def get_sym_file_line(self, addr): + key = None + if addr in self.addresses.keys(): + key = addr else: - print "%s%s in %s" % (prefix, addr, atos_line) - del pipes[binary] - else: - print line.rstrip() - -system = os.uname()[0] -if system in ['Linux', 'Darwin']: - for line in sys.stdin: - if system == 'Linux': - symbolize_addr2line(line) - elif system == 'Darwin': - symbolize_atos(line) -else: - print 'Unknown system: ', system + index = bisect.bisect_left(self.address_list, addr) + if index == 0: + return None + else: + key = self.address_list[index - 1] + sym_id, size, line_no, file_no = self.addresses[key] + symbol = self.symbols[sym_id] + filename = self.files[file_no] + if addr < key + size: + return symbol, filename, line_no + else: + return None + + def symbolize(self, addr, binary, offset): + if self.binary != binary: + return None + res = self.get_sym_file_line(int(offset, 16)) + if res: + function_name, file_name, line_no = res + result = ['%s in %s %s:%d' % ( + addr, function_name, file_name, line_no)] + print result + return result + else: + return None + + +class SymbolizationLoop(object): + def __init__(self, binary_name_filter=None): + # Used by clients who may want to supply a different binary name. + # E.g. in Chrome several binaries may share a single .dSYM. + self.binary_name_filter = binary_name_filter + self.system = os.uname()[0] + if self.system in ['Linux', 'Darwin']: + self.llvm_symbolizer = LLVMSymbolizerFactory(self.system) + else: + raise Exception('Unknown system') + + def symbolize_address(self, addr, binary, offset): + # Use the chain of symbolizers: + # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos + # (fall back to next symbolizer if the previous one fails). + if not binary in symbolizers: + symbolizers[binary] = ChainSymbolizer( + [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) + result = symbolizers[binary].symbolize(addr, binary, offset) + if result is None: + # Initialize system symbolizer only if other symbolizers failed. + symbolizers[binary].append_symbolizer( + SystemSymbolizerFactory(self.system, addr, binary)) + result = symbolizers[binary].symbolize(addr, binary, offset) + # The system symbolizer must produce some result. + assert result + return result + + def print_symbolized_lines(self, symbolized_lines): + if not symbolized_lines: + print self.current_line + else: + for symbolized_frame in symbolized_lines: + print ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip() + self.frame_no += 1 + + def process_stdin(self): + self.frame_no = 0 + for line in sys.stdin: + self.current_line = line.rstrip() + #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) + stack_trace_line_format = ( + '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') + match = re.match(stack_trace_line_format, line) + if not match: + print self.current_line + continue + if DEBUG: + print line + _, frameno_str, addr, binary, offset = match.groups() + if frameno_str == '0': + # Assume that frame #0 is the first frame of new stack trace. + self.frame_no = 0 + original_binary = binary + if self.binary_name_filter: + binary = self.binary_name_filter(binary) + symbolized_line = self.symbolize_address(addr, binary, offset) + if not symbolized_line: + if original_binary != binary: + symbolized_line = self.symbolize_address(addr, binary, offset) + self.print_symbolized_lines(symbolized_line) + + +if __name__ == '__main__': + loop = SymbolizationLoop() + loop.process_stdin() |