diff options
Diffstat (limited to 'utils/libcxx/sym_check')
-rw-r--r-- | utils/libcxx/sym_check/__init__.py | 17 | ||||
-rw-r--r-- | utils/libcxx/sym_check/diff.py | 103 | ||||
-rw-r--r-- | utils/libcxx/sym_check/extract.py | 194 | ||||
-rw-r--r-- | utils/libcxx/sym_check/match.py | 40 | ||||
-rw-r--r-- | utils/libcxx/sym_check/util.py | 268 |
5 files changed, 622 insertions, 0 deletions
diff --git a/utils/libcxx/sym_check/__init__.py b/utils/libcxx/sym_check/__init__.py new file mode 100644 index 0000000000000..1aa2b450c12fb --- /dev/null +++ b/utils/libcxx/sym_check/__init__.py @@ -0,0 +1,17 @@ +#===----------------------------------------------------------------------===## +# +# The LLVM Compiler Infrastructure +# +# This file is dual licensed under the MIT and the University of Illinois Open +# Source Licenses. See LICENSE.TXT for details. +# +#===----------------------------------------------------------------------===## + +"""libcxx abi symbol checker""" + +__author__ = 'Eric Fiselier' +__email__ = 'eric@efcs.ca' +__versioninfo__ = (0, 1, 0) +__version__ = ' '.join(str(v) for v in __versioninfo__) + 'dev' + +__all__ = ['diff', 'extract', 'util'] diff --git a/utils/libcxx/sym_check/diff.py b/utils/libcxx/sym_check/diff.py new file mode 100644 index 0000000000000..0821ef6f780a4 --- /dev/null +++ b/utils/libcxx/sym_check/diff.py @@ -0,0 +1,103 @@ +# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80: +#===----------------------------------------------------------------------===## +# +# The LLVM Compiler Infrastructure +# +# This file is dual licensed under the MIT and the University of Illinois Open +# Source Licenses. See LICENSE.TXT for details. +# +#===----------------------------------------------------------------------===## +""" +diff - A set of functions for diff-ing two symbol lists. +""" + +from libcxx.sym_check import util + + +def _symbol_difference(lhs, rhs): + lhs_names = set((n['name'] for n in lhs)) + rhs_names = set((n['name'] for n in rhs)) + diff_names = lhs_names - rhs_names + return [n for n in lhs if n['name'] in diff_names] + + +def _find_by_key(sym_list, k): + for sym in sym_list: + if sym['name'] == k: + return sym + return None + + +def added_symbols(old, new): + return _symbol_difference(new, old) + + +def removed_symbols(old, new): + return _symbol_difference(old, new) + + +def changed_symbols(old, new): + changed = [] + for old_sym in old: + if old_sym in new: + continue + new_sym = _find_by_key(new, old_sym['name']) + if (new_sym is not None and not new_sym in old + and old_sym != new_sym): + changed += [(old_sym, new_sym)] + return changed + + +def diff(old, new): + added = added_symbols(old, new) + removed = removed_symbols(old, new) + changed = changed_symbols(old, new) + return added, removed, changed + + +def report_diff(added_syms, removed_syms, changed_syms, names_only=False, + demangle=True): + def maybe_demangle(name): + return util.demangle_symbol(name) if demangle else name + + report = '' + for sym in added_syms: + report += 'Symbol added: %s\n' % maybe_demangle(sym['name']) + if not names_only: + report += ' %s\n\n' % sym + if added_syms and names_only: + report += '\n' + for sym in removed_syms: + report += 'SYMBOL REMOVED: %s\n' % maybe_demangle(sym['name']) + if not names_only: + report += ' %s\n\n' % sym + if removed_syms and names_only: + report += '\n' + if not names_only: + for sym_pair in changed_syms: + old_sym, new_sym = sym_pair + old_str = '\n OLD SYMBOL: %s' % old_sym + new_str = '\n NEW SYMBOL: %s' % new_sym + report += ('SYMBOL CHANGED: %s%s%s\n\n' % + (maybe_demangle(old_sym['name']), + old_str, new_str)) + + added = bool(len(added_syms) != 0) + abi_break = bool(len(removed_syms)) + if not names_only: + abi_break = abi_break or len(changed_syms) + if added or abi_break: + report += 'Summary\n' + report += ' Added: %d\n' % len(added_syms) + report += ' Removed: %d\n' % len(removed_syms) + if not names_only: + report += ' Changed: %d\n' % len(changed_syms) + if not abi_break: + report += 'Symbols added.' + else: + report += 'ABI BREAKAGE: SYMBOLS ADDED OR REMOVED!' + else: + report += 'Symbols match.' + is_different = abi_break or bool(len(added_syms)) \ + or bool(len(changed_syms)) + return report, abi_break, is_different diff --git a/utils/libcxx/sym_check/extract.py b/utils/libcxx/sym_check/extract.py new file mode 100644 index 0000000000000..152ff97db1785 --- /dev/null +++ b/utils/libcxx/sym_check/extract.py @@ -0,0 +1,194 @@ +# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80: +#===----------------------------------------------------------------------===## +# +# The LLVM Compiler Infrastructure +# +# This file is dual licensed under the MIT and the University of Illinois Open +# Source Licenses. See LICENSE.TXT for details. +# +#===----------------------------------------------------------------------===## +""" +extract - A set of function that extract symbol lists from shared libraries. +""" +import distutils.spawn +import sys +import re + +import libcxx.util +from libcxx.sym_check import util + +extract_ignore_names = ['_init', '_fini'] + +class NMExtractor(object): + """ + NMExtractor - Extract symbol lists from libraries using nm. + """ + + @staticmethod + def find_tool(): + """ + Search for the nm executable and return the path. + """ + return distutils.spawn.find_executable('nm') + + def __init__(self): + """ + Initialize the nm executable and flags that will be used to extract + symbols from shared libraries. + """ + self.nm_exe = self.find_tool() + if self.nm_exe is None: + # ERROR no NM found + print("ERROR: Could not find nm") + sys.exit(1) + self.flags = ['-P', '-g'] + + def extract(self, lib): + """ + Extract symbols from a library and return the results as a dict of + parsed symbols. + """ + cmd = [self.nm_exe] + self.flags + [lib] + out, _, exit_code = libcxx.util.executeCommandVerbose(cmd) + if exit_code != 0: + raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib)) + fmt_syms = (self._extract_sym(l) + for l in out.splitlines() if l.strip()) + # Cast symbol to string. + final_syms = (repr(s) for s in fmt_syms if self._want_sym(s)) + # Make unique and sort strings. + tmp_list = list(sorted(set(final_syms))) + # Cast string back to symbol. + return util.read_syms_from_list(tmp_list) + + def _extract_sym(self, sym_str): + bits = sym_str.split() + # Everything we want has at least two columns. + if len(bits) < 2: + return None + new_sym = { + 'name': bits[0], + 'type': bits[1], + 'is_defined': (bits[1].lower() != 'u') + } + new_sym['name'] = new_sym['name'].replace('@@', '@') + new_sym = self._transform_sym_type(new_sym) + # NM types which we want to save the size for. + if new_sym['type'] == 'OBJECT' and len(bits) > 3: + new_sym['size'] = int(bits[3], 16) + return new_sym + + @staticmethod + def _want_sym(sym): + """ + Check that s is a valid symbol that we want to keep. + """ + if sym is None or len(sym) < 2: + return False + if sym['name'] in extract_ignore_names: + return False + bad_types = ['t', 'b', 'r', 'd', 'w'] + return (sym['type'] not in bad_types + and sym['name'] not in ['__bss_start', '_end', '_edata']) + + @staticmethod + def _transform_sym_type(sym): + """ + Map the nm single letter output for type to either FUNC or OBJECT. + If the type is not recognized it is left unchanged. + """ + func_types = ['T', 'W'] + obj_types = ['B', 'D', 'R', 'V', 'S'] + if sym['type'] in func_types: + sym['type'] = 'FUNC' + elif sym['type'] in obj_types: + sym['type'] = 'OBJECT' + return sym + +class ReadElfExtractor(object): + """ + ReadElfExtractor - Extract symbol lists from libraries using readelf. + """ + + @staticmethod + def find_tool(): + """ + Search for the readelf executable and return the path. + """ + return distutils.spawn.find_executable('readelf') + + def __init__(self): + """ + Initialize the readelf executable and flags that will be used to + extract symbols from shared libraries. + """ + self.tool = self.find_tool() + if self.tool is None: + # ERROR no NM found + print("ERROR: Could not find readelf") + sys.exit(1) + self.flags = ['--wide', '--symbols'] + + def extract(self, lib): + """ + Extract symbols from a library and return the results as a dict of + parsed symbols. + """ + cmd = [self.tool] + self.flags + [lib] + out, _, exit_code = libcxx.util.executeCommandVerbose(cmd) + if exit_code != 0: + raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib)) + dyn_syms = self.get_dynsym_table(out) + return self.process_syms(dyn_syms) + + def process_syms(self, sym_list): + new_syms = [] + for s in sym_list: + parts = s.split() + if not parts: + continue + assert len(parts) == 7 or len(parts) == 8 or len(parts) == 9 + if len(parts) == 7: + continue + new_sym = { + 'name': parts[7], + 'size': int(parts[2]), + 'type': parts[3], + 'is_defined': (parts[6] != 'UND') + } + assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE'] + if new_sym['name'] in extract_ignore_names: + continue + if new_sym['type'] == 'NOTYPE': + continue + if new_sym['type'] == 'FUNC': + del new_sym['size'] + new_syms += [new_sym] + return new_syms + + def get_dynsym_table(self, out): + lines = out.splitlines() + start = -1 + end = -1 + for i in range(len(lines)): + if lines[i].startswith("Symbol table '.dynsym'"): + start = i + 2 + if start != -1 and end == -1 and not lines[i].strip(): + end = i + 1 + assert start != -1 + if end == -1: + end = len(lines) + return lines[start:end] + + +def extract_symbols(lib_file): + """ + Extract and return a list of symbols extracted from a dynamic library. + The symbols are extracted using NM. They are then filtered and formated. + Finally they symbols are made unique. + """ + if ReadElfExtractor.find_tool(): + extractor = ReadElfExtractor() + else: + extractor = NMExtractor() + return extractor.extract(lib_file) diff --git a/utils/libcxx/sym_check/match.py b/utils/libcxx/sym_check/match.py new file mode 100644 index 0000000000000..fae400e4e77e8 --- /dev/null +++ b/utils/libcxx/sym_check/match.py @@ -0,0 +1,40 @@ +# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80: +#===----------------------------------------------------------------------===## +# +# The LLVM Compiler Infrastructure +# +# This file is dual licensed under the MIT and the University of Illinois Open +# Source Licenses. See LICENSE.TXT for details. +# +#===----------------------------------------------------------------------===## +""" +match - A set of functions for matching symbols in a list to a list of regexs +""" + +import re + + +def find_and_report_matching(symbol_list, regex_list): + report = '' + found_count = 0 + for regex_str in regex_list: + report += 'Matching regex "%s":\n' % regex_str + matching_list = find_matching_symbols(symbol_list, regex_str) + if not matching_list: + report += ' No matches found\n\n' + continue + # else + found_count += len(matching_list) + for m in matching_list: + report += ' MATCHES: %s\n' % m['name'] + report += '\n' + return found_count, report + + +def find_matching_symbols(symbol_list, regex_str): + regex = re.compile(regex_str) + matching_list = [] + for s in symbol_list: + if regex.match(s['name']): + matching_list += [s] + return matching_list diff --git a/utils/libcxx/sym_check/util.py b/utils/libcxx/sym_check/util.py new file mode 100644 index 0000000000000..8a4c4ab496ff5 --- /dev/null +++ b/utils/libcxx/sym_check/util.py @@ -0,0 +1,268 @@ +#===----------------------------------------------------------------------===## +# +# The LLVM Compiler Infrastructure +# +# This file is dual licensed under the MIT and the University of Illinois Open +# Source Licenses. See LICENSE.TXT for details. +# +#===----------------------------------------------------------------------===## + +import ast +import distutils.spawn +import sys +import re +import libcxx.util + + +def read_syms_from_list(slist): + """ + Read a list of symbols from a list of strings. + Each string is one symbol. + """ + return [ast.literal_eval(l) for l in slist] + + +def read_syms_from_file(filename): + """ + Read a list of symbols in from a file. + """ + with open(filename, 'r') as f: + data = f.read() + return read_syms_from_list(data.splitlines()) + + +def read_blacklist(filename): + with open(filename, 'r') as f: + data = f.read() + lines = [l.strip() for l in data.splitlines() if l.strip()] + lines = [l for l in lines if not l.startswith('#')] + return lines + + +def write_syms(sym_list, out=None, names_only=False): + """ + Write a list of symbols to the file named by out. + """ + out_str = '' + out_list = sym_list + out_list.sort(key=lambda x: x['name']) + if names_only: + out_list = [sym['name'] for sym in sym_list] + for sym in out_list: + out_str += '%s\n' % sym + if out is None: + sys.stdout.write(out_str) + else: + with open(out, 'w') as f: + f.write(out_str) + + +_cppfilt_exe = distutils.spawn.find_executable('c++filt') + + +def demangle_symbol(symbol): + if _cppfilt_exe is None: + return symbol + out, _, exit_code = libcxx.util.executeCommandVerbose( + [_cppfilt_exe], input=symbol) + if exit_code != 0: + return symbol + return out + + +def is_elf(filename): + with open(filename, 'rb') as f: + magic_bytes = f.read(4) + return magic_bytes == b'\x7fELF' + + +def is_mach_o(filename): + with open(filename, 'rb') as f: + magic_bytes = f.read(4) + return magic_bytes in [ + '\xfe\xed\xfa\xce', # MH_MAGIC + '\xce\xfa\xed\xfe', # MH_CIGAM + '\xfe\xed\xfa\xcf', # MH_MAGIC_64 + '\xcf\xfa\xed\xfe', # MH_CIGAM_64 + '\xca\xfe\xba\xbe', # FAT_MAGIC + '\xbe\xba\xfe\xca' # FAT_CIGAM + ] + + +def is_library_file(filename): + if sys.platform == 'darwin': + return is_mach_o(filename) + else: + return is_elf(filename) + + +def extract_or_load(filename): + import libcxx.sym_check.extract + if is_library_file(filename): + return libcxx.sym_check.extract.extract_symbols(filename) + return read_syms_from_file(filename) + +def adjust_mangled_name(name): + if not name.startswith('__Z'): + return name + return name[1:] + +new_delete_std_symbols = [ + '_Znam', + '_Znwm', + '_ZdaPv', + '_ZdaPvm', + '_ZdlPv', + '_ZdlPvm' +] + +cxxabi_symbols = [ + '___dynamic_cast', + '___gxx_personality_v0', + '_ZTIDi', + '_ZTIDn', + '_ZTIDs', + '_ZTIPDi', + '_ZTIPDn', + '_ZTIPDs', + '_ZTIPKDi', + '_ZTIPKDn', + '_ZTIPKDs', + '_ZTIPKa', + '_ZTIPKb', + '_ZTIPKc', + '_ZTIPKd', + '_ZTIPKe', + '_ZTIPKf', + '_ZTIPKh', + '_ZTIPKi', + '_ZTIPKj', + '_ZTIPKl', + '_ZTIPKm', + '_ZTIPKs', + '_ZTIPKt', + '_ZTIPKv', + '_ZTIPKw', + '_ZTIPKx', + '_ZTIPKy', + '_ZTIPa', + '_ZTIPb', + '_ZTIPc', + '_ZTIPd', + '_ZTIPe', + '_ZTIPf', + '_ZTIPh', + '_ZTIPi', + '_ZTIPj', + '_ZTIPl', + '_ZTIPm', + '_ZTIPs', + '_ZTIPt', + '_ZTIPv', + '_ZTIPw', + '_ZTIPx', + '_ZTIPy', + '_ZTIa', + '_ZTIb', + '_ZTIc', + '_ZTId', + '_ZTIe', + '_ZTIf', + '_ZTIh', + '_ZTIi', + '_ZTIj', + '_ZTIl', + '_ZTIm', + '_ZTIs', + '_ZTIt', + '_ZTIv', + '_ZTIw', + '_ZTIx', + '_ZTIy', + '_ZTSDi', + '_ZTSDn', + '_ZTSDs', + '_ZTSPDi', + '_ZTSPDn', + '_ZTSPDs', + '_ZTSPKDi', + '_ZTSPKDn', + '_ZTSPKDs', + '_ZTSPKa', + '_ZTSPKb', + '_ZTSPKc', + '_ZTSPKd', + '_ZTSPKe', + '_ZTSPKf', + '_ZTSPKh', + '_ZTSPKi', + '_ZTSPKj', + '_ZTSPKl', + '_ZTSPKm', + '_ZTSPKs', + '_ZTSPKt', + '_ZTSPKv', + '_ZTSPKw', + '_ZTSPKx', + '_ZTSPKy', + '_ZTSPa', + '_ZTSPb', + '_ZTSPc', + '_ZTSPd', + '_ZTSPe', + '_ZTSPf', + '_ZTSPh', + '_ZTSPi', + '_ZTSPj', + '_ZTSPl', + '_ZTSPm', + '_ZTSPs', + '_ZTSPt', + '_ZTSPv', + '_ZTSPw', + '_ZTSPx', + '_ZTSPy', + '_ZTSa', + '_ZTSb', + '_ZTSc', + '_ZTSd', + '_ZTSe', + '_ZTSf', + '_ZTSh', + '_ZTSi', + '_ZTSj', + '_ZTSl', + '_ZTSm', + '_ZTSs', + '_ZTSt', + '_ZTSv', + '_ZTSw', + '_ZTSx', + '_ZTSy' +] + +def is_stdlib_symbol_name(name): + name = adjust_mangled_name(name) + if re.search("@GLIBC|@GCC", name): + return False + if re.search('(St[0-9])|(__cxa)|(__cxxabi)', name): + return True + if name in new_delete_std_symbols: + return True + if name in cxxabi_symbols: + return True + if name.startswith('_Z'): + return True + return False + +def filter_stdlib_symbols(syms): + stdlib_symbols = [] + other_symbols = [] + for s in syms: + canon_name = adjust_mangled_name(s['name']) + if not is_stdlib_symbol_name(canon_name): + assert not s['is_defined'] and "found defined non-std symbol" + other_symbols += [s] + else: + stdlib_symbols += [s] + return stdlib_symbols, other_symbols |