summaryrefslogtreecommitdiff
path: root/utils/libcxx/sym_check
diff options
context:
space:
mode:
Diffstat (limited to 'utils/libcxx/sym_check')
-rw-r--r--utils/libcxx/sym_check/__init__.py17
-rw-r--r--utils/libcxx/sym_check/diff.py103
-rw-r--r--utils/libcxx/sym_check/extract.py194
-rw-r--r--utils/libcxx/sym_check/match.py40
-rw-r--r--utils/libcxx/sym_check/util.py268
5 files changed, 622 insertions, 0 deletions
diff --git a/utils/libcxx/sym_check/__init__.py b/utils/libcxx/sym_check/__init__.py
new file mode 100644
index 0000000000000..1aa2b450c12fb
--- /dev/null
+++ b/utils/libcxx/sym_check/__init__.py
@@ -0,0 +1,17 @@
+#===----------------------------------------------------------------------===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+
"""libcxx abi symbol checker"""

__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 1, 0)
# Dotted version string derived from __versioninfo__ (e.g. "0.1.0dev").
# The components are joined with '.', not ' ', so the result is a
# conventional version number.
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
+
# Submodules pulled in by `from libcxx.sym_check import *`.  'match' is
# listed alongside the other submodules of this package so that the
# wildcard import exposes all of them consistently.
__all__ = ['diff', 'extract', 'match', 'util']
diff --git a/utils/libcxx/sym_check/diff.py b/utils/libcxx/sym_check/diff.py
new file mode 100644
index 0000000000000..0821ef6f780a4
--- /dev/null
+++ b/utils/libcxx/sym_check/diff.py
@@ -0,0 +1,103 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+#===----------------------------------------------------------------------===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+"""
+diff - A set of functions for diff-ing two symbol lists.
+"""
+
+from libcxx.sym_check import util
+
+
def _symbol_difference(lhs, rhs):
    """
    Return the symbols of lhs whose 'name' does not appear in rhs.

    Only the 'name' key is compared.  The result keeps the order of lhs
    and keeps every lhs entry that carries a surviving name.
    """
    rhs_names = {sym['name'] for sym in rhs}
    return [sym for sym in lhs if sym['name'] not in rhs_names]
+
+
def _find_by_key(sym_list, k):
    """
    Return the first symbol in sym_list whose 'name' equals k, or None
    when no symbol has that name.
    """
    return next((sym for sym in sym_list if sym['name'] == k), None)
+
+
def added_symbols(old, new):
    """
    Return the symbols of new whose name is not present in old.
    """
    return _symbol_difference(lhs=new, rhs=old)
+
+
def removed_symbols(old, new):
    """
    Return the symbols of old whose name is not present in new.
    """
    return _symbol_difference(lhs=old, rhs=new)
+
+
def changed_symbols(old, new):
    """
    Return (old_sym, new_sym) pairs for symbols whose name exists in both
    lists but whose entry differs.

    A pair is reported when old_sym has no identical entry in new, new
    contains a symbol with the same name, and that symbol has no identical
    entry in old.  When new holds several symbols with the same name only
    the first one is considered.
    """
    # Index new by name once instead of scanning the list for every old
    # symbol.  setdefault keeps the first occurrence of each name.
    first_new_by_name = {}
    for sym in new:
        first_new_by_name.setdefault(sym['name'], sym)

    changed = []
    for old_sym in old:
        if old_sym in new:
            continue
        new_sym = first_new_by_name.get(old_sym['name'])
        # new_sym not being in old already implies new_sym != old_sym,
        # because old_sym itself is a member of old.
        if new_sym is not None and new_sym not in old:
            changed.append((old_sym, new_sym))
    return changed
+
+
def diff(old, new):
    """
    Compare two symbol lists and return the tuple
    (added, removed, changed) computed by added_symbols, removed_symbols
    and changed_symbols respectively.
    """
    return (added_symbols(old, new),
            removed_symbols(old, new),
            changed_symbols(old, new))
+
+
def report_diff(added_syms, removed_syms, changed_syms, names_only=False,
                demangle=True):
    """
    Build a textual report for a symbol diff.

    added_syms and removed_syms are lists of symbols; changed_syms is a
    list of (old_sym, new_sym) pairs.  When names_only is true only symbol
    names are printed and changed symbols are neither listed nor counted
    as an ABI break.  When demangle is true names are passed through
    util.demangle_symbol before printing.

    Returns (report, abi_break, is_different), where report is a string
    and the two flags are always real booleans.
    """
    def maybe_demangle(name):
        return util.demangle_symbol(name) if demangle else name

    has_added = len(added_syms) != 0
    has_removed = len(removed_syms) != 0
    has_changed = len(changed_syms) != 0

    # Collect fragments and join once instead of repeated string +=.
    parts = []
    for sym in added_syms:
        parts.append('Symbol added: %s\n' % maybe_demangle(sym['name']))
        if not names_only:
            parts.append(' %s\n\n' % sym)
    if added_syms and names_only:
        parts.append('\n')
    for sym in removed_syms:
        parts.append('SYMBOL REMOVED: %s\n' % maybe_demangle(sym['name']))
        if not names_only:
            parts.append(' %s\n\n' % sym)
    if removed_syms and names_only:
        parts.append('\n')
    if not names_only:
        for old_sym, new_sym in changed_syms:
            old_str = '\n OLD SYMBOL: %s' % old_sym
            new_str = '\n NEW SYMBOL: %s' % new_sym
            parts.append('SYMBOL CHANGED: %s%s%s\n\n' %
                         (maybe_demangle(old_sym['name']),
                          old_str, new_str))

    # Removed symbols always break the ABI; changed symbols only count
    # when full symbol details are being compared.
    abi_break = has_removed or (not names_only and has_changed)
    if has_added or abi_break:
        parts.append('Summary\n')
        parts.append(' Added: %d\n' % len(added_syms))
        parts.append(' Removed: %d\n' % len(removed_syms))
        if not names_only:
            parts.append(' Changed: %d\n' % len(changed_syms))
        if not abi_break:
            parts.append('Symbols added.')
        else:
            parts.append('ABI BREAKAGE: SYMBOLS ADDED OR REMOVED!')
    else:
        parts.append('Symbols match.')
    is_different = abi_break or has_added or has_changed
    return ''.join(parts), abi_break, is_different
diff --git a/utils/libcxx/sym_check/extract.py b/utils/libcxx/sym_check/extract.py
new file mode 100644
index 0000000000000..152ff97db1785
--- /dev/null
+++ b/utils/libcxx/sym_check/extract.py
@@ -0,0 +1,194 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+#===----------------------------------------------------------------------===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+"""
+extract - A set of functions that extract symbol lists from shared libraries.
+"""
+import distutils.spawn
+import sys
+import re
+
+import libcxx.util
+from libcxx.sym_check import util
+
+extract_ignore_names = ['_init', '_fini']
+
class NMExtractor(object):
    """
    NMExtractor - Extract symbol lists from libraries using nm.
    """

    @staticmethod
    def find_tool():
        """
        Look up the nm executable and return its path (None if not found).
        """
        return distutils.spawn.find_executable('nm')

    def __init__(self):
        """
        Record the nm executable and the command line flags used to extract
        symbols.  Prints an error and exits with status 1 when nm cannot be
        located.
        """
        nm_path = self.find_tool()
        self.nm_exe = nm_path
        if nm_path is None:
            # Without nm there is nothing this extractor can do.
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.flags = ['-P', '-g']

    def extract(self, lib):
        """
        Run nm on lib and return the wanted symbols as a list of symbol
        dicts, de-duplicated and sorted by their repr() string.

        Raises RuntimeError when nm exits with a non-zero status.
        """
        cmd = [self.nm_exe] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        # Symbols are dicts and therefore unhashable; their repr() strings
        # are used to make them unique and to give them a stable order.
        unique_reprs = set()
        for line in out.splitlines():
            if not line.strip():
                continue
            sym = self._extract_sym(line)
            if self._want_sym(sym):
                unique_reprs.add(repr(sym))
        # Turn the strings back into symbol dicts.
        return util.read_syms_from_list(sorted(unique_reprs))

    def _extract_sym(self, sym_str):
        """
        Parse one line of nm output into a symbol dict.  Returns None for
        lines with fewer than two whitespace separated columns.
        """
        fields = sym_str.split()
        # Everything we want has at least two columns.
        if len(fields) < 2:
            return None
        raw_name, raw_type = fields[0], fields[1]
        sym = self._transform_sym_type({
            'name': raw_name.replace('@@', '@'),
            'type': raw_type,
            'is_defined': (raw_type.lower() != 'u')
        })
        # Only OBJECT symbols keep their size; the fourth column is parsed
        # as a hexadecimal number.
        if sym['type'] == 'OBJECT' and len(fields) > 3:
            sym['size'] = int(fields[3], 16)
        return sym

    @staticmethod
    def _want_sym(sym):
        """
        Return True when sym is a parsed symbol that should be kept.
        """
        if sym is None or len(sym) < 2:
            return False
        name = sym['name']
        if name in extract_ignore_names:
            return False
        if sym['type'] in ('t', 'b', 'r', 'd', 'w'):
            return False
        return name not in ('__bss_start', '_end', '_edata')

    @staticmethod
    def _transform_sym_type(sym):
        """
        Rewrite the single letter nm type of sym to 'FUNC' or 'OBJECT'.
        Unrecognized types are left as they are.  sym is modified in place
        and also returned.
        """
        kind_by_letter = {
            'T': 'FUNC', 'W': 'FUNC',
            'B': 'OBJECT', 'D': 'OBJECT', 'R': 'OBJECT',
            'V': 'OBJECT', 'S': 'OBJECT',
        }
        letter = sym['type']
        sym['type'] = kind_by_letter.get(letter, letter)
        return sym
+
class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path (None if it
        cannot be found).
        """
        return distutils.spawn.find_executable('readelf')

    def __init__(self):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries.  Prints an error and exits
        with status 1 when readelf cannot be located.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        self.flags = ['--wide', '--symbols']

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.

        Raises RuntimeError when readelf exits with a non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            # This class stores the executable in self.tool; it has no
            # nm_exe attribute.
            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    def process_syms(self, sym_list):
        """
        Convert the rows of a readelf symbol table into symbol dicts.

        Blank rows, rows with only seven columns (no name), ignored names
        and NOTYPE symbols are skipped.  FUNC symbols do not keep a 'size'.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) == 7 or len(parts) == 8 or len(parts) == 9
            if len(parts) == 7:
                continue
            new_sym = {
                'name': parts[7],
                'size': int(parts[2]),
                'type': parts[3],
                'is_defined': (parts[6] != 'UND')
            }
            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE']
            if new_sym['name'] in extract_ignore_names:
                continue
            if new_sym['type'] == 'NOTYPE':
                continue
            if new_sym['type'] == 'FUNC':
                del new_sym['size']
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of the '.dynsym' table found in readelf output.

        The table starts two lines after its "Symbol table '.dynsym'"
        header and runs up to and including the first blank line after the
        header, or to the end of the output when there is none.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            if line.startswith("Symbol table '.dynsym'"):
                # Skip the header line and the column title line.
                start = i + 2
            if start != -1 and end == -1 and not line.strip():
                end = i + 1
        assert start != -1
        if end == -1:
            end = len(lines)
        return lines[start:end]
+
+
def extract_symbols(lib_file):
    """
    Extract and return a list of symbols from a dynamic library.
    readelf is used when it can be found; otherwise nm is used.  The
    chosen extractor filters and formats the symbols it returns.
    """
    use_readelf = ReadElfExtractor.find_tool()
    extractor = ReadElfExtractor() if use_readelf else NMExtractor()
    return extractor.extract(lib_file)
diff --git a/utils/libcxx/sym_check/match.py b/utils/libcxx/sym_check/match.py
new file mode 100644
index 0000000000000..fae400e4e77e8
--- /dev/null
+++ b/utils/libcxx/sym_check/match.py
@@ -0,0 +1,40 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+#===----------------------------------------------------------------------===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+"""
+match - A set of functions for matching symbols in a list to a list of regexs
+"""
+
+import re
+
+
def find_and_report_matching(symbol_list, regex_list):
    """
    Match every regex in regex_list against symbol_list and build a report.

    Returns (found_count, report), where found_count is the total number
    of matches over all regexes and report is the text listing them.
    """
    total = 0
    chunks = []
    for pattern in regex_list:
        chunks.append('Matching regex "%s":\n' % pattern)
        hits = find_matching_symbols(symbol_list, pattern)
        if hits:
            total += len(hits)
            chunks.extend(' MATCHES: %s\n' % hit['name'] for hit in hits)
            chunks.append('\n')
        else:
            chunks.append(' No matches found\n\n')
    return total, ''.join(chunks)
+
+
def find_matching_symbols(symbol_list, regex_str):
    """
    Return the symbols whose 'name' matches regex_str.  The pattern is
    anchored at the start of the name (re.match semantics).
    """
    matches = re.compile(regex_str).match
    return [sym for sym in symbol_list if matches(sym['name'])]
diff --git a/utils/libcxx/sym_check/util.py b/utils/libcxx/sym_check/util.py
new file mode 100644
index 0000000000000..8a4c4ab496ff5
--- /dev/null
+++ b/utils/libcxx/sym_check/util.py
@@ -0,0 +1,268 @@
+#===----------------------------------------------------------------------===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+
+import ast
+import distutils.spawn
+import sys
+import re
+import libcxx.util
+
+
def read_syms_from_list(slist):
    """
    Parse a list of strings into a list of symbols.
    Every string holds the Python literal for exactly one symbol.
    """
    return list(map(ast.literal_eval, slist))
+
+
def read_syms_from_file(filename):
    """
    Read a list of symbols from a file holding one symbol per line.
    """
    with open(filename, 'r') as f:
        lines = f.read().splitlines()
    return read_syms_from_list(lines)
+
+
def read_blacklist(filename):
    """
    Read a blacklist file and return its entries.
    Each line is stripped; empty lines and lines starting with '#' are
    dropped.
    """
    with open(filename, 'r') as f:
        contents = f.read()
    entries = []
    for raw_line in contents.splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith('#'):
            entries.append(entry)
    return entries
+
+
def write_syms(sym_list, out=None, names_only=False):
    """
    Write a list of symbols, sorted by name, one per line.

    The output goes to the file named by out, or to stdout when out is
    None.  When names_only is true only the symbol names are written
    instead of the full symbol entries.  sym_list itself is left
    untouched.
    """
    # sorted() builds a new list, so the caller's list is not reordered.
    ordered = sorted(sym_list, key=lambda sym: sym['name'])
    if names_only:
        ordered = [sym['name'] for sym in ordered]
    out_str = ''.join('%s\n' % entry for entry in ordered)
    if out is None:
        sys.stdout.write(out_str)
    else:
        with open(out, 'w') as f:
            f.write(out_str)
+
+
+_cppfilt_exe = distutils.spawn.find_executable('c++filt')
+
+
def demangle_symbol(symbol):
    """
    Demangle symbol by feeding it to c++filt and return the tool's output.
    The symbol is returned unchanged when c++filt was not found or exits
    with a non-zero status.
    """
    if _cppfilt_exe is not None:
        out, _, exit_code = libcxx.util.executeCommandVerbose(
            [_cppfilt_exe], input=symbol)
        if exit_code == 0:
            return out
    return symbol
+
+
def is_elf(filename):
    """
    Return True when the first four bytes of the file are the ELF magic
    number.
    """
    with open(filename, 'rb') as f:
        return f.read(4) == b'\x7fELF'
+
+
def is_mach_o(filename):
    """
    Return True when the first four bytes of the file are one of the
    Mach-O or fat binary magic numbers.
    """
    with open(filename, 'rb') as f:
        magic_bytes = f.read(4)
    # The file is read in binary mode, so the magic numbers must be bytes
    # literals; on Python 3 a bytes value never compares equal to a str.
    return magic_bytes in (
        b'\xfe\xed\xfa\xce',  # MH_MAGIC
        b'\xce\xfa\xed\xfe',  # MH_CIGAM
        b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
        b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
        b'\xca\xfe\xba\xbe',  # FAT_MAGIC
        b'\xbe\xba\xfe\xca',  # FAT_CIGAM
    )
+
+
def is_library_file(filename):
    """
    Return True when filename is a native library for the current
    platform: a Mach-O file on darwin, an ELF file everywhere else.
    """
    checker = is_mach_o if sys.platform == 'darwin' else is_elf
    return checker(filename)
+
+
def extract_or_load(filename):
    """
    Return the symbol list for filename.  Library files have their symbols
    extracted; any other file is read as a saved symbol list.
    """
    # Imported here rather than at module level, as in the original code.
    import libcxx.sym_check.extract
    if not is_library_file(filename):
        return read_syms_from_file(filename)
    return libcxx.sym_check.extract.extract_symbols(filename)
+
def adjust_mangled_name(name):
    """
    Drop the first character of names that start with '__Z', turning
    '__Z...' into '_Z...'.  Every other name is returned unchanged.
    """
    return name[1:] if name.startswith('__Z') else name
+
# Mangled names of the global allocation and deallocation functions that
# is_stdlib_symbol_name accepts as standard library symbols.
new_delete_std_symbols = [
    '_Znam',    # operator new[](unsigned long)
    '_Znwm',    # operator new(unsigned long)
    '_ZdaPv',   # operator delete[](void*)
    '_ZdaPvm',  # operator delete[](void*, unsigned long)
    '_ZdlPv',   # operator delete(void*)
    '_ZdlPvm'   # operator delete(void*, unsigned long)
]
+
+cxxabi_symbols = [
+ '___dynamic_cast',
+ '___gxx_personality_v0',
+ '_ZTIDi',
+ '_ZTIDn',
+ '_ZTIDs',
+ '_ZTIPDi',
+ '_ZTIPDn',
+ '_ZTIPDs',
+ '_ZTIPKDi',
+ '_ZTIPKDn',
+ '_ZTIPKDs',
+ '_ZTIPKa',
+ '_ZTIPKb',
+ '_ZTIPKc',
+ '_ZTIPKd',
+ '_ZTIPKe',
+ '_ZTIPKf',
+ '_ZTIPKh',
+ '_ZTIPKi',
+ '_ZTIPKj',
+ '_ZTIPKl',
+ '_ZTIPKm',
+ '_ZTIPKs',
+ '_ZTIPKt',
+ '_ZTIPKv',
+ '_ZTIPKw',
+ '_ZTIPKx',
+ '_ZTIPKy',
+ '_ZTIPa',
+ '_ZTIPb',
+ '_ZTIPc',
+ '_ZTIPd',
+ '_ZTIPe',
+ '_ZTIPf',
+ '_ZTIPh',
+ '_ZTIPi',
+ '_ZTIPj',
+ '_ZTIPl',
+ '_ZTIPm',
+ '_ZTIPs',
+ '_ZTIPt',
+ '_ZTIPv',
+ '_ZTIPw',
+ '_ZTIPx',
+ '_ZTIPy',
+ '_ZTIa',
+ '_ZTIb',
+ '_ZTIc',
+ '_ZTId',
+ '_ZTIe',
+ '_ZTIf',
+ '_ZTIh',
+ '_ZTIi',
+ '_ZTIj',
+ '_ZTIl',
+ '_ZTIm',
+ '_ZTIs',
+ '_ZTIt',
+ '_ZTIv',
+ '_ZTIw',
+ '_ZTIx',
+ '_ZTIy',
+ '_ZTSDi',
+ '_ZTSDn',
+ '_ZTSDs',
+ '_ZTSPDi',
+ '_ZTSPDn',
+ '_ZTSPDs',
+ '_ZTSPKDi',
+ '_ZTSPKDn',
+ '_ZTSPKDs',
+ '_ZTSPKa',
+ '_ZTSPKb',
+ '_ZTSPKc',
+ '_ZTSPKd',
+ '_ZTSPKe',
+ '_ZTSPKf',
+ '_ZTSPKh',
+ '_ZTSPKi',
+ '_ZTSPKj',
+ '_ZTSPKl',
+ '_ZTSPKm',
+ '_ZTSPKs',
+ '_ZTSPKt',
+ '_ZTSPKv',
+ '_ZTSPKw',
+ '_ZTSPKx',
+ '_ZTSPKy',
+ '_ZTSPa',
+ '_ZTSPb',
+ '_ZTSPc',
+ '_ZTSPd',
+ '_ZTSPe',
+ '_ZTSPf',
+ '_ZTSPh',
+ '_ZTSPi',
+ '_ZTSPj',
+ '_ZTSPl',
+ '_ZTSPm',
+ '_ZTSPs',
+ '_ZTSPt',
+ '_ZTSPv',
+ '_ZTSPw',
+ '_ZTSPx',
+ '_ZTSPy',
+ '_ZTSa',
+ '_ZTSb',
+ '_ZTSc',
+ '_ZTSd',
+ '_ZTSe',
+ '_ZTSf',
+ '_ZTSh',
+ '_ZTSi',
+ '_ZTSj',
+ '_ZTSl',
+ '_ZTSm',
+ '_ZTSs',
+ '_ZTSt',
+ '_ZTSv',
+ '_ZTSw',
+ '_ZTSx',
+ '_ZTSy'
+]
+
def is_stdlib_symbol_name(name):
    """
    Return True when name looks like a C++ standard library or C++ ABI
    symbol.

    Names carrying an @GLIBC or @GCC version suffix are rejected first.
    Otherwise a name is accepted when it contains 'St<digit>', '__cxa' or
    '__cxxabi', is listed in new_delete_std_symbols or cxxabi_symbols, or
    starts with '_Z'.
    """
    name = adjust_mangled_name(name)
    if re.search("@GLIBC|@GCC", name):
        return False
    return bool(
        re.search('(St[0-9])|(__cxa)|(__cxxabi)', name)
        or name in new_delete_std_symbols
        or name in cxxabi_symbols
        or name.startswith('_Z'))
+
def filter_stdlib_symbols(syms):
    """
    Split syms into (stdlib_symbols, other_symbols) according to
    is_stdlib_symbol_name.  The order of syms is preserved in both lists.

    Symbols that are not standard library symbols are expected to be
    undefined; a defined one trips an assertion.
    """
    stdlib_symbols = []
    other_symbols = []
    for s in syms:
        canon_name = adjust_mangled_name(s['name'])
        if is_stdlib_symbol_name(canon_name):
            stdlib_symbols.append(s)
        else:
            # The message is the second operand of assert so that it is
            # actually reported when the check fails.
            assert not s['is_defined'], "found defined non-std symbol"
            other_symbols.append(s)
    return stdlib_symbols, other_symbols