diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:44:14 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:44:14 +0000 |
commit | 2b6b257f4e5503a7a2675bdb8735693db769f75c (patch) | |
tree | e85e046ae7003fe3bcc8b5454cd0fa3f7407b470 /tools/scan-build-py/libscanbuild/compilation.py | |
parent | b4348ed0b7e90c0831b925fbee00b5f179a99796 (diff) |
Notes
Diffstat (limited to 'tools/scan-build-py/libscanbuild/compilation.py')
-rw-r--r-- | tools/scan-build-py/libscanbuild/compilation.py | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/tools/scan-build-py/libscanbuild/compilation.py b/tools/scan-build-py/libscanbuild/compilation.py new file mode 100644 index 0000000000000..ef906fa60b9be --- /dev/null +++ b/tools/scan-build-py/libscanbuild/compilation.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +""" This module is responsible for to parse a compiler invocation. """ + +import re +import os +import collections + +__all__ = ['split_command', 'classify_source', 'compiler_language'] + +# Ignored compiler options map for compilation database creation. +# The map is used in `split_command` method. (Which does ignore and classify +# parameters.) Please note, that these are not the only parameters which +# might be ignored. +# +# Keys are the option name, value number of options to skip +IGNORED_FLAGS = { + # compiling only flag, ignored because the creator of compilation + # database will explicitly set it. + '-c': 0, + # preprocessor macros, ignored because would cause duplicate entries in + # the output (the only difference would be these flags). this is actual + # finding from users, who suffered longer execution time caused by the + # duplicates. + '-MD': 0, + '-MMD': 0, + '-MG': 0, + '-MP': 0, + '-MF': 1, + '-MT': 1, + '-MQ': 1, + # linker options, ignored because for compilation database will contain + # compilation commands only. so, the compiler would ignore these flags + # anyway. the benefit to get rid of them is to make the output more + # readable. + '-static': 0, + '-shared': 0, + '-s': 0, + '-rdynamic': 0, + '-l': 1, + '-L': 1, + '-u': 1, + '-z': 1, + '-T': 1, + '-Xlinker': 1 +} + +# Known C/C++ compiler executable name patterns +COMPILER_PATTERNS = frozenset([ + re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'), + re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'), + re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'), + re.compile(r'^llvm-g(cc|\+\+)$'), +]) + + +def split_command(command): + """ Returns a value when the command is a compilation, None otherwise. + + The value on success is a named tuple with the following attributes: + + files: list of source files + flags: list of compile options + compiler: string value of 'c' or 'c++' """ + + # the result of this method + result = collections.namedtuple('Compilation', + ['compiler', 'flags', 'files']) + result.compiler = compiler_language(command) + result.flags = [] + result.files = [] + # quit right now, if the program was not a C/C++ compiler + if not result.compiler: + return None + # iterate on the compile options + args = iter(command[1:]) + for arg in args: + # quit when compilation pass is not involved + if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}: + return None + # ignore some flags + elif arg in IGNORED_FLAGS: + count = IGNORED_FLAGS[arg] + for _ in range(count): + next(args) + elif re.match(r'^-(l|L|Wl,).+', arg): + pass + # some parameters could look like filename, take as compile option + elif arg in {'-D', '-I'}: + result.flags.extend([arg, next(args)]) + # parameter which looks source file is taken... + elif re.match(r'^[^-].+', arg) and classify_source(arg): + result.files.append(arg) + # and consider everything else as compile option. + else: + result.flags.append(arg) + # do extra check on number of source files + return result if result.files else None + + +def classify_source(filename, c_compiler=True): + """ Return the language from file name extension. """ + + mapping = { + '.c': 'c' if c_compiler else 'c++', + '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output', + '.ii': 'c++-cpp-output', + '.m': 'objective-c', + '.mi': 'objective-c-cpp-output', + '.mm': 'objective-c++', + '.mii': 'objective-c++-cpp-output', + '.C': 'c++', + '.cc': 'c++', + '.CC': 'c++', + '.cp': 'c++', + '.cpp': 'c++', + '.cxx': 'c++', + '.c++': 'c++', + '.C++': 'c++', + '.txx': 'c++' + } + + __, extension = os.path.splitext(os.path.basename(filename)) + return mapping.get(extension) + + +def compiler_language(command): + """ A predicate to decide the command is a compiler call or not. + + Returns 'c' or 'c++' when it match. None otherwise. """ + + cplusplus = re.compile(r'^(.+)(\+\+)(-.+|)$') + + if command: + executable = os.path.basename(command[0]) + if any(pattern.match(executable) for pattern in COMPILER_PATTERNS): + return 'c++' if cplusplus.match(executable) else 'c' + return None |