# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
# Provenance: tools/scan-build-py/libscanbuild/compilation.py, as created by
# commit 2b6b257f4e5503a7a2675bdb8735693db769f75c ("Vendor import of clang
# release_39 branch r276489").
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of the compilation
    # database will explicitly set it.
    '-c': 0,
    # dependency generation flags of the preprocessor, ignored because they
    # would cause duplicate entries in the output (the only difference would
    # be these flags). this is an actual finding from users, who suffered
    # longer execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# Options which mean the command does not run a compilation pass.
NON_COMPILATION_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Linker related arguments where the value is glued to the option name.
LINKER_ARGUMENT = re.compile(r'^-(l|L|Wl,).+')

# Arguments which do not start with a dash might be file names.
FILE_CANDIDATE = re.compile(r'^[^-].+')

# Executable names which denote a C++ compiler (eg.: 'g++', 'clang++-3.8').
CPLUSPLUS_EXECUTABLE = re.compile(r'^(.+)(\+\+)(-.+|)$')

# The value type returned by `split_command`.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The command is a list of strings, where the first element is the
    executable and the rest are its arguments.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the arguments request something else than a compilation (eg.:
    preprocessing only), or when no source file was found. """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options; an explicit iterator is used because
    # some options consume the arguments which follow them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in NON_COMPILATION_FLAGS:
            return None
        # ignore some flags (and their values)
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default value prevents StopIteration from escaping
                # when the command line ends before the option's value.
                next(args, None)
        elif LINKER_ARGUMENT.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # a dangling option at the end of the command has no value
            if value is not None:
                flags.append(value)
        # parameter which looks like a source file is taken...
        elif FILE_CANDIDATE.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    The `c_compiler` flag selects how the ambiguous '.c' and '.i' extensions
    are interpreted: as C when it is true, as C++ otherwise. Returns None
    when the extension is not a known source file extension. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    Only the base name of the first element of the command is examined.

    Returns 'c' or 'c++' when it matches. None otherwise. """

    if not command:
        return None

    executable = os.path.basename(command[0])
    if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
        return 'c++' if CPLUSPLUS_EXECUTABLE.match(executable) else 'c'
    return None