summaryrefslogtreecommitdiff
path: root/tools/scan-build-py/libscanbuild/compilation.py
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-07-23 20:44:14 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-07-23 20:44:14 +0000
commit2b6b257f4e5503a7a2675bdb8735693db769f75c (patch)
treee85e046ae7003fe3bcc8b5454cd0fa3f7407b470 /tools/scan-build-py/libscanbuild/compilation.py
parentb4348ed0b7e90c0831b925fbee00b5f179a99796 (diff)
Notes
Diffstat (limited to 'tools/scan-build-py/libscanbuild/compilation.py')
-rw-r--r--tools/scan-build-py/libscanbuild/compilation.py141
1 files changed, 141 insertions, 0 deletions
diff --git a/tools/scan-build-py/libscanbuild/compilation.py b/tools/scan-build-py/libscanbuild/compilation.py
new file mode 100644
index 0000000000000..ef906fa60b9be
--- /dev/null
+++ b/tools/scan-build-py/libscanbuild/compilation.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+""" This module is responsible for parsing a compiler invocation. """
+
+import re
+import os
+import collections
+
+__all__ = ['split_command', 'classify_source', 'compiler_language']
+
# Compiler options which are dropped when a compilation database is created.
# `split_command` looks flags up in this map in order to ignore them. Please
# note, that these are not the only parameters which might be ignored.
#
# Key: the option name. Value: how many of the arguments following the
# option are skipped together with it.
IGNORED_FLAGS = dict.fromkeys([
    # compile-only flag: the creator of the compilation database sets it
    # explicitly, so the original one is not kept.
    '-c',
    # dependency-output switches of the preprocessor: keeping them would
    # cause duplicate entries in the output which differ only in these
    # flags. (this is an actual finding from users, who suffered longer
    # execution time caused by the duplicates.)
    '-MD', '-MMD', '-MG', '-MP',
    # linker switches: the compilation database contains compilation
    # commands only, so the compiler would ignore these anyway. dropping
    # them makes the output more readable.
    '-static', '-shared', '-s', '-rdynamic',
], 0)
# the same groups again (dependency-output first, linker second), this time
# the options which take their value as one separate argument.
IGNORED_FLAGS.update(dict.fromkeys([
    '-MF', '-MT', '-MQ',
    '-l', '-L', '-u', '-z', '-T', '-Xlinker',
], 1))
+
# Regular expressions recognizing the executable name (without directory)
# of the known C/C++ compilers.
COMPILER_PATTERNS = frozenset(re.compile(expression) for expression in (
    # cc / c++, optionally prefixed with 'intercept-' or 'analyze-'
    r'^(intercept-|analyze-|)c(c|\+\+)$',
    # gcc / g++ / mcc / m++ with optional dash separated prefixes and an
    # optional version suffix
    r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
    # clang / clang++ with the same optional prefixes and version suffix
    r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
    # llvm-gcc / llvm-g++
    r'^llvm-g(cc|\+\+)$',
))
+
+
def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the executed command as a list of strings, the first element
             is the program itself.

    The value on success is a named tuple with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++'

    None is returned when the program is not a C/C++ compiler, when an
    argument shows that no compilation pass is involved, or when no source
    file was found among the arguments. """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    # arguments which tell that the compilation pass is not involved
    non_compiling = {'-E', '-S', '-cc1', '-M', '-MM', '-###'}
    flags = []
    files = []
    # iterate on the compile options. the iterator is shared with the
    # branches below, which consume the values belonging to a flag.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in non_compiling:
            return None
        # ignore some flags, together with the values which belong to them.
        # (the default stops `next` from raising StopIteration when the
        # command line ends before the expected value.)
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        # linker related flags with the value glued to them are ignored too
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            value = next(args, None)
            # a trailing flag without value is kept as it is
            flags.extend([arg] if value is None else [arg, value])
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    # build a real named tuple instance: the attributes are not stored on
    # the tuple class itself, so results of different calls stay independent
    compilation = collections.namedtuple('Compilation',
                                         ['compiler', 'flags', 'files'])
    return compilation(compiler=compiler, flags=flags, files=files)
+
+
def classify_source(filename, c_compiler=True):
    """ Return the language which belongs to the file name extension.

    filename:   name (or path) of the file, only its last extension counts
    c_compiler: decides how '.c' and '.i' files are taken: as C sources
                when true, as C++ sources otherwise

    Returns the language name, or None when the extension is not known. """

    suffix = os.path.splitext(os.path.basename(filename))[1]

    # the only two extensions whose language depends on the compiler kind
    if suffix == '.c':
        return 'c' if c_compiler else 'c++'
    if suffix == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # the rest is a plain (case sensitive) lookup
    languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }
    return languages.get(suffix)
+
+
def compiler_language(command):
    """ A predicate to decide whether the command is a compiler call.

    command: the executed command as a list of strings, only the first
             element (the program) is inspected, without its directory.

    Returns 'c' or 'c++' when the program name matches one of the known
    compiler name patterns. None otherwise (also for an empty command). """

    if not command:
        return None

    program = os.path.basename(command[0])
    if not any(known.match(program) for known in COMPILER_PATTERNS):
        return None

    # a '++' at the end of the name, or right before a dash separated
    # suffix, marks the C++ flavour of the compiler
    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'