diff options
Diffstat (limited to 'bindings/python')
-rw-r--r-- | bindings/python/README.txt | 18 | ||||
-rw-r--r-- | bindings/python/clang/__init__.py | 24 | ||||
-rw-r--r-- | bindings/python/clang/cindex.py | 933 | ||||
-rw-r--r-- | bindings/python/examples/cindex/cindex-dump.py | 87 | ||||
-rw-r--r-- | bindings/python/examples/cindex/cindex-includes.py | 58 | ||||
-rw-r--r-- | bindings/python/tests/__init__.py | 0 | ||||
-rw-r--r-- | bindings/python/tests/cindex/INPUTS/header1.h | 6 | ||||
-rw-r--r-- | bindings/python/tests/cindex/INPUTS/header2.h | 6 | ||||
-rw-r--r-- | bindings/python/tests/cindex/INPUTS/header3.h | 3 | ||||
-rw-r--r-- | bindings/python/tests/cindex/INPUTS/hello.cpp | 6 | ||||
-rw-r--r-- | bindings/python/tests/cindex/INPUTS/include.cpp | 5 | ||||
-rw-r--r-- | bindings/python/tests/cindex/INPUTS/parse_arguments.c | 2 | ||||
-rw-r--r-- | bindings/python/tests/cindex/__init__.py | 0 | ||||
-rw-r--r-- | bindings/python/tests/cindex/test_cursor.py | 59 | ||||
-rw-r--r-- | bindings/python/tests/cindex/test_cursor_kind.py | 27 | ||||
-rw-r--r-- | bindings/python/tests/cindex/test_diagnostics.py | 48 | ||||
-rw-r--r-- | bindings/python/tests/cindex/test_index.py | 15 | ||||
-rw-r--r-- | bindings/python/tests/cindex/test_translation_unit.py | 73 |
18 files changed, 1370 insertions, 0 deletions
diff --git a/bindings/python/README.txt b/bindings/python/README.txt new file mode 100644 index 000000000000..ccc2619ccf5b --- /dev/null +++ b/bindings/python/README.txt @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// Clang Python Bindings +//===----------------------------------------------------------------------===// + +This directory implements Python bindings for Clang. Currently, only bindings +for the CIndex C API exist. + +You may need to alter LD_LIBRARY_PATH so that the CIndex library can be +found. The unit tests are designed to be run with 'nosetests'. For example: +-- +$ env PYTHONPATH=$(echo ~/llvm/tools/clang/bindings/python/) \ + LD_LIBRARY_PATH=$(llvm-config --libdir) \ + nosetests -v +tests.cindex.test_index.test_create ... ok +... + +OK +-- diff --git a/bindings/python/clang/__init__.py b/bindings/python/clang/__init__.py new file mode 100644 index 000000000000..88f30812383f --- /dev/null +++ b/bindings/python/clang/__init__.py @@ -0,0 +1,24 @@ +#===- __init__.py - Clang Python Bindings --------------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +r""" +Clang Library Bindings +====================== + +This package provides access to the Clang compiler and libraries. + +The available modules are: + + cindex + + Bindings for the Clang indexing library. 
+""" + +__all__ = ['cindex'] + diff --git a/bindings/python/clang/cindex.py b/bindings/python/clang/cindex.py new file mode 100644 index 000000000000..c37c69b79b38 --- /dev/null +++ b/bindings/python/clang/cindex.py @@ -0,0 +1,933 @@ +#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +r""" +Clang Indexing Library Bindings +=============================== + +This module provides an interface to the Clang indexing library. It is a +low-level interface to the indexing library which attempts to match the Clang +API directly while also being "pythonic". Notable differences from the C API +are: + + * string results are returned as Python strings, not CXString objects. + + * null cursors are translated to None. + + * access to child cursors is done via iteration, not visitation. + +The major indexing objects are: + + Index + + The top-level object which manages some global library state. + + TranslationUnit + + High-level object encapsulating the AST for a single translation unit. These + can be loaded from .ast files or parsed on the fly. + + Cursor + + Generic object for representing a node in the AST. + + SourceRange, SourceLocation, and File + + Objects representing information about the input source. + +Most object information is exposed using properties, when the underlying API +call is efficient. +""" + +# TODO +# ==== +# +# o API support for invalid translation units. Currently we can't even get the +# diagnostics on failure because they refer to locations in an object that +# will have been invalidated. +# +# o fix memory management issues (currently client must hold on to index and +# translation unit, or risk crashes). +# +# o expose code completion APIs. 
#
# o cleanup ctypes wrapping, would be nice to separate the ctypes details more
#   clearly, and hide from the external interface (i.e., help(cindex)).
#
# o implement additional SourceLocation, SourceRange, and File methods.

from ctypes import *

def get_cindex_library():
    """
    Load and return the CIndex shared library for the current platform.

    The library is requested by bare file name ('libCIndex.dylib' on Darwin,
    'libCIndex.dll' on Windows, 'libCIndex.so' otherwise), so it has to be
    locatable by the platform's dynamic loader.
    """
    # FIXME: It's probably not the case that the library is actually found in
    # this location. We need a better system of identifying and loading the
    # CIndex library. It could be on path or elsewhere, or versioned, etc.
    import platform
    name = platform.system()
    if name == 'Darwin':
        return cdll.LoadLibrary('libCIndex.dylib')
    elif name == 'Windows':
        return cdll.LoadLibrary('libCIndex.dll')
    else:
        return cdll.LoadLibrary('libCIndex.so')

# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
# object. This is a problem, because it means that from_parameter will see an
# integer and pass the wrong value on platforms where int != void*. Work around
# this by marshalling object arguments as void**.
c_object_p = POINTER(c_void_p)

lib = get_cindex_library()

### Structures and Utility Classes ###

class _CXString(Structure):
    """Helper for transforming CXString results."""

    _fields_ = [("spelling", c_char_p), ("free", c_int)]

    def __del__(self):
        # Hand the string back to the library when the wrapper is collected.
        _CXString_dispose(self)

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck hook: return the C string held by `res`."""
        assert isinstance(res, _CXString)
        return _CXString_getCString(res)

class SourceLocation(Structure):
    """
    A SourceLocation represents a particular location within a source file.
    """
    _fields_ = [("ptr_data", c_void_p * 2), ("int_data", c_uint)]

    # Lazily computed (file, line, column, offset) tuple; see
    # _get_instantiation().
    _data = None

    def _get_instantiation(self):
        """
        Return the cached (file, line, column, offset) tuple for this
        location, querying the library on first use. `file` is a File object,
        or None when the library reports no file.
        """
        if self._data is None:
            file_ptr = c_object_p()
            line, column, offset = c_uint(), c_uint(), c_uint()
            SourceLocation_loc(self, byref(file_ptr), byref(line),
                               byref(column), byref(offset))
            source_file = File(file_ptr) if file_ptr else None
            # The last slot must hold the offset out-parameter; it previously
            # repeated the column value, making `offset` equal to `column`.
            self._data = (source_file, int(line.value), int(column.value),
                          int(offset.value))
        return self._data

    @property
    def file(self):
        """Get the file represented by this source location."""
        return self._get_instantiation()[0]

    @property
    def line(self):
        """Get the line represented by this source location."""
        return self._get_instantiation()[1]

    @property
    def column(self):
        """Get the column represented by this source location."""
        return self._get_instantiation()[2]

    @property
    def offset(self):
        """Get the file offset represented by this source location."""
        return self._get_instantiation()[3]

    def __repr__(self):
        return "<SourceLocation file %r, line %r, column %r>" % (
            self.file.name if self.file else None, self.line, self.column)

class SourceRange(Structure):
    """
    A SourceRange describes a range of source locations within the source
    code.
    """
    _fields_ = [
        ("ptr_data", c_void_p * 2),
        ("begin_int_data", c_uint),
        ("end_int_data", c_uint)]

    # FIXME: Eliminate this and make normal constructor? Requires hiding ctypes
    # object.
    @staticmethod
    def from_locations(start, end):
        """Build a SourceRange spanning the two given SourceLocations."""
        return SourceRange_getRange(start, end)

    @property
    def start(self):
        """
        Return a SourceLocation representing the first character within a
        source range.
        """
        return SourceRange_start(self)

    @property
    def end(self):
        """
        Return a SourceLocation representing the last character within a
        source range.
        """
        return SourceRange_end(self)

    def __repr__(self):
        return "<SourceRange start %r, end %r>" % (self.start, self.end)

class Diagnostic(object):
    """
    A Diagnostic is a single instance of a Clang diagnostic. It includes the
    diagnostic severity, the message, the location the diagnostic occurred, as
    well as additional source ranges and associated fix-it hints.
    """

    # Severity levels, as stored in the `severity` attribute.
    Ignored = 0
    Note    = 1
    Warning = 2
    Error   = 3
    Fatal   = 4

    def __init__(self, severity, location, spelling, ranges, fixits):
        self.severity = severity
        self.location = location
        self.spelling = spelling
        self.ranges = ranges
        self.fixits = fixits

    def __repr__(self):
        return "<Diagnostic severity %r, location %r, spelling %r>" % (
            self.severity, self.location, self.spelling)

class FixIt(object):
    """
    A FixIt represents a transformation to be applied to the source to
    "fix-it". The fix-it should be applied by replacing the given source range
    with the given value.
    """

    def __init__(self, range, value):
        self.range = range
        self.value = value

    def __repr__(self):
        return "<FixIt range %r, value %r>" % (self.range, self.value)

### Cursor Kinds ###

class CursorKind(object):
    """
    A CursorKind describes the kind of entity that a cursor points to.
    """

    # The unique kind objects, indexed by id.
    _kinds = []

    # Class-wide cache mapping kind objects to their attribute names; reset
    # whenever a new kind is registered and rebuilt on demand by `name`.
    _name_map = None

    def __init__(self, value):
        """
        Register a new kind under the integer id `value`.

        Raises ValueError if a kind with that id is already registered.
        """
        if value >= len(CursorKind._kinds):
            CursorKind._kinds += [None] * (value - len(CursorKind._kinds) + 1)
        if CursorKind._kinds[value] is not None:
            raise ValueError('CursorKind already loaded')
        self.value = value
        CursorKind._kinds[value] = self
        CursorKind._name_map = None

    def from_param(self):
        """ctypes argument conversion: pass the kind as its integer id."""
        return self.value

    @property
    def name(self):
        """Get the enumeration name of this cursor kind."""
        # Cache on the class, not the instance: __init__ invalidates the class
        # attribute, and a per-instance map would be rebuilt for every kind
        # and never invalidated.
        if CursorKind._name_map is None:
            name_map = {}
            for key, value in CursorKind.__dict__.items():
                if isinstance(value, CursorKind):
                    name_map[value] = key
            CursorKind._name_map = name_map
        return CursorKind._name_map[self]

    @staticmethod
    def from_id(id):
        """
        Return the registered kind with the given integer id.

        Raises ValueError if no kind is registered under that id.
        """
        # Reject negative ids explicitly; list indexing would otherwise wrap
        # around and return an unrelated kind.
        if (id < 0 or id >= len(CursorKind._kinds) or
                CursorKind._kinds[id] is None):
            raise ValueError('Unknown cursor kind')
        return CursorKind._kinds[id]

    @staticmethod
    def get_all_kinds():
        """Return all CursorKind enumeration instances."""
        return [kind for kind in CursorKind._kinds if kind is not None]

    def is_declaration(self):
        """Test if this is a declaration kind."""
        return CursorKind_is_decl(self)

    def is_reference(self):
        """Test if this is a reference kind."""
        return CursorKind_is_ref(self)

    def is_expression(self):
        """Test if this is an expression kind."""
        return CursorKind_is_expr(self)

    def is_statement(self):
        """Test if this is a statement kind."""
        return CursorKind_is_stmt(self)

    def is_invalid(self):
        """Test if this is an invalid kind."""
        return CursorKind_is_inv(self)

    def __repr__(self):
        return 'CursorKind.%s' % (self.name,)

# FIXME: Is there a nicer way to expose this enumeration? We could potentially
# represent the nested structure, or even build a class hierarchy. The main
# things we want for sure are (a) simple external access to kinds, (b) a place
# to hang a description and name, (c) easy to keep in sync with Index.h.

###
# Declaration Kinds

# A declaration whose specific kind is not exposed via this interface.
#
# Unexposed declarations have the same operations as any other kind of
# declaration; one can extract their location information, spelling, find their
# definitions, etc. However, the specific kind of the declaration is not
# reported.
#
# Each (name, id) pair below is registered as the class attribute
# CursorKind.<name>, bound to CursorKind(<id>).
for _kind_name, _kind_id in (
        # The unexposed declaration kind described above.
        ('UNEXPOSED_DECL', 1),

        # A C or C++ struct.
        ('STRUCT_DECL', 2),

        # A C or C++ union.
        ('UNION_DECL', 3),

        # A C++ class.
        ('CLASS_DECL', 4),

        # An enumeration.
        ('ENUM_DECL', 5),

        # A field (in C) or non-static data member (in C++) in a struct,
        # union, or C++ class.
        ('FIELD_DECL', 6),

        # An enumerator constant.
        ('ENUM_CONSTANT_DECL', 7),

        # A function.
        ('FUNCTION_DECL', 8),

        # A variable.
        ('VAR_DECL', 9),

        # A function or method parameter.
        ('PARM_DECL', 10),

        # An Objective-C @interface.
        ('OBJC_INTERFACE_DECL', 11),

        # An Objective-C @interface for a category.
        ('OBJC_CATEGORY_DECL', 12),

        # An Objective-C @protocol declaration.
        ('OBJC_PROTOCOL_DECL', 13),

        # An Objective-C @property declaration.
        ('OBJC_PROPERTY_DECL', 14),

        # An Objective-C instance variable.
        ('OBJC_IVAR_DECL', 15),

        # An Objective-C instance method.
        ('OBJC_INSTANCE_METHOD_DECL', 16),

        # An Objective-C class method.
        ('OBJC_CLASS_METHOD_DECL', 17),

        # An Objective-C @implementation.
        ('OBJC_IMPLEMENTATION_DECL', 18),

        # An Objective-C @implementation for a category.
        ('OBJC_CATEGORY_IMPL_DECL', 19),

        # A typedef.
        ('TYPEDEF_DECL', 20),

        ###
        # Reference Kinds

        ('OBJC_SUPER_CLASS_REF', 40),
        ('OBJC_PROTOCOL_REF', 41),
        ('OBJC_CLASS_REF', 42),

        # A reference to a type declaration.
        #
        # A type reference occurs anywhere where a type is named but not
        # declared. For example, given:
        #   typedef unsigned size_type;
        #   size_type size;
        #
        # The typedef is a declaration of size_type (CXCursor_TypedefDecl),
        # while the type of the variable "size" is referenced. The cursor
        # referenced by the type of size is the typedef for size_type.
        ('TYPE_REF', 43),

        ###
        # Invalid/Error Kinds

        ('INVALID_FILE', 70),
        ('NO_DECL_FOUND', 71),
        ('NOT_IMPLEMENTED', 72),

        ###
        # Expression Kinds

        # An expression whose specific kind is not exposed via this interface.
        #
        # Unexposed expressions have the same operations as any other kind of
        # expression; one can extract their location information, spelling,
        # children, etc. However, the specific kind of the expression is not
        # reported.
        ('UNEXPOSED_EXPR', 100),

        # An expression that refers to some value declaration, such as a
        # function, variable, or enumerator.
        ('DECL_REF_EXPR', 101),

        # An expression that refers to a member of a struct, union, class,
        # Objective-C class, etc.
        ('MEMBER_REF_EXPR', 102),

        # An expression that calls a function.
        ('CALL_EXPR', 103),

        # An expression that sends a message to an Objective-C object or
        # class.
        ('OBJC_MESSAGE_EXPR', 104),
        ):
    setattr(CursorKind, _kind_name, CursorKind(_kind_id))
# Do not leave the loop variables behind as module attributes.
del _kind_name, _kind_id

###
# Statement Kinds

# A statement whose specific kind is not exposed via this interface.
#
# Unexposed statements have the same operations as any other kind of statement;
# one can extract their location information, spelling, children, etc. However,
# the specific kind of the statement is not reported.
CursorKind.UNEXPOSED_STMT = CursorKind(200)

###
# Other Kinds

# Cursor that represents the translation unit itself.
#
# The translation unit cursor exists primarily to act as the root cursor for
# traversing the contents of a translation unit.
CursorKind.TRANSLATION_UNIT = CursorKind(300)

### Cursors ###

class Cursor(Structure):
    """
    A Cursor refers to one element of the AST and doubles as the handle used
    to walk to related elements (children, definition).
    """
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 3)]

    def __eq__(self, other):
        # Equality is delegated to the library rather than comparing fields.
        return Cursor_eq(self, other)

    def __ne__(self, other):
        same = Cursor_eq(self, other)
        return not same

    def is_definition(self):
        """
        Tell whether the declaration this cursor points at is also the
        definition of the entity.
        """
        return Cursor_is_def(self)

    def get_definition(self):
        """
        For a cursor that is a declaration of, or a reference to, some entity,
        return the cursor pointing at that entity's definition.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return Cursor_def(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return Cursor_usr(self)

    @property
    def kind(self):
        """The CursorKind registered for this cursor's kind id."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """
        The spelling of the entity pointed at by the cursor, or None when the
        cursor is not a declaration.
        """
        if self.kind.is_declaration():
            return Cursor_spelling(self)
        # FIXME: clang_getCursorSpelling should be fixed to not assert on
        # this, for consistency with clang_getCursorUSR.
        return None

    @property
    def location(self):
        """
        The source location (the starting character) of the entity pointed at
        by the cursor.
        """
        return Cursor_loc(self)

    @property
    def extent(self):
        """
        The source range (the range of text) occupied by the entity pointed at
        by the cursor.
        """
        return Cursor_extent(self)

    def get_children(self):
        """Return an iterator over the child cursors of this cursor."""
        found = []

        # FIXME: Expose iteration from CIndex, PR6125.
        def collect(node, _parent, bucket):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert node != Cursor_null()
            bucket.append(node)
            return 1 # continue

        Cursor_visit(self, Cursor_visit_callback(collect), found)
        return iter(found)

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck hook: map the null cursor to None."""
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        return None if res == Cursor_null() else res

## CIndex Objects ##

# CIndex objects (derived from ClangObject) are essentially lightweight
# wrappers attached to some underlying object, which is exposed via CIndex as
# a void*.

class ClangObject(object):
    """
    Base class for wrappers around CIndex objects; it holds the underlying
    pointer and hands it to ctypes when the wrapper is passed as an argument.
    """
    def __init__(self, obj):
        assert isinstance(obj, c_object_p) and obj
        self.obj = obj
        self._as_parameter_ = obj

    def from_param(self):
        return self._as_parameter_


class _CXUnsavedFile(Structure):
    """Helper for passing unsaved file arguments."""
    _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]

## Diagnostic Conversion ##

# Diagnostic objects are temporary, we must extract all the information from the
# diagnostic object when it is passed to the callback.

_clang_getDiagnosticSeverity = lib.clang_getDiagnosticSeverity
_clang_getDiagnosticSeverity.argtypes = [c_object_p]
_clang_getDiagnosticSeverity.restype = c_int

_clang_getDiagnosticLocation = lib.clang_getDiagnosticLocation
_clang_getDiagnosticLocation.argtypes = [c_object_p]
_clang_getDiagnosticLocation.restype = SourceLocation

_clang_getDiagnosticSpelling = lib.clang_getDiagnosticSpelling
_clang_getDiagnosticSpelling.argtypes = [c_object_p]
_clang_getDiagnosticSpelling.restype = _CXString
_clang_getDiagnosticSpelling.errcheck = _CXString.from_result

_clang_getDiagnosticNumRanges = lib.clang_getDiagnosticNumRanges
_clang_getDiagnosticNumRanges.argtypes = [c_object_p]
_clang_getDiagnosticNumRanges.restype = c_uint

_clang_getDiagnosticRange = lib.clang_getDiagnosticRange
_clang_getDiagnosticRange.argtypes = [c_object_p, c_uint]
_clang_getDiagnosticRange.restype = SourceRange

_clang_getDiagnosticNumFixIts = lib.clang_getDiagnosticNumFixIts
_clang_getDiagnosticNumFixIts.argtypes = [c_object_p]
_clang_getDiagnosticNumFixIts.restype = c_uint

_clang_getDiagnosticFixItKind = lib.clang_getDiagnosticFixItKind
_clang_getDiagnosticFixItKind.argtypes = [c_object_p, c_uint]
_clang_getDiagnosticFixItKind.restype = c_int

_clang_getDiagnosticFixItInsertion = lib.clang_getDiagnosticFixItInsertion
_clang_getDiagnosticFixItInsertion.argtypes = [c_object_p, c_uint,
                                               POINTER(SourceLocation)]
_clang_getDiagnosticFixItInsertion.restype = _CXString
_clang_getDiagnosticFixItInsertion.errcheck = _CXString.from_result

# A removal fix-it carries no replacement text: _convert_fixit() calls this
# with (diagnostic, index) only and uses the result as the range to delete.
# The prototype previously declared a third pointer argument and a string
# result, so every removal fix-it failed on the argument count.
# NOTE(review): confirm the (CXDiagnostic, unsigned) -> CXSourceRange signature
# against the Index.h this binding is built for.
_clang_getDiagnosticFixItRemoval = lib.clang_getDiagnosticFixItRemoval
_clang_getDiagnosticFixItRemoval.argtypes = [c_object_p, c_uint]
_clang_getDiagnosticFixItRemoval.restype = SourceRange

_clang_getDiagnosticFixItReplacement = lib.clang_getDiagnosticFixItReplacement
_clang_getDiagnosticFixItReplacement.argtypes = [c_object_p, c_uint,
                                                 POINTER(SourceRange)]
_clang_getDiagnosticFixItReplacement.restype = _CXString
_clang_getDiagnosticFixItReplacement.errcheck = _CXString.from_result

def _convert_fixit(diag_ptr, index):
    """
    Convert fix-it number `index` of the diagnostic `diag_ptr` into a FixIt.

    All three fix-it kinds are normalized to "replace `range` with `value`":
    an insertion becomes an empty range at the insertion point, and a removal
    becomes a range replaced by the empty string.
    """
    # We normalize all the fix-its to a single representation, this is more
    # convenient.
    #
    # FIXME: Push this back into API? It isn't exactly clear what the
    # SourceRange semantics are, we should make sure we can represent an empty
    # range.
    kind = _clang_getDiagnosticFixItKind(diag_ptr, index)
    fix_range = None
    value = None
    if kind == 0: # insertion
        location = SourceLocation()
        value = _clang_getDiagnosticFixItInsertion(diag_ptr, index,
                                                   byref(location))
        fix_range = SourceRange.from_locations(location, location)
    elif kind == 1: # removal
        fix_range = _clang_getDiagnosticFixItRemoval(diag_ptr, index)
        value = ''
    else: # replacement
        assert kind == 2
        fix_range = SourceRange()
        value = _clang_getDiagnosticFixItReplacement(diag_ptr, index,
                                                     byref(fix_range))
    return FixIt(fix_range, value)

def _convert_diag(diag_ptr, diag_list):
    """
    Diagnostic callback: copy everything out of the temporary diagnostic
    `diag_ptr` into a Diagnostic object and append it to `diag_list`.
    """
    severity = _clang_getDiagnosticSeverity(diag_ptr)
    loc = _clang_getDiagnosticLocation(diag_ptr)
    spelling = _clang_getDiagnosticSpelling(diag_ptr)

    # Diagnostic ranges.
    num_ranges = _clang_getDiagnosticNumRanges(diag_ptr)
    ranges = [_clang_getDiagnosticRange(diag_ptr, i)
              for i in range(num_ranges)]

    fixits = [_convert_fixit(diag_ptr, i)
              for i in range(_clang_getDiagnosticNumFixIts(diag_ptr))]

    diag_list.append(Diagnostic(severity, loc, spelling, ranges, fixits))

###

class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        """
        return Index(Index_create(excludeDecls))

    def __del__(self):
        Index_dispose(self)

    def read(self, path):
        """
        Load the translation unit from the given AST file.

        Returns a TranslationUnit carrying the diagnostics reported while
        loading, or None if the library did not return a translation unit.
        """
        # FIXME: In theory, we could support streaming diagnostics. It's hard to
        # integrate this into the API cleanly, however. Resolve.
        diags = []
        ptr = TranslationUnit_read(self, path,
                                   Diagnostic_callback(_convert_diag), diags)
        # TranslationUnit requires the diagnostics list; omitting it raised
        # TypeError for every successfully loaded AST file.
        return TranslationUnit(ptr, diags) if ptr else None

    def parse(self, path, args=None, unsaved_files=None):
        """
        Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        In-memory contents for files can be provided by passing a list of pairs
        as unsaved_files, the first item should be the filenames to be mapped
        and the second should be the contents to be substituted for the
        file. The contents may be passed as strings or file objects.

        Returns a TranslationUnit, or None if parsing produced no translation
        unit. Raises TypeError if an unsaved file's contents are neither a
        string nor an object whose read() returns a string.
        """
        # None stands for "no extra arguments" / "no unsaved files"; this
        # avoids sharing mutable default lists between calls.
        if args is None:
            args = []
        if unsaved_files is None:
            unsaved_files = []

        # 0 is marshalled as a NULL pointer when there is nothing to pass.
        arg_array = 0
        if len(args):
            arg_array = (c_char_p * len(args))(* args)
        unsaved_files_array = 0
        if len(unsaved_files):
            unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))()
            for i, (name, value) in enumerate(unsaved_files):
                if not isinstance(value, str):
                    # FIXME: It would be great to support an efficient version
                    # of this, one day.
                    value = value.read()
                if not isinstance(value, str):
                    raise TypeError('Unexpected unsaved file contents.')
                unsaved_files_array[i].name = name
                unsaved_files_array[i].contents = value
                unsaved_files_array[i].length = len(value)
        # FIXME: In theory, we could support streaming diagnostics. It's hard to
        # integrate this into the API cleanly, however. Resolve.
        diags = []
        ptr = TranslationUnit_parse(self, path, len(args), arg_array,
                                    len(unsaved_files), unsaved_files_array,
                                    Diagnostic_callback(_convert_diag), diags)
        return TranslationUnit(ptr, diags) if ptr else None


class TranslationUnit(ClangObject):
    """
    The TranslationUnit class represents a source code translation unit and
    provides read-only access to its top-level declarations.
    """

    def __init__(self, ptr, diagnostics):
        """
        Wrap the translation unit pointer `ptr`; `diagnostics` is the list of
        Diagnostic objects collected while it was loaded.
        """
        ClangObject.__init__(self, ptr)
        self.diagnostics = diagnostics

    def __del__(self):
        TranslationUnit_dispose(self)

    @property
    def cursor(self):
        """Retrieve the cursor that represents the given translation unit."""
        return TranslationUnit_cursor(self)

    @property
    def spelling(self):
        """Get the original translation unit source file name."""
        return TranslationUnit_spelling(self)

    def get_includes(self):
        """
        Return an iterable sequence of FileInclusion objects that describe the
        sequence of inclusions in a translation unit. The first object in
        this sequence is always the input file. Note that this method will not
        recursively iterate over header files included through precompiled
        headers.
        """
        def visitor(fobj, lptr, depth, includes):
            loc = lptr.contents
            includes.append(FileInclusion(loc.file, File(fobj), loc, depth))

        # Automatically adapt CIndex/ctype pointers to python objects
        includes = []
        TranslationUnit_includes(self,
                                 TranslationUnit_includes_callback(visitor),
                                 includes)
        return iter(includes)

class File(ClangObject):
    """
    The File class represents a particular source file that is part of a
    translation unit.
    """

    @property
    def name(self):
        """Return the complete file and path name of the file."""
        return File_name(self)

    @property
    def time(self):
        """Return the last modification time of the file."""
        return File_time(self)

class FileInclusion(object):
    """
    The FileInclusion class represents the inclusion of one source file by
    another via a '#include' directive or as the input file for the translation
    unit. This class provides information about the included file, the including
    file, the location of the '#include' directive and the depth of the included
    file in the stack. Note that the input file has depth 0.
    """

    def __init__(self, src, tgt, loc, depth):
        self.source = src
        self.include = tgt
        self.location = loc
        self.depth = depth

    @property
    def is_input_file(self):
        """True if the included file is the input file."""
        return self.depth == 0

# Additional Functions and Types

# String Functions
_CXString_dispose = lib.clang_disposeString
_CXString_dispose.argtypes = [_CXString]

_CXString_getCString = lib.clang_getCString
_CXString_getCString.argtypes = [_CXString]
_CXString_getCString.restype = c_char_p

# Source Location Functions
SourceLocation_loc = lib.clang_getInstantiationLocation
SourceLocation_loc.argtypes = [SourceLocation, POINTER(c_object_p),
                               POINTER(c_uint), POINTER(c_uint),
                               POINTER(c_uint)]

# Source Range Functions
SourceRange_getRange = lib.clang_getRange
SourceRange_getRange.argtypes = [SourceLocation, SourceLocation]
SourceRange_getRange.restype = SourceRange

SourceRange_start = lib.clang_getRangeStart
SourceRange_start.argtypes = [SourceRange]
SourceRange_start.restype = SourceLocation

SourceRange_end = lib.clang_getRangeEnd
SourceRange_end.argtypes = [SourceRange]
SourceRange_end.restype = SourceLocation

# CursorKind Functions
CursorKind_is_decl = lib.clang_isDeclaration
CursorKind_is_decl.argtypes = [CursorKind]
CursorKind_is_decl.restype = bool

# The remaining CursorKind predicates all take a CursorKind and have their
# integer result converted with bool().
CursorKind_is_ref = lib.clang_isReference
CursorKind_is_expr = lib.clang_isExpression
CursorKind_is_stmt = lib.clang_isStatement
CursorKind_is_inv = lib.clang_isInvalid
for _kind_test in (CursorKind_is_ref, CursorKind_is_expr,
                   CursorKind_is_stmt, CursorKind_is_inv):
    _kind_test.argtypes = [CursorKind]
    _kind_test.restype = bool
del _kind_test

# Cursor Functions
# TODO: Implement this function
Cursor_get = lib.clang_getCursor
Cursor_get.argtypes = [TranslationUnit, SourceLocation]
Cursor_get.restype = Cursor

Cursor_null = lib.clang_getNullCursor
Cursor_null.restype = Cursor

Cursor_eq = lib.clang_equalCursors
Cursor_eq.argtypes = [Cursor, Cursor]
Cursor_eq.restype = c_uint

# Functions taking a single Cursor. Each row is (function, result type,
# errcheck hook or None); string results are unwrapped by
# _CXString.from_result and cursor results have the null cursor mapped to None
# by Cursor.from_result.
Cursor_usr = lib.clang_getCursorUSR
Cursor_is_def = lib.clang_isCursorDefinition
Cursor_def = lib.clang_getCursorDefinition
Cursor_spelling = lib.clang_getCursorSpelling
Cursor_loc = lib.clang_getCursorLocation
Cursor_extent = lib.clang_getCursorExtent
Cursor_ref = lib.clang_getCursorReferenced
for _cursor_fn, _result_type, _result_hook in (
        (Cursor_usr, _CXString, _CXString.from_result),
        (Cursor_is_def, bool, None),
        (Cursor_def, Cursor, Cursor.from_result),
        (Cursor_spelling, _CXString, _CXString.from_result),
        (Cursor_loc, SourceLocation, None),
        (Cursor_extent, SourceRange, None),
        (Cursor_ref, Cursor, Cursor.from_result),
        ):
    _cursor_fn.argtypes = [Cursor]
    _cursor_fn.restype = _result_type
    if _result_hook is not None:
        _cursor_fn.errcheck = _result_hook
del _cursor_fn, _result_type, _result_hook

# Child visitation: the callback receives (child, parent, client data) and
# returns an int telling the library how to proceed.
Cursor_visit_callback = CFUNCTYPE(c_int, Cursor, Cursor, py_object)
Cursor_visit = lib.clang_visitChildren
Cursor_visit.argtypes = [Cursor, Cursor_visit_callback, py_object]
+Cursor_visit.restype = c_uint + +# Index Functions +Index_create = lib.clang_createIndex +Index_create.argtypes = [c_int] +Index_create.restype = c_object_p + +Index_dispose = lib.clang_disposeIndex +Index_dispose.argtypes = [Index] + +# Translation Unit Functions +Diagnostic_callback = CFUNCTYPE(None, c_object_p, py_object) + +TranslationUnit_read = lib.clang_createTranslationUnit +TranslationUnit_read.argtypes = [Index, c_char_p, + Diagnostic_callback, py_object] +TranslationUnit_read.restype = c_object_p + +TranslationUnit_parse = lib.clang_createTranslationUnitFromSourceFile +TranslationUnit_parse.argtypes = [Index, c_char_p, c_int, c_void_p, + c_int, c_void_p, + Diagnostic_callback, py_object] +TranslationUnit_parse.restype = c_object_p + +TranslationUnit_cursor = lib.clang_getTranslationUnitCursor +TranslationUnit_cursor.argtypes = [TranslationUnit] +TranslationUnit_cursor.restype = Cursor +TranslationUnit_cursor.errcheck = Cursor.from_result + +TranslationUnit_spelling = lib.clang_getTranslationUnitSpelling +TranslationUnit_spelling.argtypes = [TranslationUnit] +TranslationUnit_spelling.restype = _CXString +TranslationUnit_spelling.errcheck = _CXString.from_result + +TranslationUnit_dispose = lib.clang_disposeTranslationUnit +TranslationUnit_dispose.argtypes = [TranslationUnit] + +TranslationUnit_includes_callback = CFUNCTYPE(None, + c_object_p, + POINTER(SourceLocation), + c_uint, py_object) +TranslationUnit_includes = lib.clang_getInclusions +TranslationUnit_includes.argtypes = [TranslationUnit, + TranslationUnit_includes_callback, + py_object] + +# File Functions +File_name = lib.clang_getFileName +File_name.argtypes = [File] +File_name.restype = c_char_p + +File_time = lib.clang_getFileTime +File_time.argtypes = [File] +File_time.restype = c_uint + +### + +__all__ = ['Index', 'TranslationUnit', 'Cursor', 'CursorKind', + 'Diagnostic', 'FixIt', 'SourceRange', 'SourceLocation', 'File'] diff --git a/bindings/python/examples/cindex/cindex-dump.py 
# b/bindings/python/examples/cindex/cindex-dump.py
# new file mode 100644
# index 000000000000..af7ddab6ea53
# --- /dev/null
# +++ b/bindings/python/examples/cindex/cindex-dump.py
# @@ -0,0 +1,87 @@
#!/usr/bin/env python

#===- cindex-dump.py - cindex/Python Source Dump -------------*- python -*--===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

"""
A simple command line tool for dumping a source file using the Clang Index
Library.
"""

# Parsed command-line options.  main() rebinds this global before any of the
# helpers below read it.
opts = None

# Cursors that get_cursor_id() has already numbered; a cursor's ID is its
# position in this list.  This replaces the mutable default argument the
# function used to rely on for the same purpose.
_known_cursors = []


def get_diag_info(diag):
    """Return the attributes of *diag* that the dump prints, as a dict."""
    return {'severity': diag.severity,
            'location': diag.location,
            'spelling': diag.spelling,
            'ranges': diag.ranges,
            'fixits': diag.fixits}


def get_cursor_id(cursor, cursor_list=None):
    """Return a small integer identifying *cursor*, or None.

    None is returned when ``opts.showIDs`` is false or when *cursor* is None.
    Otherwise the ID is the index of the first entry of *cursor_list* that
    compares equal to *cursor*; an unseen cursor is appended and gets the next
    index.  When *cursor_list* is omitted the module-wide list is used, so IDs
    stay stable across calls.
    """
    if cursor_list is None:
        cursor_list = _known_cursors

    if not opts.showIDs:
        return None

    if cursor is None:
        return None

    # FIXME: This is really slow. It would be nice if the index API exposed
    # something that let us hash cursors.
    for i, c in enumerate(cursor_list):
        if cursor == c:
            return i
    cursor_list.append(cursor)
    return len(cursor_list) - 1


def get_info(node, depth=0):
    """Return a dict describing *node* and, recursively, its children.

    'children' is None once *depth* has reached ``opts.maxDepth`` (when that
    option is set); otherwise it is a list with one dict per child.
    """
    if opts.maxDepth is not None and depth >= opts.maxDepth:
        children = None
    else:
        children = [get_info(c, depth + 1)
                    for c in node.get_children()]
    return {'id': get_cursor_id(node),
            'kind': node.kind,
            'usr': node.get_usr(),
            'spelling': node.spelling,
            'location': node.location,
            'extent.start': node.extent.start,
            'extent.end': node.extent.end,
            'is_definition': node.is_definition(),
            'definition id': get_cursor_id(node.get_definition()),
            'children': children}


def main():
    """Parse the command line, load the input and pretty-print its dump.

    Prints the diagnostics first and the cursor tree second.
    """
    from clang.cindex import Index
    from pprint import pprint

    from optparse import OptionParser

    global opts

    parser = OptionParser("usage: %prog [options] {filename} [clang-args*]")
    # --show-ids is a flag: without action="store_true" optparse would treat
    # the following command-line argument as the option's value.
    parser.add_option("", "--show-ids", dest="showIDs",
                      help="Compute cursor IDs (very slow)",
                      action="store_true", default=False)
    parser.add_option("", "--max-depth", dest="maxDepth",
                      help="Limit cursor expansion to depth N",
                      metavar="N", type=int, default=None)
    parser.disable_interspersed_args()
    (opts, args) = parser.parse_args()

    if len(args) == 0:
        parser.error('invalid number arguments')

    index = Index.create()
    tu = index.parse(None, args)
    if not tu:
        parser.error("unable to load input")

    # Build a real list so the entries are printed even where map() is lazy.
    pprint(('diags', [get_diag_info(d) for d in tu.diagnostics]))
    pprint(('nodes', get_info(tu.cursor)))


if __name__ == '__main__':
    main()

# diff --git a/bindings/python/examples/cindex/cindex-includes.py b/bindings/python/examples/cindex/cindex-includes.py
# new file mode 100644
# index 000000000000..17500227a349
# --- /dev/null
# +++ b/bindings/python/examples/cindex/cindex-includes.py
# @@ -0,0 +1,58 @@
#!/usr/bin/env python

#===- cindex-includes.py - cindex/Python Inclusion Graph -----*- python -*--===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open
# Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

"""
A simple command line tool for dumping a Graphviz description (dot) that
describes include dependencies.
"""


def main():
    """Write the include graph of the given input to stdout as a dot digraph.

    The command line is ``{filename} [clang-args*]``; all positional
    arguments are handed to ``Index.parse``.
    """
    import sys
    from clang.cindex import Index

    from optparse import OptionParser, OptionGroup

    parser = OptionParser("usage: %prog [options] {filename} [clang-args*]")
    parser.disable_interspersed_args()
    opts, args = parser.parse_args()
    if not args:
        parser.error('invalid number arguments')

    # FIXME: Add an output file option
    out = sys.stdout

    # Keep the index bound to a local for the whole function rather than
    # chaining the calls.
    index = Index.create()
    tu = index.parse(None, args)
    if not tu:
        parser.error("unable to load input")

    def name(f):
        """Return the quoted node name for file *f*, or None if *f* is falsy."""
        if not f:
            return None
        return "\"" + f.name + "\""

    # Generate the include graph
    out.write("digraph G {\n")
    for inclusion in tu.get_includes():
        if inclusion.is_input_file:
            # Always write the input file as a node just in case it doesn't
            # actually include anything. This would generate a 1 node graph.
            entry = name(inclusion.include)
        else:
            entry = '%s->%s' % (name(inclusion.source),
                                name(inclusion.include))
        out.write(" " + entry + "\n")
    out.write("}\n")


if __name__ == '__main__':
    main()

# diff --git a/bindings/python/tests/__init__.py b/bindings/python/tests/__init__.py
# new file mode 100644
# index 000000000000..e69de29bb2d1
# --- /dev/null
# +++ b/bindings/python/tests/__init__.py
# diff --git a/bindings/python/tests/cindex/INPUTS/header1.h b/bindings/python/tests/cindex/INPUTS/header1.h
# new file mode 100644
# index 000000000000..b4eacbee3754
# --- /dev/null
# +++ b/bindings/python/tests/cindex/INPUTS/header1.h
# @@ -0,0 +1,6 @@
# +#ifndef HEADER1
# +#define HEADER1
# +
# +#include "header3.h"
# +
# +#endif
# diff --git a/bindings/python/tests/cindex/INPUTS/header2.h b/bindings/python/tests/cindex/INPUTS/header2.h
# new file mode 100644
# index 000000000000..c4eddc0c5620
# --- /dev/null
# +++ b/bindings/python/tests/cindex/INPUTS/header2.h
# @@ -0,0 +1,6 @@
# +#ifndef HEADER2
# +#define HEADER2
# +
# +#include "header3.h"
# +
# +#endif
# diff --git a/bindings/python/tests/cindex/INPUTS/header3.h b/bindings/python/tests/cindex/INPUTS/header3.h
# new file mode 100644
# index 000000000000..6dca764860e1
# --- /dev/null
# +++ b/bindings/python/tests/cindex/INPUTS/header3.h
# @@ -0,0 +1,3 @@
# +// Not a guarded header!
# +
# +void f();
# diff --git a/bindings/python/tests/cindex/INPUTS/hello.cpp b/bindings/python/tests/cindex/INPUTS/hello.cpp
# new file mode 100644
# index 000000000000..7ef086e56b2c
# --- /dev/null
# +++ b/bindings/python/tests/cindex/INPUTS/hello.cpp
# @@ -0,0 +1,6 @@
# +#include "stdio.h"
# +
# +int main(int argc, char* argv[]) {
# +  printf("hello world\n");
# +  return 0;
# +}
# diff --git a/bindings/python/tests/cindex/INPUTS/include.cpp b/bindings/python/tests/cindex/INPUTS/include.cpp
# new file mode 100644
# index 000000000000..60cfdaae4d09
# --- /dev/null
# +++ b/bindings/python/tests/cindex/INPUTS/include.cpp
# @@ -0,0 +1,5 @@
# +#include "header1.h"
# +#include "header2.h"
# +#include "header1.h"
# +
# +int main() { }
# diff --git a/bindings/python/tests/cindex/INPUTS/parse_arguments.c b/bindings/python/tests/cindex/INPUTS/parse_arguments.c
# new file mode 100644
# index 000000000000..7196486c78a1
# --- /dev/null
# +++ b/bindings/python/tests/cindex/INPUTS/parse_arguments.c
# @@ -0,0 +1,2 @@
# +int DECL_ONE = 1;
# +int DECL_TWO = 2;
# diff --git a/bindings/python/tests/cindex/__init__.py b/bindings/python/tests/cindex/__init__.py
# new file mode 100644
# index 000000000000..e69de29bb2d1
# --- /dev/null
# +++ b/bindings/python/tests/cindex/__init__.py
# diff --git a/bindings/python/tests/cindex/test_cursor.py b/bindings/python/tests/cindex/test_cursor.py
# new file mode 100644
# index 000000000000..a653ba7bf28e
# --- /dev/null
# +++ b/bindings/python/tests/cindex/test_cursor.py
# @@ -0,0 +1,59 @@
from clang.cindex import Index, CursorKind

# C source parsed by test_get_children.  The test asserts that 's0' is
# reported at line 4, column 8, so the first four lines must not be reflowed.
kInput = """\
// FIXME: Find nicer way to drop builtins and other cruft.
int start_decl;

struct s0 {
  int a;
  int b;
};

struct s1;

void f0(int a0, int a1) {
  int l0, l1;

  if (a0)
    return;

  for (;;) {
    break;
  }
}
"""


def test_get_children():
    """Check the top-level cursors that follow 'start_decl' in kInput."""
    index = Index.create()
    tu = index.parse('t.c', unsaved_files=[('t.c', kInput)])

    # Skip until past start_decl.  The next() builtin is used instead of the
    # Python-2-only it.next() method.
    it = tu.cursor.get_children()
    while next(it).spelling != 'start_decl':
        pass

    tu_nodes = list(it)

    assert len(tu_nodes) == 3
    s0, s1, f0 = tu_nodes

    assert s0.kind == CursorKind.STRUCT_DECL
    assert s0.spelling == 's0'
    assert s0.is_definition()
    assert s0.location.file.name == 't.c'
    assert s0.location.line == 4
    assert s0.location.column == 8

    s0_nodes = list(s0.get_children())
    assert len(s0_nodes) == 2
    assert s0_nodes[0].kind == CursorKind.FIELD_DECL
    assert s0_nodes[0].spelling == 'a'
    assert s0_nodes[1].kind == CursorKind.FIELD_DECL
    assert s0_nodes[1].spelling == 'b'

    assert s1.kind == CursorKind.STRUCT_DECL
    assert s1.spelling == 's1'
    assert not s1.is_definition()

    assert f0.kind == CursorKind.FUNCTION_DECL
    assert f0.spelling == 'f0'
    assert f0.is_definition()

# diff --git a/bindings/python/tests/cindex/test_cursor_kind.py b/bindings/python/tests/cindex/test_cursor_kind.py
# new file mode 100644
# index 000000000000..bdfa31855835
# --- /dev/null
# +++ b/bindings/python/tests/cindex/test_cursor_kind.py
# @@ -0,0 +1,27 @@
from clang.cindex import CursorKind


def test_name():
    """Check the name reported for a known cursor kind."""
    # Compare by value: 'is' on a string literal tests object identity.
    assert CursorKind.UNEXPOSED_DECL.name == 'UNEXPOSED_DECL'


def test_get_all_kinds():
    """Check that get_all_kinds() contains two well-known kinds."""
    all_kinds = CursorKind.get_all_kinds()
    assert CursorKind.UNEXPOSED_DECL in all_kinds
    assert CursorKind.TRANSLATION_UNIT in all_kinds


def test_kind_groups():
    """Check that every kind classifies to exactly one group."""

    assert CursorKind.UNEXPOSED_DECL.is_declaration()
    assert CursorKind.TYPE_REF.is_reference()
    assert CursorKind.DECL_REF_EXPR.is_expression()
    assert CursorKind.UNEXPOSED_STMT.is_statement()
    assert CursorKind.INVALID_FILE.is_invalid()

    predicates = ('is_declaration', 'is_reference', 'is_expression',
                  'is_statement', 'is_invalid')
    for k in CursorKind.get_all_kinds():
        group = [n for n in predicates if getattr(k, n)()]

        # TRANSLATION_UNIT is the one kind expected to match no predicate.
        if k == CursorKind.TRANSLATION_UNIT:
            assert len(group) == 0
        else:
            assert len(group) == 1

# diff --git a/bindings/python/tests/cindex/test_diagnostics.py b/bindings/python/tests/cindex/test_diagnostics.py
# new file mode 100644
# index 000000000000..85187652917b
# --- /dev/null
# +++ b/bindings/python/tests/cindex/test_diagnostics.py
# @@ -0,0 +1,48 @@
from clang.cindex import *


def tu_from_source(source):
    """Parse *source* as the unsaved file 'INPUT.c' and return the result."""
    index = Index.create()
    tu = index.parse('INPUT.c', unsaved_files=[('INPUT.c', source)])
    # FIXME: Remove the need for this.
    tu.index = index
    return tu

# FIXME: We need support for invalid translation units to test better.


def test_diagnostic_warning():
    """Check the single warning reported for a non-void function with no return."""
    tu = tu_from_source("""int f0() {}\n""")
    assert len(tu.diagnostics) == 1
    diag = tu.diagnostics[0]
    assert diag.severity == Diagnostic.Warning
    assert diag.location.line == 1
    assert diag.location.column == 11
    assert (diag.spelling ==
            'control reaches end of non-void function')


def test_diagnostic_note():
    """Check the warning reported for an incompatible initializer."""
    # FIXME: We aren't getting notes here for some reason.
    tu = tu_from_source("""#define A x\nvoid *A = 1;\n""")
    assert len(tu.diagnostics) == 1
    diag = tu.diagnostics[0]
    assert diag.severity == Diagnostic.Warning
    assert diag.location.line == 2
    assert diag.location.column == 7
    assert 'incompatible' in diag.spelling
    # Disabled until the FIXME above is resolved:
    # assert tu.diagnostics[1].severity == Diagnostic.Note
    # assert tu.diagnostics[1].location.line == 1
    # assert tu.diagnostics[1].location.column == 11
    # assert tu.diagnostics[1].spelling == 'instantiated from'


def test_diagnostic_fixit():
    """Check the fix-it attached to the GNU old-style initializer warning."""
    tu = tu_from_source("""struct { int f0; } x = { f0 : 1 };""")
    assert len(tu.diagnostics) == 1
    diag = tu.diagnostics[0]
    assert diag.severity == Diagnostic.Warning
    assert diag.location.line == 1
    assert diag.location.column == 31
    assert diag.spelling.startswith('use of GNU old-style')
    assert len(diag.fixits) == 1
    fixit = diag.fixits[0]
    assert fixit.range.start.line == 1
    assert fixit.range.start.column == 26
    assert fixit.range.end.line == 1
    assert fixit.range.end.column == 30
    assert fixit.value == '.f0 = '

# diff --git a/bindings/python/tests/cindex/test_index.py b/bindings/python/tests/cindex/test_index.py
# new file mode 100644
# index 000000000000..dc173f04d218
# --- /dev/null
# +++ b/bindings/python/tests/cindex/test_index.py
# @@ -0,0 +1,15 @@
from clang.cindex import *
import os

kInputsDir = os.path.join(os.path.dirname(__file__), 'INPUTS')


def test_create():
    """Check that an index can be created without raising."""
    index = Index.create()

# FIXME: test Index.read


def test_parse():
    """Check the types returned by Index.create() and Index.parse()."""
    index = Index.create()
    assert isinstance(index, Index)
    tu = index.parse(os.path.join(kInputsDir, 'hello.cpp'))
    assert isinstance(tu, TranslationUnit)

# diff --git a/bindings/python/tests/cindex/test_translation_unit.py b/bindings/python/tests/cindex/test_translation_unit.py
# new file mode 100644
# index
# 000000000000..3c05c3f06af4
# --- /dev/null
# +++ b/bindings/python/tests/cindex/test_translation_unit.py
# @@ -0,0 +1,73 @@
from clang.cindex import *
import os

kInputsDir = os.path.join(os.path.dirname(__file__), 'INPUTS')


def test_spelling():
    """Check that a translation unit's spelling is the path it was parsed from."""
    path = os.path.join(kInputsDir, 'hello.cpp')
    index = Index.create()
    tu = index.parse(path)
    assert tu.spelling == path


def test_cursor():
    """Check the type and kind of a translation unit's root cursor."""
    path = os.path.join(kInputsDir, 'hello.cpp')
    index = Index.create()
    tu = index.parse(path)
    c = tu.cursor
    assert isinstance(c, Cursor)
    assert c.kind is CursorKind.TRANSLATION_UNIT


def test_parse_arguments():
    """Check that -D arguments passed to parse() rename the two declarations."""
    path = os.path.join(kInputsDir, 'parse_arguments.c')
    index = Index.create()
    tu = index.parse(path, ['-DDECL_ONE=hello', '-DDECL_TWO=hi'])
    spellings = [c.spelling for c in tu.cursor.get_children()]
    assert spellings[-2] == 'hello'
    assert spellings[-1] == 'hi'


def test_unsaved_files():
    """Check parsing of a source and a header that exist only as unsaved files."""
    index = Index.create()
    # FIXME: Why can't we just use "fake.h" here (instead of /tmp/fake.h)?
    tu = index.parse('fake.c', unsaved_files=[
        ('fake.c', """
#include "/tmp/fake.h"
int x;
int SOME_DEFINE;
"""),
        ('/tmp/fake.h', """
#define SOME_DEFINE y
"""),
    ])
    spellings = [c.spelling for c in tu.cursor.get_children()]
    assert spellings[-2] == 'x'
    assert spellings[-1] == 'y'


def test_unsaved_files_2():
    """Check that an unsaved file's contents may be given as a file-like object."""
    # The StringIO module only exists on Python 2; fall back to io elsewhere.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    index = Index.create()
    tu = index.parse('fake.c', unsaved_files=[
        ('fake.c', StringIO('int x;'))])
    spellings = [c.spelling for c in tu.cursor.get_children()]
    assert spellings[-1] == 'x'


def test_includes():
    """Check the (source, include) pairs reported for INPUTS/include.cpp."""
    def eq(expected, actual):
        # The input file itself has no including source to compare.
        if actual.is_input_file:
            return expected[1] == actual.include.name
        return (expected[0] == actual.source.name and
                expected[1] == actual.include.name)

    src = os.path.join(kInputsDir, 'include.cpp')
    h1 = os.path.join(kInputsDir, "header1.h")
    h2 = os.path.join(kInputsDir, "header2.h")
    h3 = os.path.join(kInputsDir, "header3.h")
    inc = [(None, src), (src, h1), (h1, h3), (src, h2), (h2, h3)]

    index = Index.create()
    tu = index.parse(src)
    # NOTE(review): zip() stops at the shorter sequence, so missing or extra
    # inclusions go unnoticed here -- consider also asserting the count once
    # the expected number is confirmed.
    for expected, actual in zip(inc, tu.get_includes()):
        assert eq(expected, actual)