diff options
Diffstat (limited to 'contrib/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h')
-rw-r--r-- | contrib/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h | 788 |
1 files changed, 788 insertions, 0 deletions
diff --git a/contrib/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/contrib/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h new file mode 100644 index 000000000000..88c1bb7e729e --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -0,0 +1,788 @@ +//===-- PythonDataObjects.h--------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// !! FIXME FIXME FIXME !! +// +// Python APIs nearly all can return an exception. They do this +// by returning NULL, or -1, or some such value and setting +// the exception state with PyErr_Set*(). Exceptions must be +// handled before further python API functions are called. Failure +// to do so will result in asserts on debug builds of python. +// It will also sometimes, but not usually result in crashes of +// release builds. +// +// Nearly all the code in this header does not handle python exceptions +// correctly. It should all be converted to return Expected<> or +// Error types to capture the exception. +// +// Everything in this file except functions that return Error or +// Expected<> is considered deprecated and should not be +// used in new code. If you need to use it, fix it first. +// +// +// TODOs for this file +// +// * Make all methods safe for exceptions. +// +// * Eliminate method signatures that must translate exceptions into +// empty objects or NULLs. Almost everything here should return +// Expected<>. It should be acceptable for certain operations that +// can never fail to assert instead, such as the creation of +// PythonString from a string literal. 
+// +// * Eliminate Reset(), and make all non-default constructors private. +// Python objects should be created with Retain<> or Take<>, and they +// should be assigned with operator= +// +// * Eliminate default constructors, make python objects always +// nonnull, and use optionals where necessary. +// + + +#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H +#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H + +#include "lldb/Host/Config.h" + +#if LLDB_ENABLE_PYTHON + +// LLDB Python header must be included first +#include "lldb-python.h" + +#include "lldb/Host/File.h" +#include "lldb/Utility/StructuredData.h" + +#include "llvm/ADT/ArrayRef.h" + +namespace lldb_private { +namespace python { + +class PythonObject; +class PythonBytes; +class PythonString; +class PythonList; +class PythonDictionary; +class PythonInteger; +class PythonException; + +class GIL { +public: + GIL() { + m_state = PyGILState_Ensure(); + assert(!PyErr_Occurred()); + } + ~GIL() { PyGILState_Release(m_state); } + +protected: + PyGILState_STATE m_state; +}; + +enum class PyObjectType { + Unknown, + None, + Boolean, + Integer, + Dictionary, + List, + String, + Bytes, + ByteArray, + Module, + Callable, + Tuple, + File +}; + +enum class PyRefType { + Borrowed, // We are not given ownership of the incoming PyObject. + // We cannot safely hold it without calling Py_INCREF. + Owned // We have ownership of the incoming PyObject. We should + // not call Py_INCREF. +}; + + +// Take a reference that you already own, and turn it into +// a PythonObject. +// +// Most python API methods will return a +1 reference +// if they succeed or NULL if and only if +// they set an exception. Use this to collect such return +// values, after checking for NULL. +// +// If T is not just PythonObject, then obj must be already be +// checked to be of the correct type. 
+template <typename T> T Take(PyObject *obj) { + assert(obj); + assert(!PyErr_Occurred()); + T thing(PyRefType::Owned, obj); + assert(thing.IsValid()); + return thing; +} + +// Retain a reference you have borrowed, and turn it into +// a PythonObject. +// +// A minority of python APIs return a borrowed reference +// instead of a +1. They will also return NULL if and only +// if they set an exception. Use this to collect such return +// values, after checking for NULL. +// +// If T is not just PythonObject, then obj must be already be +// checked to be of the correct type. +template <typename T> T Retain(PyObject *obj) { + assert(obj); + assert(!PyErr_Occurred()); + T thing(PyRefType::Borrowed, obj); + assert(thing.IsValid()); + return thing; +} + +// This class can be used like a utility function to convert from +// a llvm-friendly Twine into a null-terminated const char *, +// which is the form python C APIs want their strings in. +// +// Example: +// const llvm::Twine &some_twine; +// PyFoo_Bar(x, y, z, NullTerminated(some_twine)); +// +// Why a class instead of a function? If the twine isn't already null +// terminated, it will need a temporary buffer to copy the string +// into. We need that buffer to stick around for the lifetime of the +// statement. 
+class NullTerminated { + const char *str; + llvm::SmallString<32> storage; + +public: + NullTerminated(const llvm::Twine &twine) { + llvm::StringRef ref = twine.toNullTerminatedStringRef(storage); + str = ref.begin(); + } + operator const char *() { return str; } +}; + +inline llvm::Error nullDeref() { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "A NULL PyObject* was dereferenced"); +} + +inline llvm::Error exception(const char *s = nullptr) { + return llvm::make_error<PythonException>(s); +} + +inline llvm::Error keyError() { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "key not in dict"); +} + +inline const char *py2_const_cast(const char *s) { return s; } + +enum class PyInitialValue { Invalid, Empty }; + +// DOC: https://docs.python.org/3/c-api/arg.html#building-values +template <typename T, typename Enable = void> struct PythonFormat; + +template <typename T, char F> struct PassthroughFormat { + static constexpr char format = F; + static constexpr T get(T t) { return t; } +}; + +template <> struct PythonFormat<char *> : PassthroughFormat<char *, 's'> {}; +template <> struct PythonFormat<const char *> : + PassthroughFormat<const char *, 's'> {}; +template <> struct PythonFormat<char> : PassthroughFormat<char, 'b'> {}; +template <> +struct PythonFormat<unsigned char> : PassthroughFormat<unsigned char, 'B'> {}; +template <> struct PythonFormat<short> : PassthroughFormat<short, 'h'> {}; +template <> +struct PythonFormat<unsigned short> : PassthroughFormat<unsigned short, 'H'> {}; +template <> struct PythonFormat<int> : PassthroughFormat<int, 'i'> {}; +template <> struct PythonFormat<bool> : PassthroughFormat<bool, 'p'> {}; +template <> +struct PythonFormat<unsigned int> : PassthroughFormat<unsigned int, 'I'> {}; +template <> struct PythonFormat<long> : PassthroughFormat<long, 'l'> {}; +template <> +struct PythonFormat<unsigned long> : PassthroughFormat<unsigned long, 'k'> {}; +template <> +struct PythonFormat<long 
long> : PassthroughFormat<long long, 'L'> {}; +template <> +struct PythonFormat<unsigned long long> + : PassthroughFormat<unsigned long long, 'K'> {}; +template <> +struct PythonFormat<PyObject *> : PassthroughFormat<PyObject *, 'O'> {}; + +template <typename T> +struct PythonFormat< + T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> { + static constexpr char format = 'O'; + static auto get(const T &value) { return value.get(); } +}; + +class PythonObject { +public: + PythonObject() = default; + + PythonObject(PyRefType type, PyObject *py_obj) { + m_py_obj = py_obj; + // If this is a borrowed reference, we need to convert it to + // an owned reference by incrementing it. If it is an owned + // reference (for example the caller allocated it with PyDict_New() + // then we must *not* increment it. + if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed) + Py_XINCREF(m_py_obj); + } + + PythonObject(const PythonObject &rhs) + : PythonObject(PyRefType::Borrowed, rhs.m_py_obj) {} + + PythonObject(PythonObject &&rhs) { + m_py_obj = rhs.m_py_obj; + rhs.m_py_obj = nullptr; + } + + ~PythonObject() { Reset(); } + + void Reset(); + + void Dump() const { + if (m_py_obj) + _PyObject_Dump(m_py_obj); + else + puts("NULL"); + } + + void Dump(Stream &strm) const; + + PyObject *get() const { return m_py_obj; } + + PyObject *release() { + PyObject *result = m_py_obj; + m_py_obj = nullptr; + return result; + } + + PythonObject &operator=(PythonObject other) { + Reset(); + m_py_obj = std::exchange(other.m_py_obj, nullptr); + return *this; + } + + PyObjectType GetObjectType() const; + + PythonString Repr() const; + + PythonString Str() const; + + static PythonObject ResolveNameWithDictionary(llvm::StringRef name, + const PythonDictionary &dict); + + template <typename T> + static T ResolveNameWithDictionary(llvm::StringRef name, + const PythonDictionary &dict) { + return ResolveNameWithDictionary(name, dict).AsType<T>(); + } + + PythonObject 
ResolveName(llvm::StringRef name) const; + + template <typename T> T ResolveName(llvm::StringRef name) const { + return ResolveName(name).AsType<T>(); + } + + bool HasAttribute(llvm::StringRef attribute) const; + + PythonObject GetAttributeValue(llvm::StringRef attribute) const; + + bool IsNone() const { return m_py_obj == Py_None; } + + bool IsValid() const { return m_py_obj != nullptr; } + + bool IsAllocated() const { return IsValid() && !IsNone(); } + + explicit operator bool() const { return IsValid() && !IsNone(); } + + template <typename T> T AsType() const { + if (!T::Check(m_py_obj)) + return T(); + return T(PyRefType::Borrowed, m_py_obj); + } + + StructuredData::ObjectSP CreateStructuredObject() const; + + template <typename... T> + llvm::Expected<PythonObject> CallMethod(const char *name, + const T &... t) const { + const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; + PyObject *obj = + PyObject_CallMethod(m_py_obj, py2_const_cast(name), + py2_const_cast(format), PythonFormat<T>::get(t)...); + if (!obj) + return exception(); + return python::Take<PythonObject>(obj); + } + + template <typename... T> + llvm::Expected<PythonObject> Call(const T &... 
t) const { + const char format[] = {'(', PythonFormat<T>::format..., ')', 0}; + PyObject *obj = PyObject_CallFunction(m_py_obj, py2_const_cast(format), + PythonFormat<T>::get(t)...); + if (!obj) + return exception(); + return python::Take<PythonObject>(obj); + } + + llvm::Expected<PythonObject> GetAttribute(const llvm::Twine &name) const { + if (!m_py_obj) + return nullDeref(); + PyObject *obj = PyObject_GetAttrString(m_py_obj, NullTerminated(name)); + if (!obj) + return exception(); + return python::Take<PythonObject>(obj); + } + + llvm::Expected<PythonObject> GetType() const { + if (!m_py_obj) + return nullDeref(); + PyObject *obj = PyObject_Type(m_py_obj); + if (!obj) + return exception(); + return python::Take<PythonObject>(obj); + } + + llvm::Expected<bool> IsTrue() { + if (!m_py_obj) + return nullDeref(); + int r = PyObject_IsTrue(m_py_obj); + if (r < 0) + return exception(); + return !!r; + } + + llvm::Expected<long long> AsLongLong() const; + + llvm::Expected<unsigned long long> AsUnsignedLongLong() const; + + // wraps on overflow, instead of raising an error. + llvm::Expected<unsigned long long> AsModuloUnsignedLongLong() const; + + llvm::Expected<bool> IsInstance(const PythonObject &cls) { + if (!m_py_obj || !cls.IsValid()) + return nullDeref(); + int r = PyObject_IsInstance(m_py_obj, cls.get()); + if (r < 0) + return exception(); + return !!r; + } + +protected: + PyObject *m_py_obj = nullptr; +}; + + +// This is why C++ needs monads. 
// Converts an Expected<PythonObject> into an Expected<T>:
//  - an incoming error is forwarded unchanged;
//  - if T::Check rejects the wrapped pointer, a "type error" string error
//    is returned;
//  - otherwise a T is built from the pointer as a borrowed reference.
template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {
  if (!obj)
    return obj.takeError();
  if (!T::Check(obj.get().get()))
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "type error");
  return T(PyRefType::Borrowed, std::move(obj.get().get()));
}

// Specializations for plain C++ result types; defined out of line.
template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<unsigned long long>
As<unsigned long long>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<std::string> As<std::string>(llvm::Expected<PythonObject> &&obj);

// CRTP-style base for the typed wrappers below: T supplies a static
// Check(PyObject *) that decides whether a pointer is acceptable.
template <class T> class TypedPythonObject : public PythonObject {
public:
  // A null py_obj leaves this object empty.  A pointer accepted by
  // T::Check is adopted according to `type`.  A rejected pointer leaves
  // this object empty; if we were handed ownership of it, that reference
  // is dropped here.
  TypedPythonObject(PyRefType type, PyObject *py_obj) {
    if (!py_obj)
      return;
    if (T::Check(py_obj))
      PythonObject::operator=(PythonObject(type, py_obj));
    else if (type == PyRefType::Owned)
      Py_DECREF(py_obj);
  }

  TypedPythonObject() = default;
};

// Typed wrapper whose Check() gates construction (see TypedPythonObject);
// member functions are defined out of line.
class PythonBytes : public TypedPythonObject<PythonBytes> {
public:
  using TypedPythonObject::TypedPythonObject;
  explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes);
  PythonBytes(const uint8_t *bytes, size_t length);

  static bool Check(PyObject *py_obj);

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};

// Typed wrapper with the same shape as PythonBytes, plus construction
// from a PythonBytes; member functions are defined out of line.
class PythonByteArray : public TypedPythonObject<PythonByteArray> {
public:
  using TypedPythonObject::TypedPythonObject;
  explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes);
  PythonByteArray(const uint8_t *bytes, size_t length);
  PythonByteArray(const PythonBytes &object);

  static bool Check(PyObject *py_obj);

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};

// Typed wrapper for string objects.  FromUTF8()/AsUTF8() are the
// Expected<>-returning entry points; the others carry the "safe, ... on
// error" notes below.
class PythonString : public TypedPythonObject<PythonString> {
public:
  using TypedPythonObject::TypedPythonObject;
  static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);

  PythonString() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonString(llvm::StringRef string); // safe, null on error

  static bool Check(PyObject *py_obj);

  llvm::StringRef GetString() const; // safe, empty string on error

  llvm::Expected<llvm::StringRef> AsUTF8() const;

  size_t GetSize() const;

  void SetString(llvm::StringRef string); // safe, null on error

  StructuredData::StringSP CreateStructuredString() const;
};

// Typed wrapper for integer objects; member functions are defined out of
// line.
class PythonInteger : public TypedPythonObject<PythonInteger> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonInteger(int64_t value);

  static bool Check(PyObject *py_obj);

  void SetInteger(int64_t value);

  StructuredData::IntegerSP CreateStructuredInteger() const;

  StructuredData::UnsignedIntegerSP CreateStructuredUnsignedInteger() const;

  StructuredData::SignedIntegerSP CreateStructuredSignedInteger() const;
};

// Typed wrapper for boolean objects; member functions are defined out of
// line.
class PythonBoolean : public TypedPythonObject<PythonBoolean> {
public:
  using TypedPythonObject::TypedPythonObject;

  explicit PythonBoolean(bool value);

  static bool Check(PyObject *py_obj);

  bool GetValue() const;

  void SetValue(bool value);

  StructuredData::BooleanSP CreateStructuredBoolean() const;
};

// Typed wrapper for list objects; member functions are defined out of
// line.
class PythonList : public TypedPythonObject<PythonList> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonList() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonList(PyInitialValue value);
  explicit PythonList(int list_size);

  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  void AppendItem(const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};

// Typed wrapper for tuple objects; member functions are defined out of
// line.
class PythonTuple : public TypedPythonObject<PythonTuple> {
public:
  using TypedPythonObject::TypedPythonObject;

  explicit PythonTuple(PyInitialValue value);
  explicit PythonTuple(int tuple_size);
  PythonTuple(std::initializer_list<PythonObject> objects);
  PythonTuple(std::initializer_list<PyObject *> objects);

  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};

// Typed wrapper for dictionary objects.  GetItem()/SetItem() are the
// Error/Expected<>-returning accessors; the *ForKey variants are marked
// deprecated below.
class PythonDictionary : public TypedPythonObject<PythonDictionary> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonDictionary(PyInitialValue value);

  static bool Check(PyObject *py_obj);

  bool HasKey(const llvm::Twine &key) const;

  uint32_t GetSize() const;

  PythonList GetKeys() const;

  PythonObject GetItemForKey(const PythonObject &key) const; // DEPRECATED
  void SetItemForKey(const PythonObject &key,
                     const PythonObject &value); // DEPRECATED

  llvm::Expected<PythonObject> GetItem(const PythonObject &key) const;
  llvm::Expected<PythonObject> GetItem(const llvm::Twine &key) const;
  llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const;
  llvm::Error SetItem(const llvm::Twine &key, const PythonObject &value) const;

  StructuredData::DictionarySP CreateStructuredDictionary() const;
};

// Typed wrapper for module objects.
class PythonModule : public TypedPythonObject<PythonModule> {
public:
  using TypedPythonObject::TypedPythonObject;

  static bool Check(PyObject *py_obj);

  static PythonModule BuiltinsModule();

  static PythonModule MainModule();

  static PythonModule AddModule(llvm::StringRef module);

  // safe, returns invalid on error;
  // Wraps Import(): on failure the error is consumed and an empty
  // PythonModule is returned.
  static PythonModule ImportModule(llvm::StringRef name) {
    std::string s = std::string(name);
    auto mod = Import(s.c_str());
    if (!mod) {
      llvm::consumeError(mod.takeError());
      return PythonModule();
    }
    return std::move(mod.get());
  }

  static llvm::Expected<PythonModule> Import(const llvm::Twine &name);

  llvm::Expected<PythonObject> Get(const llvm::Twine &name);

  PythonDictionary GetDictionary() const;
};

// Typed wrapper for callable objects.
class PythonCallable : public TypedPythonObject<PythonCallable> {
public:
  using TypedPythonObject::TypedPythonObject;

  struct ArgInfo {
    /* the largest number of positional arguments this callable
     * can accept, or UNBOUNDED, ie UINT_MAX if it's a varargs
     * function and can accept an arbitrary number */
    unsigned max_positional_args;
    static constexpr unsigned UNBOUNDED = UINT_MAX; // FIXME c++17 inline
  };

  static bool Check(PyObject *py_obj);

  llvm::Expected<ArgInfo> GetArgInfo() const;

  PythonObject operator()();

  PythonObject operator()(std::initializer_list<PyObject *> args);

  PythonObject operator()(std::initializer_list<PythonObject> args);

  // Variadic convenience form: packs the arguments into a braced list and
  // forwards to one of the initializer_list overloads above.
  template <typename Arg, typename... Args>
  PythonObject operator()(const Arg &arg, Args... args) {
    return operator()({arg, args...});
  }
};

// Typed wrapper for file objects; conversions to and from lldb File are
// defined out of line.
class PythonFile : public TypedPythonObject<PythonFile> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason

  static bool Check(PyObject *py_obj);

  static llvm::Expected<PythonFile> FromFile(File &file,
                                             const char *mode = nullptr);

  llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);
  llvm::Expected<lldb::FileSP>
  ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);
};

// llvm::ErrorInfo subclass used to carry a python exception through
// llvm::Error (see exception() and unwrapOrSetPythonException(), which
// calls Restore() on it).  Member functions are defined out of line.
class PythonException : public llvm::ErrorInfo<PythonException> {
private:
  PyObject *m_exception_type, *m_exception, *m_traceback;
  PyObject *m_repr_bytes;

public:
  static char ID;
  const char *toCString() const;
  PythonException(const char *caller = nullptr);
  void Restore();
  ~PythonException() override;
  void log(llvm::raw_ostream &OS) const override;
  std::error_code convertToErrorCode() const override;
  bool Matches(PyObject *exc) const;
  std::string ReadBacktrace() const;
};

// This extracts the underlying T out of an Expected<T> and returns it.
// If the Expected is an Error instead of a T, that error will be converted
// into a python exception, and this will return a default-constructed T.
//
// This is appropriate for use right at the boundary of python calling into
// C++, such as in a SWIG typemap.  In such a context you should simply
// check if the returned T is valid, and if it is not, return a NULL back
// to python.  This will result in the Error being raised as an exception
// from python code's point of view.
//
// For example:
// ```
// Expected<Foo *> efoop = some_cpp_function();
// Foo *foop = unwrapOrSetPythonException(efoop);
// if (!foop)
//   return NULL;
// do_something(*foop);
// ```
//
// If the Error returned was itself created because a python exception was
// raised when C++ code called into python, then the original exception
// will be restored.
Otherwise a simple string exception will be raised. +template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) { + if (expected) + return expected.get(); + llvm::handleAllErrors( + expected.takeError(), [](PythonException &E) { E.Restore(); }, + [](const llvm::ErrorInfoBase &E) { + PyErr_SetString(PyExc_Exception, E.message().c_str()); + }); + return T(); +} + +// This is only here to help incrementally migrate old, exception-unsafe +// code. +template <typename T> T unwrapIgnoringErrors(llvm::Expected<T> expected) { + if (expected) + return std::move(expected.get()); + llvm::consumeError(expected.takeError()); + return T(); +} + +llvm::Expected<PythonObject> runStringOneLine(const llvm::Twine &string, + const PythonDictionary &globals, + const PythonDictionary &locals); + +llvm::Expected<PythonObject> runStringMultiLine(const llvm::Twine &string, + const PythonDictionary &globals, + const PythonDictionary &locals); + +// Sometimes the best way to interact with a python interpreter is +// to run some python code. You construct a PythonScript with +// script string. The script assigns some function to `_function_` +// and you get a C++ callable object that calls the python function. +// +// Example: +// +// const char script[] = R"( +// def main(x, y): +// .... +// )"; +// +// Expected<PythonObject> cpp_foo_wrapper(PythonObject x, PythonObject y) { +// // no need to synchronize access to this global, we already have the GIL +// static PythonScript foo(script) +// return foo(x, y); +// } +class PythonScript { + const char *script; + PythonCallable function; + + llvm::Error Init(); + +public: + PythonScript(const char *script) : script(script), function() {} + + template <typename... Args> + llvm::Expected<PythonObject> operator()(Args &&... 
args) { + if (llvm::Error error = Init()) + return std::move(error); + return function.Call(std::forward<Args>(args)...); + } +}; + +class StructuredPythonObject : public StructuredData::Generic { +public: + StructuredPythonObject() : StructuredData::Generic() {} + + // Take ownership of the object we received. + StructuredPythonObject(PythonObject obj) + : StructuredData::Generic(obj.release()) {} + + ~StructuredPythonObject() override { + // Hand ownership back to a (temporary) PythonObject instance and let it + // take care of releasing it. + PythonObject(PyRefType::Owned, static_cast<PyObject *>(GetValue())); + } + + bool IsValid() const override { return GetValue() && GetValue() != Py_None; } + + void Serialize(llvm::json::OStream &s) const override; + +private: + StructuredPythonObject(const StructuredPythonObject &) = delete; + const StructuredPythonObject & + operator=(const StructuredPythonObject &) = delete; +}; + +} // namespace python +} // namespace lldb_private + +#endif + +#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H |