diff options
Diffstat (limited to 'python/tre-python.c')
-rw-r--r-- | python/tre-python.c | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/python/tre-python.c b/python/tre-python.c new file mode 100644 index 0000000000000..bbb24edfadd96 --- /dev/null +++ b/python/tre-python.c @@ -0,0 +1,549 @@ +/* + tre-python.c - TRE Python language bindings + + This sotfware is released under a BSD-style license. + See the file LICENSE for details and copyright. + + The original version of this code was contributed by + Nikolai Saoukh <nms+python@otdel1.org>. + +*/ + + +#include "Python.h" +#include "structmember.h" + +#include <tre/tre.h> + +#define TRE_MODULE "tre" + +typedef struct { + PyObject_HEAD + regex_t rgx; + int flags; +} TrePatternObject; + +typedef struct { + PyObject_HEAD + regaparams_t ap; +} TreFuzzynessObject; + +typedef struct { + PyObject_HEAD + regamatch_t am; + PyObject *targ; /* string we matched against */ + TreFuzzynessObject *fz; /* fuzzyness used during match */ +} TreMatchObject; + + +static PyObject *ErrorObject; + +static void +_set_tre_err(int rc, regex_t *rgx) +{ + PyObject *errval; + char emsg[256]; + size_t elen; + + elen = tre_regerror(rc, rgx, emsg, sizeof(emsg)); + if (emsg[elen] == '\0') + elen--; + errval = Py_BuildValue("s#", emsg, elen); + PyErr_SetObject(ErrorObject, errval); + Py_XDECREF(errval); +} + +static PyObject * +TreFuzzyness_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = { + "delcost", "inscost", "maxcost", "subcost", + "maxdel", "maxerr", "maxins", "maxsub", + NULL + }; + + TreFuzzynessObject *self; + + self = (TreFuzzynessObject*)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + tre_regaparams_default(&self->ap); + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiiii", kwlist, + &self->ap.cost_del, &self->ap.cost_ins, + &self->ap.max_cost, &self->ap.cost_subst, + &self->ap.max_del, &self->ap.max_err, + &self->ap.max_ins, &self->ap.max_subst)) + { + Py_DECREF(self); + return NULL; + } + return (PyObject*)self; +} + +static PyObject * +TreFuzzyness_repr(PyObject *obj) +{ + TreFuzzynessObject *self = (TreFuzzynessObject*)obj; + PyObject *o; + + o = PyString_FromFormat("%s(delcost=%d,inscost=%d,maxcost=%d,subcost=%d," + "maxdel=%d,maxerr=%d,maxins=%d,maxsub=%d)", + self->ob_type->tp_name, self->ap.cost_del, + self->ap.cost_ins, self->ap.max_cost, + self->ap.cost_subst, self->ap.max_del, + self->ap.max_err, self->ap.max_ins, + self->ap.max_subst); + return o; +} + +static PyMemberDef TreFuzzyness_members[] = { + { "delcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_del), 0, + "The cost of a deleted character" }, + { "inscost", T_INT, offsetof(TreFuzzynessObject, ap.cost_ins), 0, + "The cost of an inserted character" }, + { "maxcost", T_INT, offsetof(TreFuzzynessObject, ap.max_cost), 0, + "The maximum allowed cost of a match. If this is set to zero, an exact " + "match is searched for" }, + { "subcost", T_INT, offsetof(TreFuzzynessObject, ap.cost_subst), 0, + "The cost of a substituted character" }, + { "maxdel", T_INT, offsetof(TreFuzzynessObject, ap.max_del), 0, + "Maximum allowed number of deleted characters" }, + { "maxerr", T_INT, offsetof(TreFuzzynessObject, ap.max_err), 0, + "Maximum allowed number of errors (inserts + deletes + substitutes)" }, + { "maxins", T_INT, offsetof(TreFuzzynessObject, ap.max_ins), 0, + "Maximum allowed number of inserted characters" }, + { "maxsub", T_INT, offsetof(TreFuzzynessObject, ap.max_subst), 0, + "Maximum allowed number of substituted characters" }, + { NULL } +}; + +static PyTypeObject TreFuzzynessType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + TRE_MODULE ".Fuzzyness", /* tp_name */ + sizeof(TreFuzzynessObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + TreFuzzyness_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + /* tp_doc */ + TRE_MODULE ".fuzzyness object holds approximation parameters for match", + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + TreFuzzyness_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + TreFuzzyness_new /* tp_new */ +}; + +static PyObject * +PyTreMatch_groups(TreMatchObject *self, PyObject *dummy) +{ + PyObject *result; + size_t i; + + if (self->am.nmatch < 1) + { + Py_INCREF(Py_None); + return Py_None; + } + result = PyTuple_New(self->am.nmatch); + for (i = 0; i < self->am.nmatch; i++) + { + PyObject *range; + regmatch_t *rm = &self->am.pmatch[i]; + + if (rm->rm_so == (-1) && rm->rm_eo == (-1)) + { + Py_INCREF(Py_None); + range = Py_None; + } + else + { + range = Py_BuildValue("(ii)", rm->rm_so, rm->rm_eo); + } + PyTuple_SetItem(result, i, range); + } + return (PyObject*)result; +} + +static PyObject * +PyTreMatch_groupi(PyObject *obj, int gn) +{ + TreMatchObject *self = (TreMatchObject*)obj; + PyObject *result; + regmatch_t *rm; + + if (gn < 0 || (size_t)gn > self->am.nmatch - 1) + { + PyErr_SetString(PyExc_ValueError, "out of bounds"); + return NULL; + } + rm = &self->am.pmatch[gn]; + if (rm->rm_so == (-1) && rm->rm_eo == (-1)) + { + Py_INCREF(Py_None); + return Py_None; + } + result = PySequence_GetSlice(self->targ, rm->rm_so, rm->rm_eo); + return result; +} + +static PyObject * +PyTreMatch_group(TreMatchObject *self, PyObject *grpno) +{ + PyObject *result; + long gn; + + gn = PyInt_AsLong(grpno); + + if (PyErr_Occurred()) + return NULL; + + result = PyTreMatch_groupi((PyObject*)self, gn); + return result; +} + +static PyMethodDef TreMatch_methods[] = { + {"group", (PyCFunction)PyTreMatch_group, METH_O, + "return submatched string or None if a parenthesized subexpression did " + "not participate in a match"}, + {"groups", (PyCFunction)PyTreMatch_groups, METH_NOARGS, + "return the tuple of slice tuples for all parenthesized subexpressions " + "(None for not participated)"}, + {NULL, NULL} +}; + +static PyMemberDef TreMatch_members[] = { + { "cost", T_INT, offsetof(TreMatchObject, am.cost), READONLY, + "Cost of the match" }, + { "numdel", T_INT, offsetof(TreMatchObject, am.num_del), READONLY, + "Number of deletes in the match" }, + { "numins", T_INT, offsetof(TreMatchObject, am.num_ins), READONLY, + "Number of inserts in the match" }, + { "numsub", T_INT, offsetof(TreMatchObject, am.num_subst), READONLY, + "Number of substitutes in the match" }, + { "fuzzyness", T_OBJECT, offsetof(TreMatchObject, fz), READONLY, + "Fuzzyness used during match" }, + { NULL } +}; + +static void +PyTreMatch_dealloc(TreMatchObject *self) +{ + Py_XDECREF(self->targ); + Py_XDECREF(self->fz); + if (self->am.pmatch != NULL) + PyMem_Del(self->am.pmatch); + PyObject_Del(self); +} + +static PySequenceMethods TreMatch_as_sequence_methods = { + 0, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + PyTreMatch_groupi, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + 0, /* sq_contains */ + 0, /* sq_inplace_concat */ + 0 /* sq_inplace_repeat */ +}; + +static PyTypeObject TreMatchType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + TRE_MODULE ".Match", /* tp_name */ + sizeof(TreMatchObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)PyTreMatch_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &TreMatch_as_sequence_methods, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + TRE_MODULE ".match object holds result of successful match", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + TreMatch_methods, /* tp_methods */ + TreMatch_members /* tp_members */ +}; + +static TreMatchObject * +newTreMatchObject(void) +{ + TreMatchObject *self; + + self = PyObject_New(TreMatchObject, &TreMatchType); + if (self == NULL) + return NULL; + memset(&self->am, '\0', sizeof(self->am)); + self->targ = NULL; + self->fz = NULL; + return self; +} + +static PyObject * +PyTrePattern_search(TrePatternObject *self, PyObject *args) +{ + PyObject *pstring; + int eflags = 0; + TreMatchObject *mo; + TreFuzzynessObject *fz; + size_t nsub; + int rc; + regmatch_t *pm; + char *targ; + size_t tlen; + + if (!PyArg_ParseTuple(args, "SO!|i:match", &pstring, &TreFuzzynessType, + &fz, &eflags)) + return NULL; + + mo = newTreMatchObject(); + if (mo == NULL) + return NULL; + + nsub = self->rgx.re_nsub + 1; + pm = PyMem_New(regmatch_t, nsub); + if (pm != NULL) + { + mo->am.nmatch = nsub; + mo->am.pmatch = pm; + } + else + { + /* XXX */ + Py_DECREF(mo); + return NULL; + } + + targ = PyString_AsString(pstring); + tlen = PyString_Size(pstring); + + rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags); + + if (PyErr_Occurred()) + { + Py_DECREF(mo); + return NULL; + } + + if (rc == REG_OK) + { + Py_INCREF(pstring); + mo->targ = pstring; + Py_INCREF(fz); + mo->fz = fz; + return (PyObject*)mo; + } + + if (rc == REG_NOMATCH) + { + Py_DECREF(mo); + Py_INCREF(Py_None); + return Py_None; + } + _set_tre_err(rc, &self->rgx); + Py_DECREF(mo); + return NULL; +} + +static PyMethodDef TrePattern_methods[] = { + { "search", (PyCFunction)PyTrePattern_search, METH_VARARGS, + "try to match against given string, returning " TRE_MODULE ".match object " + "or None on failure" }, + {NULL, NULL} +}; + +static PyMemberDef TrePattern_members[] = { + { "nsub", T_INT, offsetof(TrePatternObject, rgx.re_nsub), READONLY, + "Number of parenthesized subexpressions in regex" }, + { NULL } +}; + +static void +PyTrePattern_dealloc(TrePatternObject *self) +{ + tre_regfree(&self->rgx); + PyObject_Del(self); +} + +static PyTypeObject TrePatternType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + TRE_MODULE ".Pattern", /* tp_name */ + sizeof(TrePatternObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)PyTrePattern_dealloc, /*tp_dealloc*/ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + TRE_MODULE ".pattern object holds compiled tre regex", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + TrePattern_methods, /* tp_methods */ + TrePattern_members /* tp_members */ +}; + +static TrePatternObject * +newTrePatternObject(PyObject *args) +{ + TrePatternObject *self; + + self = PyObject_New(TrePatternObject, &TrePatternType); + if (self == NULL) + return NULL; + self->flags = 0; + return self; +} + +static PyObject * +PyTre_ncompile(PyObject *self, PyObject *args) +{ + TrePatternObject *rv; + char *pattern; + int pattlen; + int cflags = 0; + int rc; + + if (!PyArg_ParseTuple(args, "s#|i:compile", &pattern, &pattlen, &cflags)) + return NULL; + + rv = newTrePatternObject(args); + if (rv == NULL) + return NULL; + + rc = tre_regncomp(&rv->rgx, (char*)pattern, pattlen, cflags); + if (rc != REG_OK) + { + if (!PyErr_Occurred()) + _set_tre_err(rc, &rv->rgx); + Py_DECREF(rv); + return NULL; + } + rv->flags = cflags; + return (PyObject*)rv; +} + +static PyMethodDef tre_methods[] = { + { "compile", PyTre_ncompile, METH_VARARGS, + "Compile a regular expression pattern, returning a " + TRE_MODULE ".pattern object" }, + { NULL, NULL } +}; + +static char *tre_doc = +"Python module for TRE library\n\nModule exports " +"the only function: compile"; + +static struct _tre_flags { + char *name; + int val; +} tre_flags[] = { + { "EXTENDED", REG_EXTENDED }, + { "ICASE", REG_ICASE }, + { "NEWLINE", REG_NEWLINE }, + { "NOSUB", REG_NOSUB }, + { "LITERAL", REG_LITERAL }, + + { "NOTBOL", REG_NOTBOL }, + { "NOTEOL", REG_NOTEOL }, + { NULL, 0 } +}; + +PyMODINIT_FUNC +inittre(void) +{ + PyObject *m; + struct _tre_flags *fp; + + if (PyType_Ready(&TreFuzzynessType) < 0) + return; + if (PyType_Ready(&TreMatchType) < 0) + return; + if (PyType_Ready(&TrePatternType) < 0) + return; + + /* Create the module and add the functions */ + m = Py_InitModule3(TRE_MODULE, tre_methods, tre_doc); + if (m == NULL) + return; + + Py_INCREF(&TreFuzzynessType); + if (PyModule_AddObject(m, "Fuzzyness", (PyObject*)&TreFuzzynessType) < 0) + return; + Py_INCREF(&TreMatchType); + if (PyModule_AddObject(m, "Match", (PyObject*)&TreMatchType) < 0) + return; + Py_INCREF(&TrePatternType); + if (PyModule_AddObject(m, "Pattern", (PyObject*)&TrePatternType) < 0) + return; + ErrorObject = PyErr_NewException(TRE_MODULE ".Error", NULL, NULL); + Py_INCREF(ErrorObject); + if (PyModule_AddObject(m, "Error", ErrorObject) < 0) + return; + + /* Insert the flags */ + for (fp = tre_flags; fp->name != NULL; fp++) + if (PyModule_AddIntConstant(m, fp->name, fp->val) < 0) + return; +} |