summaryrefslogtreecommitdiff
path: root/lib/tre-match-utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tre-match-utils.h')
-rw-r--r--lib/tre-match-utils.h215
1 files changed, 215 insertions, 0 deletions
diff --git a/lib/tre-match-utils.h b/lib/tre-match-utils.h
new file mode 100644
index 0000000000000..70db745e1050c
--- /dev/null
+++ b/lib/tre-match-utils.h
@@ -0,0 +1,215 @@
+/*
+ tre-match-utils.h - TRE matcher helper definitions
+
+ This software is released under a BSD-style license.
+ See the file LICENSE for details and copyright.
+
+*/
+
+#define str_source ((const tre_str_source*)string)
+
+#ifdef TRE_WCHAR
+
+#ifdef TRE_MULTIBYTE
+
+/* Wide character and multibyte support. */
+
+#define GET_NEXT_WCHAR() \
+ do { \
+ prev_c = next_c; \
+ if (type == STR_BYTE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = '\0'; \
+ else \
+ next_c = (unsigned char)(*str_byte++); \
+ } \
+ else if (type == STR_WIDE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = L'\0'; \
+ else \
+ next_c = *str_wide++; \
+ } \
+ else if (type == STR_MBS) \
+ { \
+ pos += pos_add_next; \
+ if (str_byte == NULL) \
+ next_c = L'\0'; \
+ else \
+ { \
+ size_t w; \
+ int max; \
+ if (len >= 0) \
+ max = len - pos; \
+ else \
+ max = 32; \
+ if (max <= 0) \
+ { \
+ next_c = L'\0'; \
+ pos_add_next = 1; \
+ } \
+ else \
+ { \
+ w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \
+ if (w == (size_t)-1 || w == (size_t)-2) \
+ return REG_NOMATCH; \
+ if (w == 0 && len >= 0) \
+ { \
+ pos_add_next = 1; \
+ next_c = 0; \
+ str_byte++; \
+ } \
+ else \
+ { \
+ pos_add_next = w; \
+ str_byte += w; \
+ } \
+ } \
+ } \
+ } \
+ else if (type == STR_USER) \
+ { \
+ pos += pos_add_next; \
+ str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
+ str_source->context); \
+ } \
+ } while(/*CONSTCOND*/0)
+
+#else /* !TRE_MULTIBYTE */
+
+/* Wide character support, no multibyte support. */
+
+#define GET_NEXT_WCHAR() \
+ do { \
+ prev_c = next_c; \
+ if (type == STR_BYTE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = '\0'; \
+ else \
+ next_c = (unsigned char)(*str_byte++); \
+ } \
+ else if (type == STR_WIDE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = L'\0'; \
+ else \
+ next_c = *str_wide++; \
+ } \
+ else if (type == STR_USER) \
+ { \
+ pos += pos_add_next; \
+ str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
+ str_source->context); \
+ } \
+ } while(/*CONSTCOND*/0)
+
+#endif /* !TRE_MULTIBYTE */
+
+#else /* !TRE_WCHAR */
+
+/* No wide character or multibyte support. */
+
+#define GET_NEXT_WCHAR() \
+ do { \
+ prev_c = next_c; \
+ if (type == STR_BYTE) \
+ { \
+ pos++; \
+ if (len >= 0 && pos >= len) \
+ next_c = '\0'; \
+ else \
+ next_c = (unsigned char)(*str_byte++); \
+ } \
+ else if (type == STR_USER) \
+ { \
+ pos += pos_add_next; \
+ str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
+ str_source->context); \
+ } \
+ } while(/*CONSTCOND*/0)
+
+#endif /* !TRE_WCHAR */
+
+
+
+#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
+
+#define CHECK_ASSERTIONS(assertions) \
+ (((assertions & ASSERT_AT_BOL) \
+ && (pos > 0 || reg_notbol) \
+ && (prev_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_EOL) \
+ && (next_c != L'\0' || reg_noteol) \
+ && (next_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_BOW) \
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_EOW) \
+ && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB) \
+ && (pos != 0 && next_c != L'\0' \
+ && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB_NEG) \
+ && (pos == 0 || next_c == L'\0' \
+ || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
+
+#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
+ (((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && !(tnfa->cflags & REG_ICASE) \
+ && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && (tnfa->cflags & REG_ICASE) \
+ && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \
+ && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
+ && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
+ tnfa->cflags & REG_ICASE)))
+
+
+
+
+/* Returns 1 if `t1' wins `t2', 0 otherwise. */
+inline static int
+tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
+ int *t1, int *t2)
+{
+ int i;
+ for (i = 0; i < num_tags; i++)
+ {
+ if (tag_directions[i] == TRE_TAG_MINIMIZE)
+ {
+ if (t1[i] < t2[i])
+ return 1;
+ if (t1[i] > t2[i])
+ return 0;
+ }
+ else
+ {
+ if (t1[i] > t2[i])
+ return 1;
+ if (t1[i] < t2[i])
+ return 0;
+ }
+ }
+ /* assert(0);*/
+ return 0;
+}
+
+inline static int
+tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
+{
+ DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
+ while (*classes != (tre_ctype_t)0)
+ if ((!icase && tre_isctype(wc, *classes))
+ || (icase && (tre_isctype(tre_toupper(wc), *classes)
+ || tre_isctype(tre_tolower(wc), *classes))))
+ return 1; /* Match. */
+ else
+ classes++;
+ return 0; /* No match. */
+}