summaryrefslogtreecommitdiff
path: root/contrib/less/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/less/charset.c')
-rw-r--r--contrib/less/charset.c105
1 files changed, 66 insertions, 39 deletions
diff --git a/contrib/less/charset.c b/contrib/less/charset.c
index 7aec0ea140e8..88978918e45f 100644
--- a/contrib/less/charset.c
+++ b/contrib/less/charset.c
@@ -22,6 +22,13 @@
#include "charset.h"
+#if MSDOS_COMPILER==WIN32C
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+extern int bs_mode;
+
public int utf_mode = 0;
/*
@@ -215,7 +222,13 @@ icharset(name, no_error)
{
ichardef(p->desc);
if (p->p_flag != NULL)
+ {
+#if MSDOS_COMPILER==WIN32C
+ *(p->p_flag) = 1 + (GetConsoleOutputCP() != CP_UTF8);
+#else
*(p->p_flag) = 1;
+#endif
+ }
return (1);
}
}
@@ -251,16 +264,17 @@ ilocale()
/*
* Define the printing format for control (or binary utf) chars.
*/
- static void
-setbinfmt(s, fmtvarptr, default_fmt)
+ public void
+setfmt(s, fmtvarptr, attrptr, default_fmt)
char *s;
char **fmtvarptr;
+ int *attrptr;
char *default_fmt;
{
if (s && utf_mode)
{
/* It would be too hard to account for width otherwise. */
- char *t = s;
+ char constant *t = s;
while (*t)
{
if (*t < ' ' || *t > '~')
@@ -282,15 +296,15 @@ setbinfmt(s, fmtvarptr, default_fmt)
* Select the attributes if it starts with "*".
*/
attr:
- if (*s == '*')
+ if (*s == '*' && s[1] != '\0')
{
switch (s[1])
{
- case 'd': binattr = AT_BOLD; break;
- case 'k': binattr = AT_BLINK; break;
- case 's': binattr = AT_STANDOUT; break;
- case 'u': binattr = AT_UNDERLINE; break;
- default: binattr = AT_NORMAL; break;
+ case 'd': *attrptr = AT_BOLD; break;
+ case 'k': *attrptr = AT_BLINK; break;
+ case 's': *attrptr = AT_STANDOUT; break;
+ case 'u': *attrptr = AT_UNDERLINE; break;
+ default: *attrptr = AT_NORMAL; break;
}
s += 2;
}
@@ -305,6 +319,14 @@ set_charset()
{
char *s;
+#if MSDOS_COMPILER==WIN32C
+ /*
+ * If the Windows console is using UTF-8, we'll use it too.
+ */
+ if (GetConsoleOutputCP() == CP_UTF8)
+ if (icharset("utf-8", 1))
+ return;
+#endif
/*
* See if environment variable LESSCHARSET is defined.
*/
@@ -354,6 +376,7 @@ set_charset()
* rather than from predefined charset entry.
*/
ilocale();
+#else
#if MSDOS_COMPILER
/*
* Default to "dos".
@@ -383,10 +406,10 @@ init_charset()
set_charset();
s = lgetenv("LESSBINFMT");
- setbinfmt(s, &binfmt, "*s<%02X>");
+ setfmt(s, &binfmt, &binattr, "*s<%02X>");
s = lgetenv("LESSUTFBINFMT");
- setbinfmt(s, &utfbinfmt, "<U+%04lX>");
+ setfmt(s, &utfbinfmt, &binattr, "<U+%04lX>");
}
/*
@@ -543,34 +566,19 @@ is_utf8_well_formed(ss, slen)
}
/*
- * Return number of invalid UTF-8 sequences found in a buffer.
+ * Skip bytes until a UTF-8 lead byte (11xxxxxx) or ASCII byte (0xxxxxxx) is found.
*/
- public int
-utf_bin_count(data, len)
- char *data;
- int len;
+ public void
+utf_skip_to_lead(pp, limit)
+ char **pp;
+ char *limit;
{
- int bin_count = 0;
- while (len > 0)
- {
- if (is_utf8_well_formed(data, len))
- {
- int clen = utf_len(*data & 0377);
- data += clen;
- len -= clen;
- } else
- {
- /* Skip to next lead byte. */
- bin_count++;
- do {
- ++data;
- --len;
- } while (len > 0 && !IS_UTF8_LEAD(*data & 0377));
- }
- }
- return (bin_count);
+ do {
+ ++(*pp);
+ } while (*pp < limit && !IS_UTF8_LEAD((*pp)[0] & 0377) && !IS_ASCII_OCTET((*pp)[0]));
}
+
/*
* Get the value of a UTF-8 character.
*/
@@ -690,9 +698,9 @@ step_char(pp, dir, limit)
{
/* It's easy if chars are one byte. */
if (dir > 0)
- ch = (LWCHAR) ((p < limit) ? *p++ : 0);
+ ch = (LWCHAR) (unsigned char) ((p < limit) ? *p++ : 0);
else
- ch = (LWCHAR) ((p > limit) ? *--p : 0);
+ ch = (LWCHAR) (unsigned char) ((p > limit) ? *--p : 0);
} else if (dir > 0)
{
len = utf_len(*p);
@@ -740,6 +748,10 @@ DECLARE_RANGE_TABLE_START(wide)
#include "wide.uni"
DECLARE_RANGE_TABLE_END(wide)
+DECLARE_RANGE_TABLE_START(fmt)
+#include "fmt.uni"
+DECLARE_RANGE_TABLE_END(fmt)
+
/* comb_table is special pairs, not ranges. */
static struct wchar_range comb_table[] = {
{0x0644,0x0622}, {0x0644,0x0623}, {0x0644,0x0625}, {0x0644,0x0627},
@@ -780,7 +792,8 @@ is_in_table(ch, table)
is_composing_char(ch)
LWCHAR ch;
{
- return is_in_table(ch, &compose_table);
+ return is_in_table(ch, &compose_table) ||
+ (bs_mode != BS_CONTROL && is_in_table(ch, &fmt_table));
}
/*
@@ -790,7 +803,21 @@ is_composing_char(ch)
is_ubin_char(ch)
LWCHAR ch;
{
- return is_in_table(ch, &ubin_table);
+ int ubin = is_in_table(ch, &ubin_table) ||
+ (bs_mode == BS_CONTROL && is_in_table(ch, &fmt_table));
+#if MSDOS_COMPILER==WIN32C
+ if (!ubin && utf_mode == 2 && ch < 0x10000)
+ {
+ /*
+ * Consider it binary if it can't be converted.
+ */
+ BOOL used_default = TRUE;
+ WideCharToMultiByte(GetConsoleOutputCP(), WC_NO_BEST_FIT_CHARS, (LPCWSTR) &ch, 1, NULL, 0, NULL, &used_default);
+ if (used_default)
+ ubin = 1;
+ }
+#endif
+ return ubin;
}
/*