diff options
| author | Paul Saab <ps@FreeBSD.org> | 2000-05-22 09:53:22 +0000 | 
|---|---|---|
| committer | Paul Saab <ps@FreeBSD.org> | 2000-05-22 09:53:22 +0000 | 
| commit | a5f0fb151d90effe79714de0fa059954725fe57f (patch) | |
| tree | 27b16fc210b9a302c9e74f90e36a9b5ed21e6300 /contrib/less/charset.c | |
Diffstat (limited to 'contrib/less/charset.c')
| -rw-r--r-- | contrib/less/charset.c | 294 | 
1 files changed, 294 insertions, 0 deletions
| diff --git a/contrib/less/charset.c b/contrib/less/charset.c new file mode 100644 index 000000000000..efb26a87bd10 --- /dev/null +++ b/contrib/less/charset.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 1984-2000  Mark Nudelman + * + * You may distribute under the terms of either the GNU General Public + * License or the Less License, as specified in the README file. + * + * For more information about less, or for information on how to  + * contact the author, see the README file. + */ + + +/* + * Functions to define the character set + * and do things specific to the character set. + */ + +#include "less.h" +#if HAVE_LOCALE +#include <locale.h> +#include <ctype.h> +#endif + +public int utf_mode = 0; + +/* + * Predefined character sets, + * selected by the LESSCHARSET environment variable. + */ +struct charset { +	char *name; +	int *p_flag; +	char *desc; +} charsets[] = { +	{ "ascii",	NULL,       "8bcccbcc18b95.b" }, +	{ "dos",	NULL,       "8bcccbcc12bc5b95.b." }, +	{ "ebcdic",	NULL,       "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, +	{ "iso8859",	NULL,       "8bcccbcc18b95.33b." }, +	{ "koi8-r",	NULL,       "8bcccbcc18b95.b128." }, +	{ "latin1",	NULL,       "8bcccbcc18b95.33b." }, +	{ "next",	NULL,       "8bcccbcc18b95.bb125.bb" }, +	{ "utf-8",	&utf_mode,  "8bcccbcc18b." }, +	{ NULL, NULL, NULL } +}; + +#define	IS_BINARY_CHAR	01 +#define	IS_CONTROL_CHAR	02 + +static char chardef[256]; +static char *binfmt = NULL; +public int binattr = AT_STANDOUT; + + +/* + * Define a charset, given a description string. + * The string consists of 256 letters, + * one for each character in the charset. + * If the string is shorter than 256 letters, missing letters + * are taken to be identical to the last one. + * A decimal number followed by a letter is taken to be a  + * repetition of the letter. + * + * Each letter is one of: + *	. normal character + *	b binary character + *	c control character + */ +	static void +ichardef(s) +	char *s; +{ +	register char *cp; +	register int n; +	register char v; + +	n = 0; +	v = 0; +	cp = chardef; +	while (*s != '\0') +	{ +		switch (*s++) +		{ +		case '.': +			v = 0; +			break; +		case 'c': +			v = IS_CONTROL_CHAR; +			break; +		case 'b': +			v = IS_BINARY_CHAR|IS_CONTROL_CHAR; +			break; + +		case '0': case '1': case '2': case '3': case '4': +		case '5': case '6': case '7': case '8': case '9': +			n = (10 * n) + (s[-1] - '0'); +			continue; + +		default: +			error("invalid chardef", NULL_PARG); +			quit(QUIT_ERROR); +			/*NOTREACHED*/ +		} + +		do +		{ +			if (cp >= chardef + sizeof(chardef)) +			{ +				error("chardef longer than 256", NULL_PARG); +				quit(QUIT_ERROR); +				/*NOTREACHED*/ +			} +			*cp++ = v; +		} while (--n > 0); +		n = 0; +	} + +	while (cp < chardef + sizeof(chardef)) +		*cp++ = v; +} + +/* + * Define a charset, given a charset name. + * The valid charset names are listed in the "charsets" array. + */ +	static int +icharset(name) +	register char *name; +{ +	register struct charset *p; + +	if (name == NULL || *name == '\0') +		return (0); + +	for (p = charsets;  p->name != NULL;  p++) +	{ +		if (strcmp(name, p->name) == 0) +		{ +			ichardef(p->desc); +			if (p->p_flag != NULL) +				*(p->p_flag) = 1; +			return (1); +		} +	} + +	error("invalid charset name", NULL_PARG); +	quit(QUIT_ERROR); +	/*NOTREACHED*/ +} + +#if HAVE_LOCALE +/* + * Define a charset, given a locale name. + */ +	static void +ilocale() +{ +	register int c; + +	setlocale(LC_ALL, ""); +	for (c = 0;  c < (int) sizeof(chardef);  c++) +	{ +		if (isprint(c)) +			chardef[c] = 0; +		else if (iscntrl(c)) +			chardef[c] = IS_CONTROL_CHAR; +		else +			chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; +	} +} +#endif + +/* + * Define the printing format for control chars. + */ +   	public void +setbinfmt(s) +	char *s; +{ +	if (s == NULL || *s == '\0') +		s = "*s<%X>"; +	/* +	 * Select the attributes if it starts with "*". +	 */ +	if (*s == '*') +	{ +		switch (s[1]) +		{ +		case 'd':  binattr = AT_BOLD;      break; +		case 'k':  binattr = AT_BLINK;     break; +		case 's':  binattr = AT_STANDOUT;  break; +		case 'u':  binattr = AT_UNDERLINE; break; +		default:   binattr = AT_NORMAL;    break; +		} +		s += 2; +	} +	binfmt = s; +} + +/* + * Initialize charset data structures. + */ +	public void +init_charset() +{ +	register char *s; + +	s = lgetenv("LESSBINFMT"); +	setbinfmt(s); +	 +	/* +	 * See if environment variable LESSCHARSET is defined. +	 */ +	s = lgetenv("LESSCHARSET"); +	if (icharset(s)) +		return; +	/* +	 * LESSCHARSET is not defined: try LESSCHARDEF. +	 */ +	s = lgetenv("LESSCHARDEF"); +	if (s != NULL && *s != '\0') +	{ +		ichardef(s); +		return; +	} + +#if HAVE_STRSTR +	/* +	 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. +	 */ +	if ((s = lgetenv("LC_ALL")) != NULL || +	    (s = lgetenv("LC_CTYPE")) != NULL || +	    (s = lgetenv("LANG")) != NULL) +	{ +		if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) +			if (icharset("utf-8")) +				return; +	} +#endif + +#if HAVE_LOCALE +	/* +	 * Use setlocale. +	 */ +	ilocale(); +#else +	/* +	 * Default to "latin1". +	 */ +	(void) icharset("latin1"); +#endif +} + +/* + * Is a given character a "binary" character? + */ +	public int +binary_char(c) +	unsigned char c; +{ +	c &= 0377; +	return (chardef[c] & IS_BINARY_CHAR); +} + +/* + * Is a given character a "control" character? + */ +	public int +control_char(c) +	int c; +{ +	c &= 0377; +	return (chardef[c] & IS_CONTROL_CHAR); +} + +/* + * Return the printable form of a character. + * For example, in the "ascii" charset '\3' is printed as "^C". + */ +	public char * +prchar(c) +	int c; +{ +	static char buf[8]; + +	c &= 0377; +	if (!control_char(c)) +		sprintf(buf, "%c", c); +	else if (c == ESC) +		sprintf(buf, "ESC"); +	else if (c < 128 && !control_char(c ^ 0100)) +		sprintf(buf, "^%c", c ^ 0100); +	else +		sprintf(buf, binfmt, c); +	return (buf); +} | 
