diff options
| author | Tim J. Robbins <tjr@FreeBSD.org> | 2002-10-04 03:18:26 +0000 | 
|---|---|---|
| committer | Tim J. Robbins <tjr@FreeBSD.org> | 2002-10-04 03:18:26 +0000 | 
| commit | fd4f1dd9fa30021c03609767a0e865962633d60d (patch) | |
| tree | 0d173f4e040e30d9156e96c95bc7f828c498a795 /lib/libc/string | |
| parent | 92d0f599033fd7ecaae06039287f23f1775441dd (diff) | |
Notes
Diffstat (limited to 'lib/libc/string')
| -rw-r--r-- | lib/libc/string/Makefile.inc | 10 | ||||
| -rw-r--r-- | lib/libc/string/wcscoll.3 | 112 | ||||
| -rw-r--r-- | lib/libc/string/wcscoll.c | 97 | ||||
| -rw-r--r-- | lib/libc/string/wcsxfrm.3 | 124 | ||||
| -rw-r--r-- | lib/libc/string/wcsxfrm.c | 115 | 
5 files changed, 454 insertions, 4 deletions
| diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc index ce73ee1353b1..63c7af029592 100644 --- a/lib/libc/string/Makefile.inc +++ b/lib/libc/string/Makefile.inc @@ -12,9 +12,11 @@ MISRCS+=bcmp.c bcopy.c bzero.c ffs.c index.c memccpy.c memchr.c memcmp.c \  	strlcat.c strlcpy.c strlen.c strmode.c strncat.c strncmp.c strncpy.c \  	strcasestr.c strnstr.c \  	strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \ -	strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscpy.c wcscspn.c \ +	strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscoll.c wcscpy.c \ +	wcscspn.c \  	wcslcat.c wcslcpy.c wcslen.c wcsncat.c wcsncmp.c wcsncpy.c wcspbrk.c \ -	wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wmemchr.c wmemcmp.c \ +	wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wcsxfrm.c wmemchr.c \ +	wmemcmp.c \  	wmemcpy.c wmemmove.c wmemset.c @@ -28,8 +30,8 @@ MAN+=	bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \  	memcmp.3 memcpy.3 memmove.3 memset.3 rindex.3 strcasecmp.3 strcat.3 \  	strchr.3 strcmp.3 strcoll.3 strcpy.3 strcspn.3 strdup.3 strerror.3 \  	string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strrchr.3 strsep.3 \ -	strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcstok.3 wcswidth.3 \ -	wmemchr.3 +	strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcscoll.3 wcstok.3 \ +	wcswidth.3 wcsxfrm.3 wmemchr.3  MLINKS+=strcasecmp.3 strncasecmp.3  MLINKS+=strcat.3 strncat.3 diff --git a/lib/libc/string/wcscoll.3 b/lib/libc/string/wcscoll.3 new file mode 100644 index 000000000000..fb6a36b834b5 --- /dev/null +++ b/lib/libc/string/wcscoll.3 @@ -0,0 +1,112 @@ +.\" Copyright (c) 1990, 1991, 1993 +.\"	The Regents of the University of California.  All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Chris Torek and the American National Standards Committee X3, +.\" on Information Processing Systems. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\"    notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\"    notice, this list of conditions and the following disclaimer in the +.\"    documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\"    must display the following acknowledgement: +.\"	This product includes software developed by the University of +.\"	California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\"    may be used to endorse or promote products derived from this software +.\"    without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\"     @(#)strcoll.3	8.1 (Berkeley) 6/4/93 +.\" FreeBSD: src/lib/libc/string/strcoll.3,v 1.11 2001/10/01 16:09:00 ru Exp  +.\" $FreeBSD$ +.\" +.Dd October 4, 2002 +.Dt WCSCOLL 3 +.Os +.Sh NAME +.Nm wcscoll +.Nd compare wide strings according to current collation +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft int +.Fn wcscoll "const wchar_t *s1" "const wchar_t *s2" +.Sh DESCRIPTION +The +.Fn wcscoll +function compares the null-terminated strings +.Fa s1 +and +.Fa s2 +according to the current locale collation order. +In the +.Dq Li C +locale, +.Fn wcscoll +is equivalent to +.Fn wcscmp . +.Sh RETURN VALUES +The +.Fn wcscoll +function +returns an integer greater than, equal to, or less than 0, +if +.Fa s1 +is greater than, equal to, or less than +.Fa s2 . +.Pp +No return value is reserved to indicate errors; +callers should set +.Va errno +to 0 before calling +.Fn wcscoll . +If it is non-zero upon return from +.Fn wcscoll , +an error has occurred. +.Sh ERRORS +The +.Fn wcscoll +function will fail if: +.Bl -tag -width Er +.It Bq Er EILSEQ +An invalid wide character code was specified. +.It Bq Er ENOMEM +Cannot allocate enough memory for temporary buffers. +.El +.Sh SEE ALSO +.Xr setlocale 3 , +.Xr strcoll 3 , +.Xr wcscmp 3 , +.Xr wcsxfrm 3 +.Sh STANDARDS +The +.Fn wcscoll +function +conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation of +.Fn wcscoll +only works in single-byte +.Dv LC_CTYPE +locales, and falls back to using +.Fn wcscmp +in locales with extended character sets. diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c new file mode 100644 index 000000000000..79dad7d50372 --- /dev/null +++ b/lib/libc/string/wcscoll.c @@ -0,0 +1,97 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "collate.h" + +static char *__mbsdup(const wchar_t *); + +/* + * Placeholder implementation of wcscoll(). Attempts to use the single-byte + * collation ordering where possible, and falls back on wcscmp() in locales + * with extended character sets. + */ +int +wcscoll(const wchar_t *ws1, const wchar_t *ws2) +{ +	char *mbs1, *mbs2; +	int diff, sverrno; + +	if (__collate_load_error || MB_CUR_MAX > 1) +		/* +		 * Locale has no special collating order, could not be +		 * loaded, or has an extended character set; do a fast binary +		 * comparison. +		 */ +		return (wcscmp(ws1, ws2)); + +	if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) { +		/* +		 * Out of memory or illegal wide chars; fall back to wcscmp() +		 * but leave errno indicating the error. Callers that don't +		 * check for error will get a reasonable but often slightly +		 * incorrect result. +		 */ +		sverrno = errno; +		free(mbs1); +		errno = sverrno; +		return (wcscmp(ws1, ws2)); +	} + +	diff = strcoll(mbs1, mbs2); +	sverrno = errno; +	free(mbs1); +	free(mbs2); +	errno = sverrno; + +	return (diff); +} + +static char * +__mbsdup(const wchar_t *ws) +{ +	mbstate_t state; +	const wchar_t *wcp; +	size_t len; +	char *mbs; + +	memset(&state, 0, sizeof(state)); +	wcp = ws; +	if ((len = wcsrtombs(NULL, &wcp, 0, &state)) == (size_t)-1) +		return (NULL); +	if ((mbs = malloc(len + 1)) == NULL) +		return (NULL); +	memset(&state, 0, sizeof(state)); +	wcsrtombs(mbs, &ws, len + 1, &state); + +	return (mbs); +} diff --git a/lib/libc/string/wcsxfrm.3 b/lib/libc/string/wcsxfrm.3 new file mode 100644 index 000000000000..55c03f86dbf4 --- /dev/null +++ b/lib/libc/string/wcsxfrm.3 @@ -0,0 +1,124 @@ +.\" Copyright (c) 1990, 1991, 1993 +.\"	The Regents of the University of California.  All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Chris Torek and the American National Standards Committee X3, +.\" on Information Processing Systems. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\"    notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\"    notice, this list of conditions and the following disclaimer in the +.\"    documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\"    must display the following acknowledgement: +.\"	This product includes software developed by the University of +.\"	California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\"    may be used to endorse or promote products derived from this software +.\"    without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\"     @(#)strxfrm.3	8.1 (Berkeley) 6/4/93 +.\" FreeBSD: src/lib/libc/string/strxfrm.3,v 1.16 2002/09/06 11:24:06 tjr Exp  +.\" $FreeBSD$ +.\" +.Dd October 4, 2002 +.Dt WCSXFRM 3 +.Os +.Sh NAME +.Nm wcsxfrm +.Nd transform a wide string under locale +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn wcsxfrm "wchar_t * restrict dst" "const wchar_t * restrict src" "size_t n" +.Sh DESCRIPTION +The +.Fn wcsxfrm +function transforms a null-terminated wide character string pointed to by +.Fa src +according to the current locale collation order +then copies the transformed string +into +.Fa dst . +No more than +.Fa n +wide characters are copied into +.Fa dst , +including the terminating null character added. +If +.Fa n +is set to 0 +(it helps to determine an actual size needed +for transformation), +.Fa dst +is permitted to be a NULL pointer. +.Pp +Comparing two strings using +.Fn wcscmp +after +.Fn wcsxfrm +is equivalent to comparing +two original strings with +.Fn wcscoll . +.Sh RETURN VALUES +Upon successful completion, +.Fn wcsxfrm +returns the length of the transformed string not including +the terminating null character. +If this value is +.Fa n +or more, the contents of +.Fa dst +are indeterminate. +.Sh SEE ALSO +.Xr setlocale 3 , +.Xr strxfrm 3 , +.Xr wcscoll 3 , +.Xr wcscmp 3 +.Sh STANDARDS +The +.Fn wcsxfrm +function +conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation of +.Fn wcsxfrm +only works in single-byte +.Dv LC_CTYPE +locales, and falls back to using +.Fn wcsncpy +in locales with extended character sets. +.Pp +Comparing two strings using +.Fn wcscmp +after +.Fn wcsxfrm +is +.Em not +always equivalent to comparison with +.Fn wcscoll ; +.Fn wcsxfrm +only stores information about primary collation weights into  +.Fa dst , +whereas +.Fn wcscoll +compares characters using both primary and secondary weights. diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c new file mode 100644 index 000000000000..4be6e464742e --- /dev/null +++ b/lib/libc/string/wcsxfrm.c @@ -0,0 +1,115 @@ +/*- + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + *		at Electronni Visti IA, Kiev, Ukraine. + *			All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if 0 +__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp "); +#endif +__FBSDID("$FreeBSD$"); + +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "collate.h" + +static char *__mbsdup(const wchar_t *); + +/* + * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of + * the logic used. + */ +size_t +wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len) +{ +	int prim, sec, l; +	size_t slen; +	char *mbsrc, *s, *ss; + +	if (*src == L'\0') { +		if (len != 0) +			*dest = L'\0'; +		return (0); +	} + +	if (__collate_load_error || MB_CUR_MAX > 1) { +		slen = wcslen(src); +		if (len > 0) { +			if (slen < len) +				wcscpy(dest, src); +			else { +				wcsncpy(dest, src, len - 1); +				dest[len - 1] = L'\0'; +			} +		} +		return (slen); +	} + +	mbsrc = __mbsdup(src); +	slen = 0; +	prim = sec = 0; +	ss = s = __collate_substitute(mbsrc); +	while (*s != '\0') { +		while (*s != '\0' && prim == 0) { +			__collate_lookup(s, &l, &prim, &sec); +			s += l; +		} +		if (prim != 0) { +			if (len > 1) { +				*dest++ = (wchar_t)prim; +				len--; +			} +			slen++; +			prim = 0; +		} +	} +	free(ss); +	free(mbsrc); +	if (len != 0) +		*dest = L'\0'; + +	return (slen); +} + +static char * +__mbsdup(const wchar_t *ws) +{ +	mbstate_t state; +	const wchar_t *wcp; +	size_t len; +	char *mbs; + +	memset(&state, 0, sizeof(state)); +	wcp = ws; +	if ((len = wcsrtombs(NULL, &wcp, 0, &state)) == (size_t)-1) +		return (NULL); +	if ((mbs = malloc(len + 1)) == NULL) +		return (NULL); +	memset(&state, 0, sizeof(state)); +	wcsrtombs(mbs, &ws, len + 1, &state); + +	return (mbs); +} | 
