summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tim J. Robbins <tjr@FreeBSD.org> 2002-09-23 11:35:50 +0000
committer: Tim J. Robbins <tjr@FreeBSD.org> 2002-09-23 11:35:50 +0000
commit: 4712aa3b5901a905ad20cfa26067cc10012ff421 (patch)
tree: 1697747a3d06969079977f9a6d255bb6ad5ed00a
parent: adc106840cc910d707a5d2ea3768f2914dad62e9 (diff)
Notes
-rw-r--r-- lib/libc/stdio/scanf.3 | 47
-rw-r--r-- lib/libc/stdio/vfscanf.c | 106
2 files changed, 137 insertions, 16 deletions
diff --git a/lib/libc/stdio/scanf.3 b/lib/libc/stdio/scanf.3
index 0250ed9af223..da5ac5d1b7d0 100644
--- a/lib/libc/stdio/scanf.3
+++ b/lib/libc/stdio/scanf.3
@@ -178,9 +178,10 @@ and the next pointer is a pointer to
(rather than
.Vt float ) ,
or that the conversion will be one of
-.Cm c
-or
+.Cm c ,
.Cm s
+or
+.Cm \&[
and the next pointer is a pointer to an array of
.Vt wchar_t
(rather than
@@ -254,8 +255,15 @@ If no width is given,
a default of
.Dq infinity
is used (with one exception, below);
-otherwise at most this many characters are scanned
+otherwise at most this many bytes are scanned
in processing the conversion.
+In the case of the
+.Cm lc ,
+.Cm ls
+and
+.Cm l[
+conversions, the field width specifies the maximum number
+of multibyte characters that will be scanned.
Before conversion begins,
most conversions skip white space;
this white space is not counted against the field width.
@@ -334,6 +342,13 @@ terminating
character.
The input string stops at white space
or at the maximum field width, whichever occurs first.
+.Pp
+If an
+.Cm l
+qualifier is present, the next pointer must be a pointer to
+.Vt wchar_t ,
+into which the input will be placed after conversion by
+.Xr mbrtowc 3 .
.It Cm S
The same as
.Cm ls .
@@ -350,6 +365,13 @@ and there must be enough room for all the characters
is added).
The usual skip of leading white space is suppressed.
To skip white space first, use an explicit space in the format.
+.Pp
+If an
+.Cm l
+qualifier is present, the next pointer must be a pointer to
+.Vt wchar_t ,
+into which the input will be placed after conversion by
+.Xr mbrtowc 3 .
.It Cm C
The same as
.Cm lc .
@@ -395,6 +417,13 @@ means the set
The string ends with the appearance of a character not in the
(or, with a circumflex, in) set
or when the field width runs out.
+.Pp
+If an
+.Cm l
+qualifier is present, the next pointer must be a pointer to
+.Vt wchar_t ,
+into which the input will be placed after conversion by
+.Xr mbrtowc 3 .
.It Cm p
Matches a pointer value (as printed by
.Ql %p
@@ -492,18 +521,6 @@ The
modifiers for positional arguments are not implemented.
.Pp
The
-.Cm l
-modifier for
-.Cm %c
-and
-.Cm %s
-(and
-.Cm %C
-and
-.Cm %S )
-to specify wide characters and strings is not implemented.
-.Pp
-The
.Cm \&%a
and
.Cm \&%A
diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index 47334d5c4cc9..358c5627cfd2 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
#include "un-namespace.h"
#include "collate.h"
@@ -136,7 +138,11 @@ __svfscanf(FILE *fp, const char *fmt0, va_list ap)
int nread; /* number of characters consumed from fp */
int base; /* base argument to conversion function */
char ccltab[256]; /* character class table for %[...] */
- char buf[BUF]; /* buffer for numeric conversions */
+ char buf[BUF]; /* buffer for numeric and mb conversions */
+ wchar_t *wcp; /* handy wide character pointer */
+ wchar_t *wcp0; /* saves original value of wcp */
+ mbstate_t mbs; /* multibyte conversion state */
+ size_t nconv; /* length of multibyte sequence converted */
/* `basefix' is used to avoid `if' tests in the integer scanner */
static short basefix[17] =
@@ -371,6 +377,32 @@ literal:
}
}
nread += sum;
+ } else if (flags & LONG) {
+ wcp = va_arg(ap, wchar_t *);
+ n = 0;
+ while (width != 0) {
+ if (n == MB_CUR_MAX)
+ goto input_failure;
+ buf[n++] = *fp->_p;
+ fp->_p++;
+ fp->_r--;
+ memset(&mbs, 0, sizeof(mbs));
+ nconv = mbrtowc(wcp, buf, n, &mbs);
+ if (nconv == 0 || nconv == (size_t)-1)
+ goto input_failure;
+ if (nconv != (size_t)-2) {
+ nread += n;
+ width--;
+ wcp++;
+ n = 0;
+ }
+ if (fp->_r <= 0 && __srefill(fp)) {
+ if (n != 0)
+ goto input_failure;
+ break;
+ }
+ }
+ nassigned++;
} else {
size_t r = fread((void *)va_arg(ap, char *), 1,
width, fp);
@@ -402,6 +434,45 @@ literal:
}
if (n == 0)
goto match_failure;
+ } else if (flags & LONG) {
+ wcp = wcp0 = va_arg(ap, wchar_t *);
+ n = 0;
+ while (width != 0) {
+ if (n == MB_CUR_MAX)
+ goto input_failure;
+ buf[n++] = *fp->_p;
+ fp->_p++;
+ fp->_r--;
+ memset(&mbs, 0, sizeof(mbs));
+ nconv = mbrtowc(wcp, buf, n, &mbs);
+ if (nconv == 0 || nconv == (size_t)-1)
+ goto input_failure;
+ if (nconv != (size_t)-2) {
+ if (wctob(*wcp) != EOF &&
+ !ccltab[wctob(*wcp)]) {
+ while (--n > 0)
+ __ungetc(buf[n],
+ fp);
+ break;
+ }
+ nread += n;
+ width--;
+ wcp++;
+ n = 0;
+ }
+ if (fp->_r <= 0 && __srefill(fp)) {
+ if (n != 0)
+ goto input_failure;
+ break;
+ }
+ }
+ if (n != 0)
+ goto input_failure;
+ n = wcp - wcp0;
+ if (n == 0)
+ goto match_failure;
+ *wcp = L'\0';
+ nassigned++;
} else {
p0 = p = va_arg(ap, char *);
while (ccltab[*fp->_p]) {
@@ -439,6 +510,39 @@ literal:
break;
}
nread += n;
+ } else if (flags & LONG) {
+ wcp = va_arg(ap, wchar_t *);
+ n = 0;
+ while (!isspace(*fp->_p) && width != 0) {
+ if (n == MB_CUR_MAX)
+ goto input_failure;
+ buf[n++] = *fp->_p;
+ fp->_p++;
+ fp->_r--;
+ memset(&mbs, 0, sizeof(mbs));
+ nconv = mbrtowc(wcp, buf, n, &mbs);
+ if (nconv == 0 || nconv == (size_t)-1)
+ goto input_failure;
+ if (nconv != (size_t)-2) {
+ if (iswspace(*wcp)) {
+ while (--n > 0)
+ __ungetc(buf[n],
+ fp);
+ break;
+ }
+ nread += n;
+ width--;
+ wcp++;
+ n = 0;
+ }
+ if (fp->_r <= 0 && __srefill(fp)) {
+ if (n != 0)
+ goto input_failure;
+ break;
+ }
+ }
+ *wcp = L'\0';
+ nassigned++;
} else {
p0 = p = va_arg(ap, char *);
while (!isspace(*fp->_p)) {