From 4712aa3b5901a905ad20cfa26067cc10012ff421 Mon Sep 17 00:00:00 2001 From: "Tim J. Robbins" Date: Mon, 23 Sep 2002 11:35:50 +0000 Subject: [PATCH] Implement the %lc, %ls and %[ conversions, which read sequences of wide characters, non-whitespace wide character strings and wide character strings in a scanset. --- lib/libc/stdio/scanf.3 | 47 +++++++++++------ lib/libc/stdio/vfscanf.c | 106 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 16 deletions(-) diff --git a/lib/libc/stdio/scanf.3 b/lib/libc/stdio/scanf.3 index 0250ed9af22..da5ac5d1b7d 100644 --- a/lib/libc/stdio/scanf.3 +++ b/lib/libc/stdio/scanf.3 @@ -178,9 +178,10 @@ and the next pointer is a pointer to (rather than .Vt float ) , or that the conversion will be one of -.Cm c -or +.Cm c , .Cm s +or +.Cm \&[ and the next pointer is a pointer to an array of .Vt wchar_t (rather than @@ -254,8 +255,15 @@ If no width is given, a default of .Dq infinity is used (with one exception, below); -otherwise at most this many characters are scanned +otherwise at most this many bytes are scanned in processing the conversion. +In the case of the +.Cm lc , +.Cm ls +and +.Cm l[ +conversions, the field width specifies the maximum number +of multibyte characters that will be scanned. Before conversion begins, most conversions skip white space; this white space is not counted against the field width. @@ -334,6 +342,13 @@ terminating character. The input string stops at white space or at the maximum field width, whichever occurs first. +.Pp +If an +.Cm l +qualifier is present, the next pointer must be a pointer to +.Vt wchar_t , +into which the input will be placed after conversion by +.Xr mbrtowc 3 . .It Cm S The same as .Cm ls . @@ -350,6 +365,13 @@ and there must be enough room for all the characters is added). The usual skip of leading white space is suppressed. To skip white space first, use an explicit space in the format. 
+.Pp +If an +.Cm l +qualifier is present, the next pointer must be a pointer to +.Vt wchar_t , +into which the input will be placed after conversion by +.Xr mbrtowc 3 . .It Cm C The same as .Cm lc . @@ -395,6 +417,13 @@ means the set The string ends with the appearance of a character not in the (or, with a circumflex, in) set or when the field width runs out. +.Pp +If an +.Cm l +qualifier is present, the next pointer must be a pointer to +.Vt wchar_t , +into which the input will be placed after conversion by +.Xr mbrtowc 3 . .It Cm p Matches a pointer value (as printed by .Ql %p @@ -492,18 +521,6 @@ The modifiers for positional arguments are not implemented. .Pp The -.Cm l -modifier for -.Cm %c -and -.Cm %s -(and -.Cm %C -and -.Cm %S ) -to specify wide characters and strings is not implemented. -.Pp -The .Cm \&%a and .Cm \&%A diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c index 47334d5c4cc..358c5627cfd 100644 --- a/lib/libc/stdio/vfscanf.c +++ b/lib/libc/stdio/vfscanf.c @@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include <wchar.h> +#include <wctype.h> #include "un-namespace.h" #include "collate.h" @@ -136,7 +138,11 @@ __svfscanf(FILE *fp, const char *fmt0, va_list ap) int nread; /* number of characters consumed from fp */ int base; /* base argument to conversion function */ char ccltab[256]; /* character class table for %[...] 
*/ - char buf[BUF]; /* buffer for numeric conversions */ + char buf[BUF]; /* buffer for numeric and mb conversions */ + wchar_t *wcp; /* handy wide character pointer */ + wchar_t *wcp0; /* saves original value of wcp */ + mbstate_t mbs; /* multibyte conversion state */ + size_t nconv; /* length of multibyte sequence converted */ /* `basefix' is used to avoid `if' tests in the integer scanner */ static short basefix[17] = @@ -371,6 +377,32 @@ literal: } } nread += sum; + } else if (flags & LONG) { + wcp = va_arg(ap, wchar_t *); + n = 0; + while (width != 0) { + if (n == MB_CUR_MAX) + goto input_failure; + buf[n++] = *fp->_p; + fp->_p++; + fp->_r--; + memset(&mbs, 0, sizeof(mbs)); + nconv = mbrtowc(wcp, buf, n, &mbs); + if (nconv == 0 || nconv == (size_t)-1) + goto input_failure; + if (nconv != (size_t)-2) { + nread += n; + width--; + wcp++; + n = 0; + } + if (fp->_r <= 0 && __srefill(fp)) { + if (n != 0) + goto input_failure; + break; + } + } + nassigned++; } else { size_t r = fread((void *)va_arg(ap, char *), 1, width, fp); @@ -402,6 +434,45 @@ literal: } if (n == 0) goto match_failure; + } else if (flags & LONG) { + wcp = wcp0 = va_arg(ap, wchar_t *); + n = 0; + while (width != 0) { + if (n == MB_CUR_MAX) + goto input_failure; + buf[n++] = *fp->_p; + fp->_p++; + fp->_r--; + memset(&mbs, 0, sizeof(mbs)); + nconv = mbrtowc(wcp, buf, n, &mbs); + if (nconv == 0 || nconv == (size_t)-1) + goto input_failure; + if (nconv != (size_t)-2) { + if (wctob(*wcp) != EOF && + !ccltab[wctob(*wcp)]) { + while (--n > 0) + __ungetc(buf[n], + fp); + break; + } + nread += n; + width--; + wcp++; + n = 0; + } + if (fp->_r <= 0 && __srefill(fp)) { + if (n != 0) + goto input_failure; + break; + } + } + if (n != 0) + goto input_failure; + n = wcp - wcp0; + if (n == 0) + goto match_failure; + *wcp = L'\0'; + nassigned++; } else { p0 = p = va_arg(ap, char *); while (ccltab[*fp->_p]) { @@ -439,6 +510,39 @@ literal: break; } nread += n; + } else if (flags & LONG) { + wcp = va_arg(ap, wchar_t 
*); + n = 0; + while (!isspace(*fp->_p) && width != 0) { + if (n == MB_CUR_MAX) + goto input_failure; + buf[n++] = *fp->_p; + fp->_p++; + fp->_r--; + memset(&mbs, 0, sizeof(mbs)); + nconv = mbrtowc(wcp, buf, n, &mbs); + if (nconv == 0 || nconv == (size_t)-1) + goto input_failure; + if (nconv != (size_t)-2) { + if (iswspace(*wcp)) { + while (--n > 0) + __ungetc(buf[n], + fp); + break; + } + nread += n; + width--; + wcp++; + n = 0; + } + if (fp->_r <= 0 && __srefill(fp)) { + if (n != 0) + goto input_failure; + break; + } + } + *wcp = L'\0'; + nassigned++; } else { p0 = p = va_arg(ap, char *); while (!isspace(*fp->_p)) {