diff options
Diffstat (limited to 'usr.bin/wc')
-rw-r--r-- | usr.bin/wc/Makefile | 15 | ||||
-rw-r--r-- | usr.bin/wc/Makefile.depend | 17 | ||||
-rw-r--r-- | usr.bin/wc/Makefile.depend.options | 7 | ||||
-rw-r--r-- | usr.bin/wc/tests/Makefile | 6 | ||||
-rw-r--r-- | usr.bin/wc/tests/Makefile.depend | 10 | ||||
-rwxr-xr-x | usr.bin/wc/tests/wc_test.sh | 248 | ||||
-rw-r--r-- | usr.bin/wc/wc.1 | 209 | ||||
-rw-r--r-- | usr.bin/wc/wc.c | 358 |
8 files changed, 870 insertions, 0 deletions
diff --git a/usr.bin/wc/Makefile b/usr.bin/wc/Makefile new file mode 100644 index 000000000000..e4fe6cb452d6 --- /dev/null +++ b/usr.bin/wc/Makefile @@ -0,0 +1,15 @@ +.include <src.opts.mk> + +PROG= wc +LIBADD= xo + +.if ${MK_CASPER} != "no" +LIBADD+= casper +LIBADD+= cap_fileargs +CFLAGS+=-DWITH_CASPER +.endif + +HAS_TESTS= +SUBDIR.${MK_TESTS}= tests + +.include <bsd.prog.mk> diff --git a/usr.bin/wc/Makefile.depend b/usr.bin/wc/Makefile.depend new file mode 100644 index 000000000000..58dd23ba0203 --- /dev/null +++ b/usr.bin/wc/Makefile.depend @@ -0,0 +1,17 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + include \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcapsicum \ + lib/libcompiler_rt \ + lib/libxo/libxo \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/usr.bin/wc/Makefile.depend.options b/usr.bin/wc/Makefile.depend.options new file mode 100644 index 000000000000..16ba822617d3 --- /dev/null +++ b/usr.bin/wc/Makefile.depend.options @@ -0,0 +1,7 @@ +# This file is not autogenerated - take care! + +DIRDEPS_OPTIONS= CASPER + +DIRDEPS.CASPER.yes= lib/libcasper/services/cap_fileargs + +.include <dirdeps-options.mk> diff --git a/usr.bin/wc/tests/Makefile b/usr.bin/wc/tests/Makefile new file mode 100644 index 000000000000..18002a8d70f4 --- /dev/null +++ b/usr.bin/wc/tests/Makefile @@ -0,0 +1,6 @@ +PACKAGE= tests + +ATF_TESTS_SH= wc_test +BINDIR= ${TESTSDIR} + +.include <bsd.test.mk> diff --git a/usr.bin/wc/tests/Makefile.depend b/usr.bin/wc/tests/Makefile.depend new file mode 100644 index 000000000000..11aba52f82cf --- /dev/null +++ b/usr.bin/wc/tests/Makefile.depend @@ -0,0 +1,10 @@ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/usr.bin/wc/tests/wc_test.sh b/usr.bin/wc/tests/wc_test.sh new file mode 100755 index 000000000000..70eb5f5a2b37 --- /dev/null +++ b/usr.bin/wc/tests/wc_test.sh @@ -0,0 +1,248 @@ +# +# Copyright (c) 2023 Klara, Inc. +# +# SPDX-License-Identifier: BSD-2-Clause +# + +# +# These tests need to run in a multibyte locale with non-localized +# error messages. +# +export LC_CTYPE=C.UTF-8 +export LC_MESSAGES=C + +# +# Size of wc's read buffer. +# +MAXBSIZE=65536 + +# +# Sample text containing multibyte characters +# +tv="Der bode en underlig gråsprængt en +på den yderste nøgne ø; – +han gjorde visst intet menneske mén +hverken på land eller sjø; +dog stundom gnistred hans øjne stygt, – +helst mod uroligt vejr, – +og da mente folk, at han var forrykt, +og da var der få, som uden frykt +kom Terje Vigen nær. +" +tvl=10 +tvw=55 +tvc=300 +tvm=283 +tvcL=42 +tvmL=39 + +# +# Run a series of tests using the same input file. The first argument +# is the name of the file. The next three are the expected line, +# word, and byte counts. The optional fifth is the expected character +# count; if not provided, it is expected to be identical to the byte +# count. +# +atf_check_wc() { + local file="$1" + local l="$2" + local w="$3" + local c="$4" + local m="${5-$4}" + + atf_check -o match:"^ +${l} +${w} +${c}\$" wc <"${file}" + atf_check -o match:"^ +${l}\$" wc -l <"${file}" + atf_check -o match:"^ +${w}\$" wc -w <"${file}" + atf_check -o match:"^ +${c}\$" wc -c <"${file}" + atf_check -o match:"^ +${m}\$" wc -m <"${file}" + atf_check -o match:"^ +${l} +${w} +${c} ${file}\$" wc "$file" + atf_check -o match:"^ +${l} ${file}\$" wc -l "$file" + atf_check -o match:"^ +${w} ${file}\$" wc -w "$file" + atf_check -o match:"^ +${c} ${file}\$" wc -c "$file" + atf_check -o match:"^ +${m} ${file}\$" wc -m "$file" +} + +atf_test_case basic +basic_head() +{ + atf_set "descr" "Basic test case" +} +basic_body() +{ + printf "a b\n" >foo + atf_check_wc foo 1 2 4 +} + +atf_test_case blank +blank_head() +{ + atf_set "descr" "Input containing only blank lines" +} +blank_body() +{ + printf "\n\n\n" >foo + atf_check_wc foo 3 0 3 +} + +atf_test_case empty +empty_head() +{ + atf_set "descr" "Empty input" +} +empty_body() +{ + printf "" >foo + atf_check_wc foo 0 0 0 +} + +atf_test_case invalid +invalid_head() +{ + atf_set "descr" "Invalid multibyte input" +} +invalid_body() +{ + printf "a\377b\n" >foo + atf_check \ + -e match:"Illegal byte sequence" \ + -o match:"^ +4 foo$" \ + wc -m foo +} + +atf_test_case multiline +multiline_head() +{ + atf_set "descr" "Multiline, multibyte input" +} +multiline_body() +{ + printf "%s\n" "$tv" >foo + atf_check_wc foo $tvl $tvw $tvc $tvm + # longest line in bytes + atf_check -o match:"^ +$tvc +$tvcL foo" wc -cL foo + atf_check -o match:"^ +$tvc +$tvcL" wc -cL <foo + # longest line in characters + atf_check -o match:"^ +$tvm +$tvmL foo" wc -mL foo + atf_check -o match:"^ +$tvm +$tvmL" wc -mL <foo +} + +atf_test_case multiline_repeated +multiline_repeated_head() +{ + atf_set "descr" "Multiline input exceeding the input buffer size" +} +multiline_repeated_body() +{ + local c=0 + while [ $c -lt 1000 ] ; do + printf "%1\$s\n%1\$s\n%1\$s\n%1\$s\n%1\$s\n" "$tv" + c=$((c+5)) + done >foo + atf_check_wc foo $((tvl*c)) $((tvw*c)) $((tvc*c)) $((tvm*c)) +} + +atf_test_case nul +nul_head() +{ + atf_set "descr" "Input containing NUL" +} +nul_body() +{ + printf "a\0b\n" >foo + atf_check_wc foo 1 1 4 +} + +atf_test_case poop +poop_head() +{ + atf_set "descr" "Multibyte sequence across buffer boundary" +} +poop_body() +{ + local l=0 w=0 c=0 m=0 + # The code below produces a stream of 4-byte UTF-8 sequences + # aligned on 5-byte boundaries, ensuring that the first full + # read of length MAXBSIZE will end in a partial sequence — + # unless MAXBSIZE is a multiple of 5 (not possible since it's + # a power of 2) or one less than a multiple of 5 (e.g. 2^18 = + # 262,144 = (52429 * 5) - 1) in which case we prepend a single + # newline to push our sequence out of phase. + atf_check_not_equal 0 $((MAXBSIZE % 5)) + :>foo + if [ $((MAXBSIZE % 5)) -eq 4 ] ; then + printf "\n" + l=$((l + 1)) + c=$((c + 1)) + m=$((m + 1)) + fi >>foo + while [ $c -le $MAXBSIZE ] ; do + printf "💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩.💩\n" + l=$((l + 1)) + w=$((w + 1)) + c=$((c + 80)) # 80 bytes + m=$((m + 32)) # 32 multibyte characters + done >>foo + atf_check_wc foo $l $w $c $m +} + +atf_test_case total +total_head() +{ + atf_set "descr" "Multiple inputs" +} +total_body() +{ + printf "%s\n" "$tv" >foo + printf "%s\n" "$tv" >bar + atf_check \ + -o match:"^ +$((tvl*2)) +$((tvw*2)) +$((tvc*2)) total$" \ + wc foo bar +} + +atf_test_case unterminated +unterminated_head() +{ + atf_set "descr" "Input not ending in newline" +} +unterminated_body() +{ + printf "a b" >foo + atf_check_wc foo 0 2 3 +} + +atf_test_case usage +usage_head() +{ + atf_set "descr" "Trigger usage message" +} +usage_body() +{ + atf_check -s exit:1 -e match:"usage: wc" wc -\? +} + +atf_test_case whitespace +whitespace_head() +{ + atf_set "descr" "Input containing only whitespace and newlines" +} +whitespace_body() +{ + printf "\n \n\t\n" >foo + atf_check_wc foo 3 0 5 +} + +atf_init_test_cases() +{ + atf_add_test_case basic + atf_add_test_case blank + atf_add_test_case empty + atf_add_test_case invalid + atf_add_test_case multiline + atf_add_test_case multiline_repeated + atf_add_test_case nul + atf_add_test_case poop + atf_add_test_case total + atf_add_test_case unterminated + atf_add_test_case usage + atf_add_test_case whitespace +} diff --git a/usr.bin/wc/wc.1 b/usr.bin/wc/wc.1 new file mode 100644 index 000000000000..656408794950 --- /dev/null +++ b/usr.bin/wc/wc.1 @@ -0,0 +1,209 @@ +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd July 16, 2025 +.Dt WC 1 +.Os +.Sh NAME +.Nm wc +.Nd word, line, character, and byte count +.Sh SYNOPSIS +.Nm +.Op Fl -libxo +.Op Fl Lclmw +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility displays the number of lines, words, and bytes contained in each +input +.Ar file , +or standard input (if no file is specified) to the standard output. +A line is defined as a string of characters delimited by a +.Aq newline +character. +Characters beyond the final +.Aq newline +character will not be included +in the line count. +.Pp +A word is defined as a string of characters delimited by white space +characters. +White space characters are the set of characters for which the +.Xr iswspace 3 +function returns true. +If more than one input file is specified, a line of cumulative counts +for all the files is displayed on a separate line after the output for +the last file. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl -libxo +Generate output via +.Xr libxo 3 +in a selection of different human and machine readable formats. +See +.Xr xo_options 7 +for details on command line arguments. +.It Fl L +Write the length of the line containing the most bytes (default) or characters +(when +.Fl m +is provided) +to standard output. +When more than one +.Ar file +argument is specified, the longest input line of +.Em all +files is reported as the value of the final +.Dq total . +.It Fl c +The number of bytes in each input file +is written to the standard output. +This will cancel out any prior usage of the +.Fl m +option. +.It Fl l +The number of lines in each input file +is written to the standard output. +.It Fl m +The number of characters in each input file is written to the standard output. +If the current locale does not support multibyte characters, this +is equivalent to the +.Fl c +option. +This will cancel out any prior usage of the +.Fl c +option. +.It Fl w +The number of words in each input file +is written to the standard output. +.El +.Pp +When an option is specified, +.Nm +only reports the information requested by that option. +The order of output always takes the form of line, word, +byte, and file name. +The default action is equivalent to specifying the +.Fl c , l +and +.Fl w +options. +.Pp +If no files are specified, the standard input is used and no +file name is displayed. +The prompt will accept input until receiving EOF, or +.Bq ^D +in most environments. +.Pp +If +.Nm +receives a +.Dv SIGINFO +(see the +.Cm status +argument for +.Xr stty 1 ) +signal, the interim data will be written +to the standard error output in the same format +as the standard completion message. +.Sh ENVIRONMENT +The +.Ev LANG , LC_ALL +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Count the number of characters, words and lines in each of the files +.Pa report1 +and +.Pa report2 +as well as the totals for both: +.Pp +.Dl "wc -mlw report1 report2" +.Pp +Find the longest line in a list of files: +.Pp +.Dl "wc -L file1 file2 file3 | fgrep total" +.Sh COMPATIBILITY +Historically, the +.Nm +utility was documented to define a word as a +.Do +maximal string of +characters delimited by <space>, <tab> or <newline> characters +.Dc . +The implementation, however, did not handle non-printing characters +correctly so that +.Dq Li "\ \ ^D^E\ \ " +counted as 6 spaces, while +.Dq Li foo^D^Ebar +counted as 8 characters. +.Bx 4 +systems after +.Bx 4.3 +modified the implementation to be consistent +with the documentation. +This implementation defines a +.Dq word +in terms of the +.Xr iswspace 3 +function, as required by +.St -p1003.2 . +.Pp +The +.Fl L +option is a non-standard +.Fx +extension, compatible with the +.Fl L +option of the GNU +.Nm +utility. +.Sh SEE ALSO +.Xr iswspace 3 , +.Xr libxo 3 , +.Xr xo_options 7 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.1-2001 . +.Sh HISTORY +A +.Nm +command appeared in +.At v1 . diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c new file mode 100644 index 000000000000..7b83412f3c42 --- /dev/null +++ b/usr.bin/wc/wc.c @@ -0,0 +1,358 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1980, 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/capsicum.h> +#include <sys/param.h> +#include <sys/stat.h> + +#include <capsicum_helpers.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> +#include <wctype.h> +#include <libxo/xo.h> + +#include <libcasper.h> +#include <casper/cap_fileargs.h> + +#define WC_XO_VERSION "1" + +static const char *stdin_filename = "stdin"; + +static fileargs_t *fa; +static uintmax_t tlinect, twordct, tcharct, tlongline; +static bool doline, doword, dochar, domulti, dolongline; +static volatile sig_atomic_t siginfo; +static xo_handle_t *stderr_handle; + +static void show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, + uintmax_t charct, uintmax_t llct); +static int cnt(const char *); +static void usage(void); + +static void +siginfo_handler(int sig __unused) +{ + + siginfo = 1; +} + +static void +reset_siginfo(void) +{ + + signal(SIGINFO, SIG_DFL); + siginfo = 0; +} + +int +main(int argc, char *argv[]) +{ + int ch, errors, total; + cap_rights_t rights; + + (void) setlocale(LC_CTYPE, ""); + + argc = xo_parse_args(argc, argv); + if (argc < 0) + exit(EXIT_FAILURE); + + while ((ch = getopt(argc, argv, "clmwL")) != -1) + switch((char)ch) { + case 'l': + doline = true; + break; + case 'w': + doword = true; + break; + case 'c': + dochar = true; + domulti = false; + break; + case 'L': + dolongline = true; + break; + case 'm': + domulti = true; + dochar = false; + break; + case '?': + default: + usage(); + } + argv += optind; + argc -= optind; + + fa = fileargs_init(argc, argv, O_RDONLY, 0, + cap_rights_init(&rights, CAP_READ, CAP_FSTAT), FA_OPEN); + if (fa == NULL) + xo_err(EXIT_FAILURE, "Unable to initialize casper"); + caph_cache_catpages(); + if (caph_limit_stdio() < 0) + xo_err(EXIT_FAILURE, "Unable to limit stdio"); + if (caph_enter_casper() < 0) + xo_err(EXIT_FAILURE, "Unable to enter capability mode"); + + /* Wc's flags are on by default. */ + if (!(doline || doword || dochar || domulti || dolongline)) + doline = doword = dochar = true; + + stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0); + + xo_set_version(WC_XO_VERSION); + xo_open_container("wc"); + xo_open_list("file"); + + (void)signal(SIGINFO, siginfo_handler); + errors = 0; + total = 0; + if (argc == 0) { + xo_open_instance("file"); + if (cnt(NULL) != 0) + ++errors; + xo_close_instance("file"); + } else { + while (argc--) { + xo_open_instance("file"); + if (cnt(*argv++) != 0) + ++errors; + xo_close_instance("file"); + ++total; + } + } + + xo_close_list("file"); + + if (total > 1) { + xo_open_container("total"); + show_cnt("total", tlinect, twordct, tcharct, tlongline); + xo_close_container("total"); + } + + fileargs_free(fa); + xo_close_container("wc"); + if (xo_finish() < 0) + xo_err(EXIT_FAILURE, "stdout"); + exit(errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +static void +show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, + uintmax_t charct, uintmax_t llct) +{ + xo_handle_t *xop; + + if (!siginfo) + xop = NULL; + else { + xop = stderr_handle; + siginfo = 0; + } + + if (doline) + xo_emit_h(xop, " {:lines/%7ju/%ju}", linect); + if (doword) + xo_emit_h(xop, " {:words/%7ju/%ju}", wordct); + if (dochar || domulti) + xo_emit_h(xop, " {:characters/%7ju/%ju}", charct); + if (dolongline) + xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct); + if (file != stdin_filename) + xo_emit_h(xop, " {:filename/%s}\n", file); + else + xo_emit_h(xop, "\n"); +} + +static int +cnt(const char *file) +{ + static char buf[MAXBSIZE]; + struct stat sb; + mbstate_t mbs; + const char *p; + uintmax_t linect, wordct, charct, llct, tmpll; + ssize_t len; + size_t clen; + int fd; + wchar_t wch; + bool gotsp, warned; + + linect = wordct = charct = llct = tmpll = 0; + if (file == NULL) { + fd = STDIN_FILENO; + file = stdin_filename; + } else if ((fd = fileargs_open(fa, file)) < 0) { + xo_warn("%s: open", file); + return (1); + } + if (doword || (domulti && MB_CUR_MAX != 1)) + goto word; + /* + * If all we need is the number of characters and it's a regular file, + * just stat it. + */ + if (doline == 0 && dolongline == 0) { + if (fstat(fd, &sb)) { + xo_warn("%s: fstat", file); + (void)close(fd); + return (1); + } + /* pseudo-filesystems advertize a zero size */ + if (S_ISREG(sb.st_mode) && sb.st_size > 0) { + reset_siginfo(); + charct = sb.st_size; + show_cnt(file, linect, wordct, charct, llct); + tcharct += charct; + (void)close(fd); + return (0); + } + } + /* + * For files we can't stat, or if we need line counting, slurp the + * file. Line counting is split out because it's a lot faster to get + * lines than to get words, since the word count requires locale + * handling. + */ + while ((len = read(fd, buf, sizeof(buf))) != 0) { + if (len < 0) { + xo_warn("%s: read", file); + (void)close(fd); + return (1); + } + if (siginfo) + show_cnt(file, linect, wordct, charct, llct); + charct += len; + if (doline || dolongline) { + for (p = buf; len > 0; --len, ++p) { + if (*p == '\n') { + if (tmpll > llct) + llct = tmpll; + tmpll = 0; + ++linect; + } else { + tmpll++; + } + } + } + } + reset_siginfo(); + if (doline) + tlinect += linect; + if (dochar) + tcharct += charct; + if (dolongline && llct > tlongline) + tlongline = llct; + show_cnt(file, linect, wordct, charct, llct); + (void)close(fd); + return (0); + + /* Do it the hard way... */ +word: gotsp = true; + warned = false; + memset(&mbs, 0, sizeof(mbs)); + while ((len = read(fd, buf, sizeof(buf))) != 0) { + if (len < 0) { + xo_warn("%s: read", file); + (void)close(fd); + return (1); + } + p = buf; + while (len > 0) { + if (siginfo) + show_cnt(file, linect, wordct, charct, llct); + if (!domulti || MB_CUR_MAX == 1) { + clen = 1; + wch = (unsigned char)*p; + } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 0) { + clen = 1; + } else if (clen == (size_t)-1) { + if (!warned) { + errno = EILSEQ; + xo_warn("%s", file); + warned = true; + } + memset(&mbs, 0, sizeof(mbs)); + clen = 1; + wch = (unsigned char)*p; + } else if (clen == (size_t)-2) { + break; + } + charct++; + if (wch != L'\n') + tmpll++; + len -= clen; + p += clen; + if (wch == L'\n') { + if (tmpll > llct) + llct = tmpll; + tmpll = 0; + ++linect; + } + if (iswspace(wch)) { + gotsp = true; + } else if (gotsp) { + gotsp = false; + ++wordct; + } + } + } + reset_siginfo(); + if (domulti && MB_CUR_MAX > 1) { + if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) + xo_warn("%s", file); + } + if (doline) + tlinect += linect; + if (doword) + twordct += wordct; + if (dochar || domulti) + tcharct += charct; + if (dolongline && llct > tlongline) + tlongline = llct; + show_cnt(file, linect, wordct, charct, llct); + (void)close(fd); + return (0); +} + +static void +usage(void) +{ + xo_error("usage: wc [-Lclmw] [file ...]\n"); + exit(EXIT_FAILURE); +} |