wc(1): Extend non-controversial optimizations to '-c' mode

wc(1)'s slow path for counting words or multibyte characters requires conversion of the 8-bit input stream to wide characters. However, a faster path can be used for counting only lines ('-l' -- newlines have the same representation in all supported encodings) or bytes ('-c').

The existing line count optimization was not used if the input was the implicit stdin. Additionally, it wasn't used if only byte counting was requested. This change expands the fast path to both of these scenarios.

Expanding the buffer size from 64 kB helps reduce the number of read(2) calls needed, but exactly what impact that change has and what size to expand the buffer to are still under discussion.

PR: 224160
Tested by: wosch (earlier version)
Sponsored by: Dell EMC Isilon
author: Conrad Meyer <cem@FreeBSD.org> 2017-12-09 21:55:19 +0000
committer: Conrad Meyer <cem@FreeBSD.org> 2017-12-09 21:55:19 +0000
commit: de1430411de6780e91445ae5c8f6d93efb9a0360 (patch)
tree: 9d1cb0ae54f213efda497cb3b21fa7cb978c4289 /usr.bin/wc
parent: 219afc4fe22e92e21de49b806940ed2d3cb73f07 (diff)
download: src-test-de1430411de6780e91445ae5c8f6d93efb9a0360.tar.gz
src-test-de1430411de6780e91445ae5c8f6d93efb9a0360.zip
1 files changed, 51 insertions, 50 deletions
diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c
index d403433e50427..c0c6810fd4dfd 100644
--- a/usr.bin/wc/wc.c
+++ b/usr.bin/wc/wc.c
@@ -206,30 +206,30 @@ cnt(const char *file)
 	linect = wordct = charct = llct = tmpll = 0;
 	if (file == NULL)
 		fd = STDIN_FILENO;
-	else {
-		if ((fd = open(file, O_RDONLY, 0)) < 0) {
-			xo_warn("%s: open", file);
-			return (1);
-		}
-		if (doword || (domulti && MB_CUR_MAX != 1))
-			goto word;
-		/*
-		 * Line counting is split out because it's a lot faster to get
-		 * lines than to get words, since the word count requires some
-		 * logic.
-		 */
-		if (doline) {
-			while ((len = read(fd, buf, MAXBSIZE))) {
-				if (len == -1) {
-					xo_warn("%s: read", file);
-					(void)close(fd);
-					return (1);
-				}
-				if (siginfo) {
-					show_cnt(file, linect, wordct, charct,
-					    llct);
-				}
-				charct += len;
+	else if ((fd = open(file, O_RDONLY, 0)) < 0) {
+		xo_warn("%s: open", file);
+		return (1);
+	}
+	if (doword || (domulti && MB_CUR_MAX != 1))
+		goto word;
+	/*
+	 * Line counting is split out because it's a lot faster to get
+	 * lines than to get words, since the word count requires some
+	 * logic.
+	 */
+	if (doline || dochar) {
+		while ((len = read(fd, buf, MAXBSIZE))) {
+			if (len == -1) {
+				xo_warn("%s: read", file);
+				(void)close(fd);
+				return (1);
+			}
+			if (siginfo) {
+				show_cnt(file, linect, wordct, charct,
+				    llct);
+			}
+			charct += len;
+			if (doline) {
 				for (p = buf; len--; ++p)
 					if (*p == '\n') {
 						if (tmpll > llct)
@@ -239,37 +239,38 @@ cnt(const char *file)
 					} else
 						tmpll++;
 			}
-			reset_siginfo();
+		}
+		reset_siginfo();
+		if (doline)
 			tlinect += linect;
-			if (dochar)
-				tcharct += charct;
-			if (dolongline) {
-				if (llct > tlongline)
-					tlongline = llct;
-			}
+		if (dochar)
+			tcharct += charct;
+		if (dolongline) {
+			if (llct > tlongline)
+				tlongline = llct;
+		}
+		show_cnt(file, linect, wordct, charct, llct);
+		(void)close(fd);
+		return (0);
+	}
+	/*
+	 * If all we need is the number of characters and it's a
+	 * regular file, just stat the puppy.
+	 */
+	if (dochar || domulti) {
+		if (fstat(fd, &sb)) {
+			xo_warn("%s: fstat", file);
+			(void)close(fd);
+			return (1);
+		}
+		if (S_ISREG(sb.st_mode)) {
+			reset_siginfo();
+			charct = sb.st_size;
 			show_cnt(file, linect, wordct, charct, llct);
+			tcharct += charct;
 			(void)close(fd);
 			return (0);
 		}
-		/*
-		 * If all we need is the number of characters and it's a
-		 * regular file, just stat the puppy.
-		 */
-		if (dochar || domulti) {
-			if (fstat(fd, &sb)) {
-				xo_warn("%s: fstat", file);
-				(void)close(fd);
-				return (1);
-			}
-			if (S_ISREG(sb.st_mode)) {
-				reset_siginfo();
-				charct = sb.st_size;
-				show_cnt(file, linect, wordct, charct, llct);
-				tcharct += charct;
-				(void)close(fd);
-				return (0);
-			}
-		}
 	}
 
 	/* Do it the hard way... */
author	Conrad Meyer <cem@FreeBSD.org>	2017-12-09 21:55:19 +0000
committer	Conrad Meyer <cem@FreeBSD.org>	2017-12-09 21:55:19 +0000
commit	de1430411de6780e91445ae5c8f6d93efb9a0360 (patch)
tree	9d1cb0ae54f213efda497cb3b21fa7cb978c4289 /usr.bin/wc
parent	219afc4fe22e92e21de49b806940ed2d3cb73f07 (diff)
download	src-test-de1430411de6780e91445ae5c8f6d93efb9a0360.tar.gz src-test-de1430411de6780e91445ae5c8f6d93efb9a0360.zip