1 files changed, 160 insertions, 182 deletions
diff --git a/contrib/file/ascmagic.c b/contrib/file/ascmagic.c
index 43d467cd3e229..2a76a63378ff1 100644
--- a/contrib/file/ascmagic.c
+++ b/contrib/file/ascmagic.c
@@ -1,34 +1,10 @@
 /*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice immediately at the beginning of the file, without modification,
- *    this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
  * ASCII magic -- file types that we know based on keywords
  * that can appear anywhere in the file.
  *
+ * Copyright (c) Ian F. Darwin, 1987.
+ * Written by Ian F. Darwin.
+ *
  * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
  * to handle character codes other than ASCII on a unified basis.
  *
@@ -36,8 +12,29 @@
  * international characters, now subsumed into this file.
  */
 
+/*
+ * This software is not subject to any license of the American Telephone
+ * and Telegraph Company or of the Regents of the University of California.
+ *
+ * Permission is granted to anyone to use this software for any purpose on
+ * any computer system, and to alter it and redistribute it freely, subject
+ * to the following restrictions:
+ *
+ * 1. The author is not responsible for the consequences of use of this
+ *    software, no matter how awful, even if they arise from flaws in it.
+ *
+ * 2. The origin of this software must not be misrepresented, either by
+ *    explicit claim or by omission.  Since few users ever read sources,
+ *    credits must appear in the documentation.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.  Since few users
+ *    ever read sources, credits must appear in the documentation.
+ *
+ * 4. This notice may not be removed or altered.
+ */
+
 #include "file.h"
-#include "magic.h"
 #include <stdio.h>
 #include <string.h>
 #include <memory.h>
@@ -49,7 +46,7 @@
 #include "names.h"
 
 #ifndef	lint
-FILE_RCSID("@(#)$Id: ascmagic.c,v 1.45 2006/03/12 22:09:33 christos Exp $")
+FILE_RCSID("@(#)$Id: ascmagic.c,v 1.28 2000/08/05 17:36:47 christos Exp $")
 #endif	/* lint */
 
 typedef unsigned long unichar;
@@ -58,34 +55,35 @@ typedef unsigned long unichar;
 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
 		  || (x) == 0x85 || (x) == '\f')
 
-private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_unicode(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
-private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
-private int ascmatch(const unsigned char *, const unichar *, size_t);
-
-
-protected int
-file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
+static int looks_ascii __P((const unsigned char *, int, unichar *, int *));
+static int looks_utf8 __P((const unsigned char *, int, unichar *, int *));
+static int looks_unicode __P((const unsigned char *, int, unichar *, int *));
+static int looks_latin1 __P((const unsigned char *, int, unichar *, int *));
+static int looks_extended __P((const unsigned char *, int, unichar *, int *));
+static void from_ebcdic __P((const unsigned char *, int, unsigned char *));
+static int ascmatch __P((const unsigned char *, const unichar *, int));
+
+int
+ascmagic(buf, nbytes)
+	unsigned char *buf;
+	int nbytes;	/* size actually read */
 {
-	size_t i;
-	unsigned char *nbuf = NULL;
-	unichar *ubuf = NULL;	
-	size_t ulen;
+	int i;
+	unsigned char *s;
+	char nbuf[HOWMANY+1];		/* one extra for terminating '\0' */
+	unichar ubuf[HOWMANY+1];	/* one extra for terminating '\0' */
+	int ulen;
+	char *token;
 	struct names *p;
-	int rv = -1;
 
-	const char *code = NULL;
-	const char *code_mime = NULL;
-	const char *type = NULL;
-	const char *subtype = NULL;
-	const char *subtype_mime = NULL;
+	char *code = NULL;
+	char *code_mime = NULL;
+	char *type = NULL;
+	char *subtype = NULL;
+	char *subtype_mime = NULL;
 
 	int has_escapes = 0;
 	int has_backspace = 0;
-	int seen_cr = 0;
 
 	int n_crlf = 0;
 	int n_lf = 0;
@@ -96,16 +94,23 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 	int has_long_lines = 0;
 
 	/*
-	 * Undo the NUL-termination kindly provided by process()
-	 * but leave at least one byte to look at
+	 * Do the tar test first, because if the first file in the tar
+	 * archive starts with a dot, we can confuse it with an nroff file.
 	 */
-	while (nbytes > 1 && buf[nbytes - 1] == '\0')
-		nbytes--;
+	switch (is_tar(buf, nbytes)) {
+	case 1:
+		ckfputs(iflag ? "application/x-tar" : "tar archive", stdout);
+		return 1;
+	case 2:
+		ckfputs(iflag ? "application/x-tar, POSIX"
+				: "POSIX tar archive", stdout);
+		return 1;
+	}
+
+	/* Undo the NUL-termination kindly provided by process() */
 
-	if ((nbuf = malloc((nbytes + 1) * sizeof(nbuf[0]))) == NULL)
-		goto done;
-	if ((ubuf = malloc((nbytes + 1) * sizeof(ubuf[0]))) == NULL)
-		goto done;
+	while (nbytes > 0 && buf[nbytes - 1] == '\0')
+		nbytes--;
 
 	/*
 	 * Then try to determine whether it's any character code we can
@@ -121,7 +126,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 		code = "UTF-8 Unicode";
 		code_mime = "utf-8";
 		type = "text";
-	} else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen)) != 0) {
+	} else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen))) {
 		if (i == 1)
 			code = "Little-endian UTF-16 Unicode";
 		else
@@ -149,16 +154,10 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 			type = "character data";
 			code_mime = "ebcdic";
 		} else {
-			rv = 0;
-			goto done;  /* doesn't look like text at all */
+			return 0;  /* doesn't look like text at all */
 		}
 	}
 
-	if (nbytes <= 1) {
-		rv = 0;
-		goto done;
-	}
-
 	/*
 	 * for troff, look for . + letter + letter or .\";
 	 * this must be done to disambiguate tar archives' ./file
@@ -173,10 +172,8 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 		while (ISSPC(*tp))
 			++tp;	/* skip leading whitespace */
 		if ((tp[0] == '\\' && tp[1] == '\"') ||
-		    (isascii((unsigned char)tp[0]) &&
-		     isalnum((unsigned char)tp[0]) &&
-		     isascii((unsigned char)tp[1]) &&
-		     isalnum((unsigned char)tp[1]) &&
+		    (isascii(tp[0]) && isalnum(tp[0]) &&
+		     isascii(tp[1]) && isalnum(tp[1]) &&
 		     ISSPC(tp[2]))) {
 			subtype_mime = "text/troff";
 			subtype = "troff or preprocessor input";
@@ -194,7 +191,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 
 	i = 0;
 	while (i < ulen) {
-		size_t end;
+		int end;
 
 		/*
 		 * skip past any leading space
@@ -215,8 +212,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
 		 * compare the word thus isolated against the token list
 		 */
 		for (p = names; p < names + NNAMES; p++) {
-			if (ascmatch((const unsigned char *)p->name, ubuf + i,
-			    end - i)) {
+			if (ascmatch(p->name, ubuf + i, end - i)) {
 				subtype = types[p->type].human;
 				subtype_mime = types[p->type].mime;
 				goto subtype_identified;
@@ -232,25 +228,6 @@ subtype_identified:
 	 * Now try to discover other details about the file.
 	 */
 	for (i = 0; i < ulen; i++) {
-		if (ubuf[i] == '\n') {
-			if (seen_cr)
-				n_crlf++;
-			else
-				n_lf++;
-			last_line_end = i;
-		} else if (seen_cr)
-			n_cr++;
-
-		seen_cr = (ubuf[i] == '\r');
-		if (seen_cr)
-			last_line_end = i;
-
-		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
-			n_nel++;
-			last_line_end = i;
-		}
-
-		/* If this line is _longer_ than MAXLINELEN, remember it. */
 		if (i > last_line_end + MAXLINELEN)
 			has_long_lines = 1;
 
@@ -258,49 +235,48 @@ subtype_identified:
 			has_escapes = 1;
 		if (ubuf[i] == '\b')
 			has_backspace = 1;
-	}
 
-	/* Beware, if the data has been truncated, the final CR could have
-	   been followed by a LF.  If we have HOWMANY bytes, it indicates
-	   that the data might have been truncated, probably even before
-	   this function was called. */
-	if (seen_cr && nbytes < HOWMANY)
-		n_cr++;
-
-	if ((ms->flags & MAGIC_MIME)) {
-		if (subtype_mime) {
-			if (file_printf(ms, subtype_mime) == -1)
-				goto done;
-		} else {
-			if (file_printf(ms, "text/plain") == -1)
-				goto done;
+		if (ubuf[i] == '\r' && (i + 1 <  ulen && ubuf[i + 1] == '\n')) {
+			n_crlf++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) {
+			n_cr++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == '\n' && (i - 1 <  0    || ubuf[i - 1] != '\r')) {
+			n_lf++;
+			last_line_end = i;
 		}
+		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
+			n_nel++;
+			last_line_end = i;
+		}
+	}
+
+	if (iflag) {
+		if (subtype_mime)
+			ckfputs(subtype_mime, stdout);
+		else
+			ckfputs("text/plain", stdout);
 
 		if (code_mime) {
-			if (file_printf(ms, "; charset=") == -1)
-				goto done;
-			if (file_printf(ms, code_mime) == -1)
-				goto done;
+			ckfputs("; charset=", stdout);
+			ckfputs(code_mime, stdout);
 		}
 	} else {
-		if (file_printf(ms, code) == -1)
-			goto done;
+		ckfputs(code, stdout);
 
 		if (subtype) {
-			if (file_printf(ms, " ") == -1)
-				goto done;
-			if (file_printf(ms, subtype) == -1)
-				goto done;
+			ckfputs(" ", stdout);
+			ckfputs(subtype, stdout);
 		}
 
-		if (file_printf(ms, " ") == -1)
-			goto done;
-		if (file_printf(ms, type) == -1)
-			goto done;
+		ckfputs(" ", stdout);
+		ckfputs(type, stdout);
 
 		if (has_long_lines)
-			if (file_printf(ms, ", with very long lines") == -1)
-				goto done;
+			ckfputs(", with very long lines", stdout);
 
 		/*
 		 * Only report line terminators if we find one other than LF,
@@ -308,62 +284,47 @@ subtype_identified:
 		 */
 		if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
 		    (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
-			if (file_printf(ms, ", with") == -1)
-				goto done;
+			ckfputs(", with", stdout);
 
-			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)			{
-				if (file_printf(ms, " no") == -1)
-					goto done;
-			} else {
+			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)
+				ckfputs(" no", stdout);
+			else {
 				if (n_crlf) {
-					if (file_printf(ms, " CRLF") == -1)
-						goto done;
+					ckfputs(" CRLF", stdout);
 					if (n_cr || n_lf || n_nel)
-						if (file_printf(ms, ",") == -1)
-							goto done;
+						ckfputs(",", stdout);
 				}
 				if (n_cr) {
-					if (file_printf(ms, " CR") == -1)
-						goto done;
+					ckfputs(" CR", stdout);
 					if (n_lf || n_nel)
-						if (file_printf(ms, ",") == -1)
-							goto done;
+						ckfputs(",", stdout);
 				}
 				if (n_lf) {
-					if (file_printf(ms, " LF") == -1)
-						goto done;
+					ckfputs(" LF", stdout);
 					if (n_nel)
-						if (file_printf(ms, ",") == -1)
-							goto done;
+						ckfputs(",", stdout);
 				}
 				if (n_nel)
-					if (file_printf(ms, " NEL") == -1)
-						goto done;
+					ckfputs(" NEL", stdout);
 			}
 
-			if (file_printf(ms, " line terminators") == -1)
-				goto done;
+			ckfputs(" line terminators", stdout);
 		}
 
 		if (has_escapes)
-			if (file_printf(ms, ", with escape sequences") == -1)
-				goto done;
+			ckfputs(", with escape sequences", stdout);
 		if (has_backspace)
-			if (file_printf(ms, ", with overstriking") == -1)
-				goto done;
+			ckfputs(", with overstriking", stdout);
 	}
-	rv = 1;
-done:
-	if (nbuf)
-		free(nbuf);
-	if (ubuf)
-		free(ubuf);
 
-	return rv;
+	return 1;
 }
 
-private int
-ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
+static int
+ascmatch(s, us, ulen)
+	const unsigned char *s;
+	const unichar *us;
+	int ulen;
 {
 	size_t i;
 
@@ -435,7 +396,7 @@ ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
 #define I 2   /* character appears in ISO-8859 text */
 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
 
-private char text_chars[256] = {
+static char text_chars[256] = {
 	/*                  BEL BS HT LF    FF CR    */
 	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
         /*                              ESC          */
@@ -457,9 +418,12 @@ private char text_chars[256] = {
 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
 };
 
-private int
-looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)
+static int
+looks_ascii(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
 {
 	int i;
 
@@ -477,8 +441,12 @@ looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 	return 1;
 }
 
-private int
-looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+static int
+looks_latin1(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
 {
 	int i;
 
@@ -496,9 +464,12 @@ looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ule
 	return 1;
 }
 
-private int
-looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)
+static int
+looks_extended(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
 {
 	int i;
 
@@ -516,8 +487,12 @@ looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 	return 1;
 }
 
-private int
-looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+int
+looks_utf8(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
 {
 	int i, n;
 	unichar c;
@@ -578,9 +553,12 @@ done:
 	return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
 }
 
-private int
-looks_unicode(const unsigned char *buf, size_t nbytes, unichar *ubuf,
-    size_t *ulen)
+static int
+looks_unicode(buf, nbytes, ubuf, ulen)
+	const unsigned char *buf;
+	int nbytes;
+	unichar *ubuf;
+	int *ulen;
 {
 	int bigend;
 	int i;
@@ -607,12 +585,11 @@ looks_unicode(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 
 		if (ubuf[*ulen - 1] == 0xfffe)
 			return 0;
-		if (ubuf[*ulen - 1] < 128 &&
-		    text_chars[(size_t)ubuf[*ulen - 1]] != T)
+		if (ubuf[*ulen - 1] < 128 && text_chars[ubuf[*ulen - 1]] != T)
 			return 0;
 	}
 
-	return 1 + bigend;
+	return 1;
 }
 
 #undef F
@@ -642,7 +619,7 @@ looks_unicode(const unsigned char *buf, size_t nbytes, unichar *ubuf,
  * between old-style and internationalized examples of text.
  */
 
-private unsigned char ebcdic_to_ascii[] = {
+unsigned char ebcdic_to_ascii[] = {
   0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
  16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
 128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
@@ -661,7 +638,6 @@ private unsigned char ebcdic_to_ascii[] = {
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
 };
 
-#ifdef notdef
 /*
  * The following EBCDIC-to-ASCII table may relate more closely to reality,
  * or at least to modern reality.  It comes from
@@ -676,7 +652,7 @@ private unsigned char ebcdic_to_ascii[] = {
  * cases for the NEL character can be taken out of the code.
  */
 
-private unsigned char ebcdic_1047_to_8859[] = {
+unsigned char ebcdic_1047_to_8859[] = {
 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
@@ -694,13 +670,15 @@ private unsigned char ebcdic_1047_to_8859[] = {
 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
 };
-#endif
 
 /*
  * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
  */
-private void
-from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
+static void
+from_ebcdic(buf, nbytes, out)
+	const unsigned char *buf;
+	int nbytes;
+	unsigned char *out;
 {
 	int i;