summary | refs | log | tree | commit | diff
path: root/src/ascmagic.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/ascmagic.c')
-rw-r--r-- src/ascmagic.c 84
1 files changed, 53 insertions, 31 deletions
diff --git a/src/ascmagic.c b/src/ascmagic.c
index 2d1abe543e603..624ac90b1f1c2 100644
--- a/src/ascmagic.c
+++ b/src/ascmagic.c
@@ -35,7 +35,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.98 2017/11/02 20:25:39 christos Exp $")
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.104 2019/05/07 02:27:11 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -80,7 +80,13 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
const char *type = NULL;
bb = *b;
- bb.flen = trim_nuls(b->fbuf, b->flen);
+ bb.flen = trim_nuls(CAST(const unsigned char *, b->fbuf), b->flen);
+ /*
+ * Avoid trimming at an odd byte if the original buffer was evenly
+ * sized; this avoids losing the last character on UTF-16 LE text
+ */
+ if ((bb.flen & 1) && !(b->flen & 1))
+ bb.flen++;
/* If file doesn't look like any sort of text, give up. */
if (file_encoding(ms, &bb, &ubuf, &ulen, &code, &code_mime,
@@ -96,17 +102,18 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
}
protected int
-file_ascmagic_with_encoding(struct magic_set *ms,
+file_ascmagic_with_encoding(struct magic_set *ms,
const struct buffer *b, unichar *ubuf, size_t ulen, const char *code,
const char *type, int text)
{
struct buffer bb;
- const unsigned char *buf = b->fbuf;
+ const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
size_t nbytes = b->flen;
unsigned char *utf8_buf = NULL, *utf8_end;
- size_t mlen, i;
+ size_t mlen, i, len;
int rv = -1;
int mime = ms->flags & MAGIC_MIME;
+ int need_separator = 0;
const char *subtype = NULL;
const char *subtype_mime = NULL;
@@ -121,7 +128,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
int n_nel = 0;
int executable = 0;
- size_t last_line_end = (size_t)-1;
+ size_t last_line_end = CAST(size_t, -1);
int has_long_lines = 0;
nbytes = trim_nuls(buf, nbytes);
@@ -144,12 +151,14 @@ file_ascmagic_with_encoding(struct magic_set *ms,
if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen))
== NULL)
goto done;
- buffer_init(&bb, b->fd, utf8_buf,
- (size_t)(utf8_end - utf8_buf));
+ buffer_init(&bb, b->fd, &b->st, utf8_buf,
+ CAST(size_t, utf8_end - utf8_buf));
if ((rv = file_softmagic(ms, &bb, NULL, NULL,
TEXTTEST, text)) == 0)
rv = -1;
+ else
+ need_separator = 1;
buffer_fini(&bb);
if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))) {
rv = rv == -1 ? 0 : 1;
@@ -200,8 +209,21 @@ file_ascmagic_with_encoding(struct magic_set *ms,
rv = 0;
goto done;
}
+ len = file_printedlen(ms);
if (mime) {
- if (!file_printedlen(ms) && (mime & MAGIC_MIME_TYPE) != 0) {
+ if ((mime & MAGIC_MIME_TYPE) != 0) {
+ if (len) {
+ /*
+ * Softmagic printed something, we
+ * are either done, or we need a separator
+ */
+ if ((ms->flags & MAGIC_CONTINUE) == 0) {
+ rv = 1;
+ goto done;
+ }
+ if (need_separator && file_separator(ms) == -1)
+ goto done;
+ }
if (subtype_mime) {
if (file_printf(ms, "%s", subtype_mime) == -1)
goto done;
@@ -211,7 +233,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
}
}
} else {
- if (file_printedlen(ms)) {
+ if (len) {
switch (file_replace(ms, " text$", ", ")) {
case 0:
switch (file_replace(ms, " text executable$",
@@ -324,42 +346,42 @@ encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
if (ubuf[i] <= 0x7f) {
if (end - buf < 1)
return NULL;
- *buf++ = (unsigned char)ubuf[i];
+ *buf++ = CAST(unsigned char, ubuf[i]);
} else if (ubuf[i] <= 0x7ff) {
if (end - buf < 2)
return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 6) + 0xc0);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] >> 6) + 0xc0);
+ *buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
} else if (ubuf[i] <= 0xffff) {
if (end - buf < 3)
return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 12) + 0xe0);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] >> 12) + 0xe0);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
} else if (ubuf[i] <= 0x1fffff) {
if (end - buf < 4)
return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 18) + 0xf0);
- *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] >> 18) + 0xf0);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
} else if (ubuf[i] <= 0x3ffffff) {
if (end - buf < 5)
return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 24) + 0xf8);
- *buf++ = (unsigned char)(((ubuf[i] >> 18) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] >> 24) + 0xf8);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
} else if (ubuf[i] <= 0x7fffffff) {
if (end - buf < 6)
return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 30) + 0xfc);
- *buf++ = (unsigned char)(((ubuf[i] >> 24) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 18) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] >> 30) + 0xfc);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
+ *buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
} else /* Invalid character */
return NULL;
}