diff options
Diffstat (limited to 'src/encoding.c')
| -rw-r--r-- | src/encoding.c | 38 | 
1 file changed, 32 insertions, 6 deletions
| diff --git a/src/encoding.c b/src/encoding.c index c1b23cc0f3374..3c116cd74f176 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -35,7 +35,7 @@  #include "file.h"  #ifndef	lint -FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $") +FILE_RCSID("@(#)$File: encoding.c,v 1.13 2015/06/04 19:16:28 christos Exp $")  #endif	/* lint */  #include "magic.h" @@ -47,6 +47,7 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")  private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);  private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,      size_t *); +private int looks_utf7(const unsigned char *, size_t, unichar *, size_t *);  private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);  private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);  private int looks_extended(const unsigned char *, size_t, unichar *, size_t *); @@ -88,9 +89,15 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni  	}  	if (looks_ascii(buf, nbytes, *ubuf, ulen)) { -		DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); -		*code = "ASCII"; -		*code_mime = "us-ascii"; +		if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) { +			DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen)); +			*code = "UTF-7 Unicode"; +			*code_mime = "utf-7"; +		} else { +			DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); +			*code = "ASCII"; +			*code_mime = "us-ascii"; +		}  	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {  		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));  		*code = "UTF-8 Unicode (with BOM)"; @@ -199,8 +206,8 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni  #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */  private char text_chars[256] = { -	/*                  BEL BS HT LF    FF CR    */ -	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */ +	/*                  
BEL BS HT LF VT FF CR    */ +	F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F,  /* 0x0X */  	/*                              ESC          */  	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */  	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */ @@ -372,6 +379,25 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,  }  private int +looks_utf7(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) +{ +	if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v') +		switch (buf[3]) { +		case '8': +		case '9': +		case '+': +		case '/': +			if (ubuf) +				*ulen = 0; +			return 1; +		default: +			return -1; +		} +	else +		return -1; +} + +private int  looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,      size_t *ulen)  { | 
