summaryrefslogtreecommitdiff
path: root/contrib/file
diff options
context:
space:
mode:
authorDavid E. O'Brien <obrien@FreeBSD.org>2009-05-04 00:37:44 +0000
committerDavid E. O'Brien <obrien@FreeBSD.org>2009-05-04 00:37:44 +0000
commit7dbb948b5f00eabb81904e9db0aa9fc712939aca (patch)
tree12756817ab8bc4334652c625c7731c9e7fd0cefc /contrib/file
parent8ee6f90a0c45383950152583c7fd62172e27bb73 (diff)
parent4f3dd34290f174adbda2b9668cc6b6c554ae9bb0 (diff)
downloadsrc-test-7dbb948b5f00eabb81904e9db0aa9fc712939aca.tar.gz
src-test-7dbb948b5f00eabb81904e9db0aa9fc712939aca.zip
Merge vendor/file/dist@191739, bringing FILE 5.00 to 8-CURRENT.
Notes
Notes: svn path=/head/; revision=191771
Diffstat (limited to 'contrib/file')
-rw-r--r--contrib/file/.cvsignore1
-rw-r--r--contrib/file/ChangeLog156
-rw-r--r--contrib/file/Magdir/animation23
-rw-r--r--contrib/file/Magdir/audio45
-rw-r--r--contrib/file/Magdir/cafebabe20
-rw-r--r--contrib/file/Magdir/compress6
-rw-r--r--contrib/file/Magdir/elf6
-rw-r--r--contrib/file/Magdir/epoc15
-rw-r--r--contrib/file/Magdir/filesystems273
-rw-r--r--contrib/file/Magdir/graphviz13
-rw-r--r--contrib/file/Magdir/images5
-rw-r--r--contrib/file/Magdir/jpeg1
-rw-r--r--contrib/file/Magdir/mach2
-rw-r--r--contrib/file/Magdir/macintosh3
-rw-r--r--contrib/file/Magdir/msdos49
-rw-r--r--contrib/file/Magdir/perl1
-rw-r--r--contrib/file/Magdir/printer2
-rw-r--r--contrib/file/Magdir/timezone17
-rw-r--r--contrib/file/Magdir/wireless5
-rw-r--r--contrib/file/Magdir/xwindows9
-rw-r--r--contrib/file/Makefile.am3
-rw-r--r--contrib/file/Makefile.am-src18
-rw-r--r--contrib/file/Makefile.in3
-rw-r--r--contrib/file/README56
-rw-r--r--contrib/file/TODO6
-rw-r--r--contrib/file/apprentice.c117
-rw-r--r--contrib/file/apptype.c10
-rw-r--r--contrib/file/ascmagic.c556
-rw-r--r--contrib/file/asprintf.c6
-rw-r--r--contrib/file/cdf.c1105
-rw-r--r--contrib/file/cdf.h298
-rw-r--r--contrib/file/cdf_time.c182
-rw-r--r--contrib/file/compress.c15
-rw-r--r--contrib/file/config.h.in12
-rwxr-xr-xcontrib/file/configure227
-rw-r--r--contrib/file/configure.ac6
-rw-r--r--contrib/file/encoding.c484
-rw-r--r--contrib/file/file.c185
-rw-r--r--contrib/file/file.h60
-rw-r--r--contrib/file/file.man153
-rw-r--r--contrib/file/file_opts.h1
-rw-r--r--contrib/file/fsmagic.c127
-rw-r--r--contrib/file/funcs.c161
-rw-r--r--contrib/file/getopt_long.c10
-rw-r--r--contrib/file/is_tar.c19
-rw-r--r--contrib/file/libmagic.man13
-rw-r--r--contrib/file/magic.c26
-rw-r--r--contrib/file/magic.h20
-rw-r--r--contrib/file/magic.man19
-rw-r--r--contrib/file/patchlevel.h9
-rw-r--r--contrib/file/print.c16
-rw-r--r--contrib/file/readcdf.c256
-rw-r--r--contrib/file/readelf.c27
-rw-r--r--contrib/file/softmagic.c368
-rw-r--r--contrib/file/vasprintf.c9
55 files changed, 4004 insertions, 1231 deletions
diff --git a/contrib/file/.cvsignore b/contrib/file/.cvsignore
deleted file mode 100644
index d89921897ae6b..0000000000000
--- a/contrib/file/.cvsignore
+++ /dev/null
@@ -1 +0,0 @@
-autom4te.cache
diff --git a/contrib/file/ChangeLog b/contrib/file/ChangeLog
index 2c62a725d1b6f..d2110415a1af2 100644
--- a/contrib/file/ChangeLog
+++ b/contrib/file/ChangeLog
@@ -1,9 +1,105 @@
+2008-12-12 15:50 Christos Zoulas <christos@zoulas.com>
+
+ * fix initial offset calculation for non 4K sector files
+
+ * add loop limits to avoid DoS attacks by constructing
+ looping sector references.
+
+2008-12-03 13:05 Christos Zoulas <christos@zoulas.com>
+
+ * fix memory botches on cdf file parsing.
+
+ * exit with non-zero value for any error, not just for the last
+ file processed.
+
+2008-11-09 20:42 Charles Longeau <chl@tuxfamily.org>
+
+ * Replace all str{cpy,cat} functions with strl{cpy,cat}
+ * Ensure that strl{cpy,cat} are included in libmagic,
+ as needed.
+
+2008-11-06 18:18 Christos Zoulas <christos@zoulas.com>
+
+ * Handle ID3 format files.
+
+2008-11-06 23:00 Reuben Thomas <rrt@sc3d.org>
+
+ * Fix --mime, --mime-type and --mime-encoding under new scheme.
+
+ * Rename "ascii" to "text" and add "encoding" test.
+
+ * Return a precise ("utf-16le" or "utf-16be") MIME charset for
+ UTF-16.
+
+ * Fix error in comment caused by automatic indentation adding
+ words!
+
+2008-11-06 10:35 Christos Zoulas <christos@astron.com>
+
+ * use memchr instead of strchr because the string
+ might not be NUL terminated (Scott MacVicar)
+
+2008-11-03 07:31 Reuben Thomas <rrt@sc3d.org>
+
+ * Fix a printf with a non-literal format string.
+
+ * Fix formatting and punctuation of help for "--apple".
+
+2008-10-30 11:00 Reuben Thomas <rrt@sc3d.org>
+
+ * Correct words counts in comments of struct magic.
+
+ * Fix handle_annotation to allow both Apple and MIME types to be
+ printed, and to return correct code if MIME type is
+ printed (1, not 0) or if there's an error (-1 not 1).
+
+ * Fix output of charset for MIME type (precede with semi-colon;
+ fixes Debian bug #501460).
+
+ * Fix potential attacks via conversion specifications in magic
+ strings.
+
+ * Add a FIXME for Debian bug #488562 (magic files should be
+ read in a defined order, by sorting the names).
+
+2008-10-18 16:45 Christos Zoulas <christos@astron.com>
+
+ * Added APPLE file creator/type
+
+2008-10-12 10:20 Christos Zoulas <christos@astron.com>
+
+ * Added CDF parsing
+
+2008-10-09 16:40 Christos Zoulas <christos@astron.com>
+
+ * filesystem and msdos patches (Joerg Jenderek)
+
+2008-10-09 13:20 Christos Zoulas <christos@astron.com>
+
+ * correct --exclude documentation issues: remove troff and fortran
+ and rename "token" to "tokens". (Randy McMurchy)
+
+2008-10-01 10:30 Christos Zoulas <christos@astron.com>
+
+ * Read ~/.magic in addition to the default magic file not instead
+ of, as documented in the man page.
+
+2008-09-10 21:30 Reuben Thomas <rrt@sc3d.org>
+
+ * Comment out graphviz patterns, as they match too many files.
+
2008-08-30 12:54 Christos Zoulas <christos@astron.com>
* Don't eat trailing \n in magic entries.
* Cast defines to allow compilation using a c++ compiler.
+2008-08-25 23:56 Reuben Thomas <rrt@sc3d.org>
+
+ * Add text/x-lua MIME type for Lua scripts.
+
+ * Escape { in regex in graphviz patterns.
+
2008-07-26 00:59 Reuben Thomas <rrt@sc3d.org>
* Add MIME types for special files.
@@ -55,22 +151,22 @@
2008-05-06 00:13 Robert Byrnes <byrnes@wildpumpkin.net>
- * src/Makefile.am:
+ * src/Makefile.am:
Ensure that getopt_long and [v]asprintf are included in libmagic,
as needed.
Remove unnecessary EXTRA_DIST.
- * src/Makefile.in:
+ * src/Makefile.in:
Rerun automake.
- * src/vasprintf.c (dispatch):
+ * src/vasprintf.c (dispatch):
Fix variable precision bug: be sure to step past '*'.
- * src/vasprintf.c (core):
+ * src/vasprintf.c (core):
Remove unreachable code.
- * src/apprentice.c (set_test_type):
+ * src/apprentice.c (set_test_type):
Add cast to avoid compiler warning.
2008-04-22 23:45 Christos Zoulas <christos@astron.com>
@@ -81,12 +177,12 @@
2008-04-04 11:00 Christos Zoulas <christos@astron.com>
- * >= <= is not supported, so fix the magic and warn about it.
+ * >= <= is not supported, so fix the magic and warn about it.
reported by: Thien-Thi Nguyen <ttn@gnuvola.org>
2008-03-27 16:16 Robert Byrnes <byrnes@wildpumpkin.net>
- * src/readelf.c (donote):
+ * src/readelf.c (donote):
ELF core file command name/line bug fixes and enhancements:
Try larger offsets first to avoid false matches
@@ -112,7 +208,7 @@
* Clarify UTF-8 BOM message (Reuben Thomas)
* Add HTML comment to token list in names.h
-
+
2007-02-04 15:50 Christos Zoulas <christos@astron.com>
* Debian fixes (Reuben Thomas)
@@ -152,7 +248,7 @@
2007-10-28 20:48 Christos Zoulas <christos@astron.com>
- * float and double magic support (Behan Webster)
+ * float and double magic support (Behan Webster)
2007-10-28 20:48 Christos Zoulas <christos@astron.com>
@@ -199,7 +295,7 @@
be easily parsed:
mimetype [charset=character-set] [encoding=encoding-mime-type]
- Remove spurious extra text from some MIME type printouts
+ Remove spurious extra text from some MIME type printouts
(mostly in is_tar).
Fix one case where -i produced nothing at all (for a 1-byte file,
@@ -229,7 +325,7 @@
2007-03-15 10:51 Christos Zoulas <christos@astron.com>
* fix fortran and nroff reversed tests (Dmitry V. Levin)
-
+
* fix exclude option (Dmitry V. Levin)
2007-02-08 17:30 Christos Zoulas <christos@astron.com>
@@ -248,7 +344,7 @@
* Add exclude flag.
2007-01-18 05:29 Anon Ymous <do@not.spam.me>
-
+
* Move the "type" detection code from parse() into its own table
driven routine. This avoids maintaining multiple lists in
file.h.
@@ -256,7 +352,7 @@
* Add an optional conditional field (just before the type field).
This code is wrapped in "#ifdef ENABLE_CONDITIONALS" as it is
likely to go away.
-
+
2007-01-16 23:24 Anon Ymous <do@not.spam.me>
* Fix an initialization bug in check_mem().
@@ -327,7 +423,7 @@
2006-12-08 16:32 Christos Zoulas <christos@astron.com>
* store and print the line number of the magic
- entry for debugging.
+ entry for debugging.
* if the magic entry did not print anything,
don't treat it as a match
@@ -342,7 +438,7 @@
file_softmagic.
2006-11-25 13:35 Christos Zoulas <christos@astron.com>
-
+
* Don't store the current offset in the magic
struct, because it needs to be restored and
it was not done properly all the time. Bug
@@ -432,7 +528,7 @@
* Look for note sections in non executables.
2005-09-20 13:33 Christos Zoulas <christos@astron.com>
-
+
* Don't print SVR4 Style in core files multiple times
(Radek Vokál)
@@ -443,9 +539,9 @@
2005-08-18 09:53 Christos Zoulas <christos@astron.com>
* Remove erroneous mention of /etc/magic in the file man page
- This is gentoo bug 101639. (Mike Frysinger)
+ This is gentoo bug 101639. (Mike Frysinger)
- * Cross-compile support and detection (Mike Frysinger)
+ * Cross-compile support and detection (Mike Frysinger)
2005-08-12 10:17 Christos Zoulas <christos@astron.com>
@@ -477,20 +573,20 @@
* Avoid NULL pointer dereference in time conversion.
2005-03-06 00:00 Joerg Walter <jwalt@mail.garni.ch>
-
+
* Add indirect magic offset support, and search mode.
2005-01-12 00:00 Stepan Kasal <kasal@ucw.cz>
- * src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
- If a CRLF text file happens to have CR at offset HOWMANY - 1
- (currently 0xffff), it should not be counted as CR line
- terminator.
- If a line has length exactly MAXLINELEN, it should not yet be
- treated as a ``very long line'', as MAXLINELEN is ``longest sane
- line length''.
- With CRLF, the line length was not computed correctly, and even
- lines of length MAXLINELEN - 1 were treated as ``very long''.
+ * src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
+ If a CRLF text file happens to have CR at offset HOWMANY - 1
+ (currently 0xffff), it should not be counted as CR line
+ terminator.
+ If a line has length exactly MAXLINELEN, it should not yet be
+ treated as a ``very long line'', as MAXLINELEN is ``longest sane
+ line length''.
+ With CRLF, the line length was not computed correctly, and even
+ lines of length MAXLINELEN - 1 were treated as ``very long''.
2004-12-07 14:15 Christos Zoulas <christos@astron.com>
@@ -525,12 +621,12 @@
* Remove 3rd and 4th copyright clause; approved by Ian Darwin.
- * Fix small memory leaks; caught by: Tamas Sarlos
+ * Fix small memory leaks; caught by: Tamas Sarlos
<stamas@csillag.ilab.sztaki.hu>
2004-07-24 16:33 Christos Zoulas <christos@astron.com>
- * magic.mime update Danny Milosavljevic <danny.milo@gmx.net>
+ * magic.mime update Danny Milosavljevic <danny.milo@gmx.net>
* FreeBSD version update Oliver Eikemeier <eikemeier@fillmore-labs.com>
diff --git a/contrib/file/Magdir/animation b/contrib/file/Magdir/animation
index 443338a6c4821..46b23ecd507ca 100644
--- a/contrib/file/Magdir/animation
+++ b/contrib/file/Magdir/animation
@@ -325,6 +325,7 @@
# MP2, M1A
0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
+!:mime audio/mpeg
# rates
>2 byte&0xF0 0x10 \b, 32 kbps
>2 byte&0xF0 0x20 \b, 48 kbps
@@ -399,6 +400,7 @@
# MP3, M2A
0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
+!:mime audio/mpeg
# rate
>2 byte&0xF0 0x10 \b, 8 kbps
>2 byte&0xF0 0x20 \b, 16 kbps
@@ -790,3 +792,24 @@
0 belong 0x00000001
>4 byte&0x1F 0x07
!:mime video/h264
+
+# Type: Bink Video
+# URL: http://wiki.multimedia.cx/index.php?title=Bink_Container
+# From: <hoehle@users.sourceforge.net> 2008-07-18
+0 string BIK Bink Video
+>3 regex =[a-z] rev.%s
+#>4 ulelong x size %d
+>20 ulelong x \b, %d
+>24 ulelong x \bx%d
+>8 ulelong x \b, %d frames
+>32 ulelong x at rate %d/
+>28 ulelong >1 \b%d
+>40 ulelong =0 \b, no audio
+>40 ulelong !0 \b, %d audio track
+>>40 ulelong !1 \bs
+# follow properties of the first audio track only
+>>48 uleshort x %dHz
+>>51 byte&0x20 0 mono
+>>51 byte&0x20 !0 stereo
+#>>51 byte&0x10 0 FFT
+#>>51 byte&0x10 !0 DCT
diff --git a/contrib/file/Magdir/audio b/contrib/file/Magdir/audio
index 3a9c176c8a4bf..04f80eb47ca35 100644
--- a/contrib/file/Magdir/audio
+++ b/contrib/file/Magdir/audio
@@ -286,43 +286,14 @@
# SGI SoundTrack <mpruett@sgi.com>
0 string _SGI_SoundTrack SGI SoundTrack project file
# ID3 version 2 tags <waschk@informatik.uni-rostock.de>
-0 string ID3 Audio file with ID3 version 2.
-# ??? Normally such a file is an MP3 file, but this will give false positives
-!:mime audio/mpeg
->3 ubyte <0xff \b%d
-#>4 ubyte <0xff \b%d tag
->2584 string fLaC \b, FLAC encoding
->>2588 byte&0x7f >0 \b, unknown version
->>2588 byte&0x7f 0 \b
-# some common bits/sample values
->>>2600 beshort&0x1f0 0x030 \b, 4 bit
->>>2600 beshort&0x1f0 0x050 \b, 6 bit
->>>2600 beshort&0x1f0 0x070 \b, 8 bit
->>>2600 beshort&0x1f0 0x0b0 \b, 12 bit
->>>2600 beshort&0x1f0 0x0f0 \b, 16 bit
->>>2600 beshort&0x1f0 0x170 \b, 24 bit
->>>2600 byte&0xe 0x0 \b, mono
->>>2600 byte&0xe 0x2 \b, stereo
->>>2600 byte&0xe 0x4 \b, 3 channels
->>>2600 byte&0xe 0x6 \b, 4 channels
->>>2600 byte&0xe 0x8 \b, 5 channels
->>>2600 byte&0xe 0xa \b, 6 channels
->>>2600 byte&0xe 0xc \b, 7 channels
->>>2600 byte&0xe 0xe \b, 8 channels
-# some common sample rates
->>>2597 belong&0xfffff0 0x0ac440 \b, 44.1 kHz
->>>2597 belong&0xfffff0 0x0bb800 \b, 48 kHz
->>>2597 belong&0xfffff0 0x07d000 \b, 32 kHz
->>>2597 belong&0xfffff0 0x056220 \b, 22.05 kHz
->>>2597 belong&0xfffff0 0x05dc00 \b, 24 kHz
->>>2597 belong&0xfffff0 0x03e800 \b, 16 kHz
->>>2597 belong&0xfffff0 0x02b110 \b, 11.025 kHz
->>>2597 belong&0xfffff0 0x02ee00 \b, 12 kHz
->>>2597 belong&0xfffff0 0x01f400 \b, 8 kHz
->>>2597 belong&0xfffff0 0x177000 \b, 96 kHz
->>>2597 belong&0xfffff0 0x0fa000 \b, 64 kHz
->>>2601 byte&0xf >0 \b, >4G samples
->2584 string !fLaC \b, MP3 encoding
+0 string ID3 Audio file with ID3 version 2
+>3 byte x \b.%d
+>4 byte x \b.%d
+>>5 byte &0x80 \b, unsynchronized frames
+>>5 byte &0x40 \b, extended header
+>>5 byte &0x20 \b, experimental
+>>5 byte &0x10 \b, footer present
+>(6.I) indirect x \b, contains:
# NSF (NES sound file) magic
0 string NESM\x1a NES Sound File
diff --git a/contrib/file/Magdir/cafebabe b/contrib/file/Magdir/cafebabe
index db385eaab819a..2168d95139d82 100644
--- a/contrib/file/Magdir/cafebabe
+++ b/contrib/file/Magdir/cafebabe
@@ -12,16 +12,18 @@
# (and use as a hack). Let's not use 18, because the Mach-O people
# might add another one or two as time goes by...
#
-0 beshort 0xcafe
->2 beshort 0xbabe
+0 belong 0xcafebabe
!:mime application/x-java-applet
->>2 belong >30 compiled Java class data,
->>>6 beshort x version %d.
->>>4 beshort x \b%d
->>4 belong 1 Mach-O fat file with 1 architecture
->>4 belong >1
->>>4 belong <20 Mach-O fat file with %ld architectures
->2 beshort 0xd00d JAR compressed with pack200,
+>4 belong >30 compiled Java class data,
+>>6 beshort x version %d.
+>>4 beshort x \b%d
+
+0 belong 0xcafebabe
+>4 belong 1 Mach-O fat file with 1 architecture
+>4 belong >1
+>>4 belong <20 Mach-O fat file with %ld architectures
+
+0 belong 0xcafed00d JAR compressed with pack200,
>>5 byte x version %d.
>>4 byte x \b%d
!:mime application/x-java-pack200
diff --git a/contrib/file/Magdir/compress b/contrib/file/Magdir/compress
index e2e4e03261cb3..5cbb1c87a911d 100644
--- a/contrib/file/Magdir/compress
+++ b/contrib/file/Magdir/compress
@@ -11,6 +11,7 @@
# standard unix compress
0 string \037\235 compress'd data
!:mime application/x-compress
+!:apple LZIVZIVU
>2 byte&0x80 >0 block compressed
>2 byte&0x1f x %d bits
@@ -76,6 +77,11 @@
!:mime application/x-bzip2
>3 byte >47 \b, block size = %c00k
+# lzip
+0 string LZIP lzip compressed data
+!:mime application/x-lzip
+>4 byte x \b, version: %d
+
# squeeze and crunch
# Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
0 beshort 0x76FF squeezed data,
diff --git a/contrib/file/Magdir/elf b/contrib/file/Magdir/elf
index 891e2ad4e6529..c605495429a85 100644
--- a/contrib/file/Magdir/elf
+++ b/contrib/file/Magdir/elf
@@ -143,11 +143,13 @@
>>18 leshort 91 picoJava,
>>18 leshort 92 OpenRISC,
>>18 leshort 93 ARC Cores Tangent-A5,
->>18 leshort 0x3426 OpenRISC (obsolete),
->>18 leshort 0x8472 OpenRISC (obsolete),
>>18 leshort 94 Tensilica Xtensa,
>>18 leshort 97 NatSemi 32k,
>>18 leshort 106 Analog Devices Blackfin,
+>>18 leshort 113 Altera Nios II,
+>>18 leshort 0xae META,
+>>18 leshort 0x3426 OpenRISC (obsolete),
+>>18 leshort 0x8472 OpenRISC (obsolete),
>>18 leshort 0x9026 Alpha (unofficial),
>>20 lelong 0 invalid version
>>20 lelong 1 version 1
diff --git a/contrib/file/Magdir/epoc b/contrib/file/Magdir/epoc
index 29bd94799b206..80229c47f38f5 100644
--- a/contrib/file/Magdir/epoc
+++ b/contrib/file/Magdir/epoc
@@ -1,10 +1,11 @@
-
#------------------------------------------------------------------------------
-# Epoc 32 : file(1) magic for Epoc Documents [psion/osaris
+# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
# Stefan Praszalowicz (hpicollo@worldnet.fr)
-#0 lelong 0x10000037 Epoc32
+# Useful information for improving this file can be found at:
+# http://software.frodo.looijaard.name/psiconv/formats/Index.html
+0 lelong 0x10000037
>4 lelong 0x1000006D
->>8 lelong 0x1000007F Word
->>8 lelong 0x10000088 Sheet
->>8 lelong 0x1000007D Sketch
->>8 lelong 0x10000085 TextEd
+>>8 lelong 0x1000007F Psion Word
+>>8 lelong 0x10000088 Psion Sheet
+>>8 lelong 0x1000007D Psion Sketch
+>>8 lelong 0x10000085 Psion TextEd
diff --git a/contrib/file/Magdir/filesystems b/contrib/file/Magdir/filesystems
index 36c2f72d3ca8b..b682df1d931ec 100644
--- a/contrib/file/Magdir/filesystems
+++ b/contrib/file/Magdir/filesystems
@@ -104,9 +104,32 @@
>>>346 string des\ Betriebssystems
>>>>366 string Betriebssystem\ nicht\ vorhanden \b, Microsoft Windows XP MBR (german)
>>>>>0x1B8 ulelong >0 \b, Serial 0x%-.4x
->0x145 string Default:\ F \b, FREE-DOS MBR
+#>0x145 string Default:\ F \b, FREE-DOS MBR
+#>0x14B string Default:\ F \b, FREE-DOS 1.0 MBR
+>0x145 search/7 Default:\ F \b, FREE-DOS MBR
+#>>313 string F0\ .\ .\ .
+#>>>322 string disk\ 1
+#>>>>382 string FAT3
>64 string no\ active\ partition\ found
>>96 string read\ error\ while\ reading\ drive \b, FREE-DOS Beta 0.9 MBR
+# Ranish Partition Manager http://www.ranish.com/part/
+>387 search/4 \0\ Error!\r
+>>378 search/7 Virus!
+>>>397 search/4 Booting\
+>>>>408 search/4 HD1/\0 \b, Ranish MBR (
+>>>>>416 string Writing\ changes... \b2.37
+>>>>>>438 ubyte x \b,0x%x dots
+>>>>>>440 ubyte >0 \b,virus check
+>>>>>>441 ubyte >0 \b,partition %c
+#2.38,2.42,2.44
+>>>>>416 string !Writing\ changes... \b
+>>>>>>418 ubyte 1 \bvirus check,
+>>>>>>419 ubyte x \b0x%x seconds
+>>>>>>420 ubyte&0x0F >0 \b,partition
+>>>>>>>420 ubyte&0x0F <5 \b %x
+>>>>>>>420 ubyte&0x0F 0Xf \b ask
+>>>>>420 ubyte x \b)
+#
>271 string Operating\ system\ loading
>>296 string error\r \b, SYSLINUX MBR (2.10)
# http://www.acronis.de/
@@ -124,18 +147,20 @@
>0x40 string SBML
# label with 11 characters of FAT 12 bit filesystem
>>43 string SMART\ BTMGR
->>>430 string SBMK\ Bad!\r
->>>>3 string SBM \b, Smart Boot Manager
->>>>>6 string >\0 \b, version %s
+>>>430 string SBMK\ Bad!\r \b, Smart Boot Manager
+# OEM-ID not always "SBM"
+#>>>>3 strings SBM
+>>>>6 string >\0 \b, version %s
>382 string XOSLLOADXCF \b, eXtended Operating System Loader
>6 string LILO \b, LInux i386 boot LOader
>>120 string LILO \b, version 22.3.4 SuSe
>>172 string LILO \b, version 22.5.8 Debian
-# updated by Joerg Jenderek
+# updated by Joerg Jenderek at Oct 2008
# variables according to grub-0.97/stage1/stage1.S or
# http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
# usual values are marked with comments to get only informations of strange GRUB loaders
->0 ulelong 0x009048EB
+>342 search/60 \0Geom\0
+#>0 ulelong x %x=0x009048EB , 0x2a9048EB 0
>>0x41 ubyte <2
>>>0x3E ubyte >2 \b; GRand Unified Bootloader
# 0x3 for 0.5.95,0.93,0.94,0.96 0x4 for 1.90
@@ -178,15 +203,14 @@
>3 string BCDL
>>498 string BCDL\ \ \ \ BIN \b, Bootable CD Loader (1.50Z)
# mbr partition table entries
-# OEM-ID not Microsoft,SYSLINUX,or MTOOLs
+# OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs
>3 string !MS
>>3 string !SYSLINUX
>>>3 string !MTOOL
+>>>>3 string !NEWLDR
+>>>>>5 string !DOS
# not FAT (32 bit)
->>>>82 string !FAT32
-#not IO.SYS
->>>>>472 string !IO\ \ \ \ \ \ SYS
->>>>>>480 string !IO\ \ \ \ \ \ SYS
+>>>>>>82 string !FAT32
#not Linux kernel
>>>>>>>514 string !HdrS
#not BeOS
@@ -272,6 +296,11 @@
>>>>>>>>>(1.b+11) ubyte 0xb
>>>>>>>>>>(1.b+12) ubyte 0x56
>>>>>>>>>>(1.b+13) ubyte 0xb4 \b, mkdosfs boot message display
+>214 string Please\ try\ to\ install\ FreeDOS\ \b, DOS Emulator boot message display
+#>>244 string from\ dosemu-freedos-*-bin.tgz\r
+#>>>170 string Sorry,\ could\ not\ load\ an\
+#>>>>195 string operating\ system.\r\n
+#
>103 string This\ is\ not\ a\ bootable\ disk.\
>>132 string Please\ insert\ a\ bootable\
>>>157 string floppy\ and\r\n
@@ -374,12 +403,22 @@
>430 string Datentr\204ger\ entfernen\xFF\r\n
>>454 string Medienfehler\xFF\r\n
>>>469 string Neustart:\ Taste\ dr\201cken\r \b, Microsoft Windows XP Bootloader (4.german)
->>>>368 ubyte&0xDF >0
->>>>>368 string x %-.5s
->>>>>>373 ubyte&0xDF >0
->>>>>>>373 string x \b%-.3s
->>>>>376 ubyte&0xDF >0
->>>>>>376 string x \b.%-.3s
+>>>>379 string \0
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+# variant
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+
#>3 string NTFS\ \ \ \
>389 string Fehler\ beim\ Lesen\
>>407 string des\ Datentr\204gers
@@ -567,12 +606,27 @@
>>>489 string Any\ key\ to\ retry \b, DR-DOS Bootloader
>>471 string Cannot\ load\ DOS\
>>487 string press\ key\ to\ retry \b, Open-DOS Bootloader
+#??
>444 string KERNEL\ \ SYS
>>314 string BOOT\ error! \b, FREE-DOS Bootloader
>499 string KERNEL\ \ SYS
>>305 string BOOT\ err!\0 \b, Free-DOS Bootloader
>449 string KERNEL\ \ SYS
>>319 string BOOT\ error! \b, FREE-DOS 0.5 Bootloader
+#
+>449 string Loading\ FreeDOS
+>>0x1AF ulelong >0 \b, FREE-DOS 0.95,1.0 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+#
+>331 string Error!.0 \b, FREE-DOS 1.0 bootloader
+#
>125 string Loading\ FreeDOS...\r
>>311 string BOOT\ error!\r \b, FREE-DOS bootloader
>>>441 ubyte&0xDF >0
@@ -706,13 +760,7 @@
#it also hangs with another message ("NF").
>>>>>492 string RENF \b, FAT (12 bit)
>>>>>495 string RENF \b, FAT (16 bit)
-# added by Joerg Jenderek
-# http://syslinux.zytor.com/iso.php
-0 ulelong 0x7c40eafa isolinux Loader
-# http://syslinux.zytor.com/pxe.php
-0 ulelong 0x007c05ea pxelinux Loader
-0 ulelong 0x60669c66 pxelinux Loader
-# loader end
+# x86 bootloader end
# updated by Joerg Jenderek at Sep 2007
>3 ubyte 0
#no active flag
@@ -732,6 +780,7 @@
# older drives may use Near JuMP instruction E9 xx xx
>0 lelong&0x009000EB 0x009000EB
>0 lelong&0x000000E9 0x000000E9
+# minimal short forward jump found 03cx??
# maximal short forward jump is 07fx
>1 ubyte <0xff \b, code offset 0x%x
# mtools-3.9.8/msdos.h
@@ -740,91 +789,92 @@
>>11 uleshort&0x000f x
>>>11 uleshort <32769
>>>>11 uleshort >31
->>>>>3 string >\0 \b, OEM-ID "%8.8s"
+>>>>>21 ubyte&0xf0 0xF0
+>>>>>>3 string >\0 \b, OEM-ID "%8.8s"
#http://mirror.href.com/thestarman/asm/debug/debug2.htm#IHC
->>>>>>8 string IHC \b cached by Windows 9M
->>>>>11 uleshort >512 \b, Bytes/sector %u
-#>>>>>11 uleshort =512 \b, Bytes/sector %u=512 (usual)
->>>>>11 uleshort <512 \b, Bytes/sector %u
->>>>>13 ubyte >1 \b, sectors/cluster %u
-#>>>>>13 ubyte =1 \b, sectors/cluster %u (usual on Floppies)
->>>>>14 uleshort >32 \b, reserved sectors %u
-#>>>>>14 uleshort =32 \b, reserved sectors %u (usual Fat32)
-#>>>>>14 uleshort >1 \b, reserved sectors %u
-#>>>>>14 uleshort =1 \b, reserved sectors %u (usual FAT12,FAT16)
->>>>>14 uleshort <1 \b, reserved sectors %u
->>>>>16 ubyte >2 \b, FATs %u
-#>>>>>16 ubyte =2 \b, FATs %u (usual)
->>>>>16 ubyte =1 \b, FAT %u
->>>>>16 ubyte >0
->>>>>17 uleshort >0 \b, root entries %u
-#>>>>>17 uleshort =0 \b, root entries %u=0 (usual Fat32)
->>>>>19 uleshort >0 \b, sectors %u (volumes <=32 MB)
-#>>>>>19 uleshort =0 \b, sectors %u=0 (usual Fat32)
->>>>>21 ubyte >0xF0 \b, Media descriptor 0x%x
-#>>>>>21 ubyte =0xF0 \b, Media descriptor 0x%x (usual floppy)
->>>>>21 ubyte <0xF0 \b, Media descriptor 0x%x
->>>>>22 uleshort >0 \b, sectors/FAT %u
-#>>>>>22 uleshort =0 \b, sectors/FAT %u=0 (usual Fat32)
->>>>>26 ubyte >2 \b, heads %u
-#>>>>>26 ubyte =2 \b, heads %u (usual floppy)
->>>>>26 ubyte =1 \b, heads %u
+>>>>>>>8 string IHC \b cached by Windows 9M
+>>>>>>11 uleshort >512 \b, Bytes/sector %u
+#>>>>>>11 uleshort =512 \b, Bytes/sector %u=512 (usual)
+>>>>>>11 uleshort <512 \b, Bytes/sector %u
+>>>>>>13 ubyte >1 \b, sectors/cluster %u
+#>>>>>>13 ubyte =1 \b, sectors/cluster %u (usual on Floppies)
+>>>>>>14 uleshort >32 \b, reserved sectors %u
+#>>>>>>14 uleshort =32 \b, reserved sectors %u (usual Fat32)
+#>>>>>>14 uleshort >1 \b, reserved sectors %u
+#>>>>>>14 uleshort =1 \b, reserved sectors %u (usual FAT12,FAT16)
+>>>>>>14 uleshort <1 \b, reserved sectors %u
+>>>>>>16 ubyte >2 \b, FATs %u
+#>>>>>>16 ubyte =2 \b, FATs %u (usual)
+>>>>>>16 ubyte =1 \b, FAT %u
+>>>>>>16 ubyte >0
+>>>>>>17 uleshort >0 \b, root entries %u
+#>>>>>>17 uleshort =0 \b, root entries %u=0 (usual Fat32)
+>>>>>>19 uleshort >0 \b, sectors %u (volumes <=32 MB)
+#>>>>>>19 uleshort =0 \b, sectors %u=0 (usual Fat32)
+>>>>>>21 ubyte >0xF0 \b, Media descriptor 0x%x
+#>>>>>>21 ubyte =0xF0 \b, Media descriptor 0x%x (usual floppy)
+>>>>>>21 ubyte <0xF0 \b, Media descriptor 0x%x
+>>>>>>22 uleshort >0 \b, sectors/FAT %u
+#>>>>>>22 uleshort =0 \b, sectors/FAT %u=0 (usual Fat32)
+>>>>>>26 ubyte >2 \b, heads %u
+#>>>>>>26 ubyte =2 \b, heads %u (usual floppy)
+>>>>>>26 ubyte =1 \b, heads %u
#skip for Digital Research DOS (version 3.41) 1440 kB Bootdisk
->>>>>38 ubyte !0x70
->>>>>>28 ulelong >0 \b, hidden sectors %u
-#>>>>>>28 ulelong =0 \b, hidden sectors %u (usual floppy)
->>>>>>32 ulelong >0 \b, sectors %u (volumes > 32 MB)
-#>>>>>>32 ulelong =0 \b, sectors %u (volumes > 32 MB)
+>>>>>>38 ubyte !0x70
+>>>>>>>28 ulelong >0 \b, hidden sectors %u
+#>>>>>>>28 ulelong =0 \b, hidden sectors %u (usual floppy)
+>>>>>>>32 ulelong >0 \b, sectors %u (volumes > 32 MB)
+#>>>>>>>32 ulelong =0 \b, sectors %u (volumes > 32 MB)
# FAT<32 specific
->>>>>82 string !FAT32
-#>>>>>>36 ubyte 0x80 \b, physical drive 0x%x=0x80 (usual harddisk)
-#>>>>>>36 ubyte 0 \b, physical drive 0x%x=0 (usual floppy)
->>>>>>36 ubyte !0x80
->>>>>>>36 ubyte !0 \b, physical drive 0x%x
->>>>>>37 ubyte >0 \b, reserved 0x%x
-#>>>>>>37 ubyte =0 \b, reserved 0x%x
->>>>>>38 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
->>>>>>38 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
->>>>>>38 ubyte =0x29
->>>>>>>39 ulelong x \b, serial number 0x%x
->>>>>>>43 string <NO\ NAME \b, label: "%11.11s"
->>>>>>>43 string >NO\ NAME \b, label: "%11.11s"
->>>>>>>43 string =NO\ NAME \b, unlabeled
->>>>>>54 string FAT \b, FAT
->>>>>>>54 string FAT12 \b (12 bit)
->>>>>>>54 string FAT16 \b (16 bit)
+>>>>>>82 string !FAT32
+#>>>>>>>36 ubyte 0x80 \b, physical drive 0x%x=0x80 (usual harddisk)
+#>>>>>>>36 ubyte 0 \b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>36 ubyte !0x80
+>>>>>>>>36 ubyte !0 \b, physical drive 0x%x
+>>>>>>>37 ubyte >0 \b, reserved 0x%x
+#>>>>>>>37 ubyte =0 \b, reserved 0x%x
+>>>>>>>38 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38 ubyte =0x29
+>>>>>>>>39 ulelong x \b, serial number 0x%x
+>>>>>>>>43 string <NO\ NAME \b, label: "%11.11s"
+>>>>>>>>43 string >NO\ NAME \b, label: "%11.11s"
+>>>>>>>>43 string =NO\ NAME \b, unlabeled
+>>>>>>>54 string FAT \b, FAT
+>>>>>>>>54 string FAT12 \b (12 bit)
+>>>>>>>>54 string FAT16 \b (16 bit)
# FAT32 specific
->>>>>82 string FAT32 \b, FAT (32 bit)
->>>>>>36 ulelong x \b, sectors/FAT %u
->>>>>>40 uleshort >0 \b, extension flags %u
-#>>>>>>40 uleshort =0 \b, extension flags %u
->>>>>>42 uleshort >0 \b, fsVersion %u
-#>>>>>>42 uleshort =0 \b, fsVersion %u (usual)
->>>>>>44 ulelong >2 \b, rootdir cluster %u
-#>>>>>>44 ulelong =2 \b, rootdir cluster %u
-#>>>>>>44 ulelong =1 \b, rootdir cluster %u
->>>>>>48 uleshort >1 \b, infoSector %u
-#>>>>>>48 uleshort =1 \b, infoSector %u (usual)
->>>>>>48 uleshort <1 \b, infoSector %u
->>>>>>50 uleshort >6 \b, Backup boot sector %u
-#>>>>>>50 uleshort =6 \b, Backup boot sector %u (usual)
->>>>>>50 uleshort <6 \b, Backup boot sector %u
->>>>>>54 ulelong >0 \b, reserved1 0x%x
->>>>>>58 ulelong >0 \b, reserved2 0x%x
->>>>>>62 ulelong >0 \b, reserved3 0x%x
+>>>>>>82 string FAT32 \b, FAT (32 bit)
+>>>>>>>36 ulelong x \b, sectors/FAT %u
+>>>>>>>40 uleshort >0 \b, extension flags %u
+#>>>>>>>40 uleshort =0 \b, extension flags %u
+>>>>>>>42 uleshort >0 \b, fsVersion %u
+#>>>>>>>42 uleshort =0 \b, fsVersion %u (usual)
+>>>>>>>44 ulelong >2 \b, rootdir cluster %u
+#>>>>>>>44 ulelong =2 \b, rootdir cluster %u
+#>>>>>>>44 ulelong =1 \b, rootdir cluster %u
+>>>>>>>48 uleshort >1 \b, infoSector %u
+#>>>>>>>48 uleshort =1 \b, infoSector %u (usual)
+>>>>>>>48 uleshort <1 \b, infoSector %u
+>>>>>>>50 uleshort >6 \b, Backup boot sector %u
+#>>>>>>>50 uleshort =6 \b, Backup boot sector %u (usual)
+>>>>>>>50 uleshort <6 \b, Backup boot sector %u
+>>>>>>>54 ulelong >0 \b, reserved1 0x%x
+>>>>>>>58 ulelong >0 \b, reserved2 0x%x
+>>>>>>>62 ulelong >0 \b, reserved3 0x%x
# same structure as FAT1X
->>>>>>64 ubyte >0x80 \b, physical drive 0x%x
-#>>>>>>64 ubyte =0x80 \b, physical drive 0x%x=80 (usual harddisk)
->>>>>>64 ubyte&0x7F >0 \b, physical drive 0x%x
-#>>>>>>64 ubyte =0 \b, physical drive 0x%x=0 (usual floppy)
->>>>>>65 ubyte >0 \b, reserved 0x%x
->>>>>>66 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
->>>>>>66 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
->>>>>>66 ubyte =0x29
->>>>>>>67 ulelong x \b, serial number 0x%x
->>>>>>>71 string <NO\ NAME \b, label: "%11.11s"
->>>>>>71 string >NO\ NAME \b, label: "%11.11s"
->>>>>>71 string =NO\ NAME \b, unlabeled
+>>>>>>>64 ubyte >0x80 \b, physical drive 0x%x
+#>>>>>>>64 ubyte =0x80 \b, physical drive 0x%x=80 (usual harddisk)
+>>>>>>>64 ubyte&0x7F >0 \b, physical drive 0x%x
+#>>>>>>>64 ubyte =0 \b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>65 ubyte >0 \b, reserved 0x%x
+>>>>>>>66 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66 ubyte =0x29
+>>>>>>>>67 ulelong x \b, serial number 0x%x
+>>>>>>>>71 string <NO\ NAME \b, label: "%11.11s"
+>>>>>>>71 string >NO\ NAME \b, label: "%11.11s"
+>>>>>>>71 string =NO\ NAME \b, unlabeled
### FATs end
>0x200 lelong 0x82564557 \b, BSD disklabel
# FATX
@@ -854,6 +904,13 @@
0x18b string OS/2 OS/2 Boot Manager
+# updated by Joerg Jenderek at Oct 2008!!
+# http://syslinux.zytor.com/iso.php
+0 ulelong 0x7c40eafa isolinux Loader
+# http://syslinux.zytor.com/pxe.php
+0 ulelong 0x007c05ea pxelinux Loader
+0 ulelong 0x60669c66 pxelinux Loader
+
# added by Joerg Jenderek
# In the second sector (+0x200) are variables according to grub-0.97/stage2/asm.S or
# grub-1.94/kern/i386/pc/startup.S
@@ -1324,6 +1381,14 @@
>0x10024 belong x (blocksize %d,
>0x10060 string >\0 lockproto %s)
+# BTRFS
+0x10040 string _BHRfS_M BTRFS Filesystem
+>0x1012b string >\0 (label "%s",
+>0x10090 lelong x sectorsize %d,
+>0x10094 lelong x nodesize %d,
+>0x10098 lelong x leafsize %d)
+
+
# dvdisaster's .ecc
# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
0 string *dvdisaster* dvdisaster error correction file
diff --git a/contrib/file/Magdir/graphviz b/contrib/file/Magdir/graphviz
index cf47f4e28c84f..831a00259506d 100644
--- a/contrib/file/Magdir/graphviz
+++ b/contrib/file/Magdir/graphviz
@@ -1,7 +1,10 @@
-
#------------------------------------------------------------------------------
# graphviz: file(1) magic for http://www.graphviz.org/
-0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]*.*\\{ graphviz graph text
-!:mime text/vnd.graphviz
-0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]*.*\\{ graphviz digraph text
-!:mime text/vnd.graphviz
+
+# FIXME: These patterns match too generally. For example, the first
+# line matches a LaTeX file containing the word "graph" (with a {
+# following later) and the second line matches this file.
+#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
+#!:mime text/vnd.graphviz
+#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
+#!:mime text/vnd.graphviz
diff --git a/contrib/file/Magdir/images b/contrib/file/Magdir/images
index cccc70be50356..7eacf862fa04a 100644
--- a/contrib/file/Magdir/images
+++ b/contrib/file/Magdir/images
@@ -110,6 +110,7 @@
# GIF
0 string GIF8 GIF image data
!:mime image/gif
+!:apple 8BIMGIFf
>4 string 7a \b, version 8%s,
>4 string 9a \b, version 8%s,
>6 leshort >0 %hd x
@@ -600,3 +601,7 @@
# Wavelet Scalar Quantization format used in gray-scale fingerprint images
# From Tano M Fotang <mfotang@quanteq.com>
0 string \xff\xa0\xff\xa8\x00 Wavelet Scalar Quantization image data
+
+# JPEG 2000 Code Stream Bitmap
+# From Petr Splichal <psplicha@redhat.com>
+0 string \xFF\x4F\xFF\x51\x00 JPEG-2000 Code Stream Bitmap data
diff --git a/contrib/file/Magdir/jpeg b/contrib/file/Magdir/jpeg
index d728de6e6f7f5..4470be4adf9a5 100644
--- a/contrib/file/Magdir/jpeg
+++ b/contrib/file/Magdir/jpeg
@@ -10,6 +10,7 @@
#
0 beshort 0xffd8 JPEG image data
!:mime image/jpeg
+!:apple 8BIMJPEG
!:strength +1
>6 string JFIF \b, JFIF standard
# The following added by Erik Rossen <rossen@freesurf.ch> 1999-09-06
diff --git a/contrib/file/Magdir/mach b/contrib/file/Magdir/mach
index cced3a5873238..e53be0737d92f 100644
--- a/contrib/file/Magdir/mach
+++ b/contrib/file/Magdir/mach
@@ -4,7 +4,7 @@
# Java ByteCode, so they are both handled in the file "cafebabe".
# The "feedface" ones are handled herein.
#------------------------------------------------------------
-0 lelong&0xfeffffff 0xfeedface Mach-O
+0 lelong&0xfffffffe 0xfeedface Mach-O
>0 byte 0xcf 64-bit
>12 lelong 1 object
>12 lelong 2 executable
diff --git a/contrib/file/Magdir/macintosh b/contrib/file/Magdir/macintosh
index 77187a398d67e..ca665ded8cb87 100644
--- a/contrib/file/Magdir/macintosh
+++ b/contrib/file/Magdir/macintosh
@@ -11,6 +11,8 @@
# Stuffit archives are the de facto standard of compression for Macintosh
# files obtained from most archives. (franklsm@tuns.ca)
0 string SIT! StuffIt Archive (data)
+!:mime application/x-stuffit
+!:apple SIT!SIT!
>2 string x : %s
0 string SITD StuffIt Deluxe (data)
>2 string x : %s
@@ -20,6 +22,7 @@
# Newer StuffIt archives (grant@netbsd.org)
0 string StuffIt StuffIt Archive
!:mime application/x-stuffit
+!:apple SIT!SIT!
#>162 string >0 : %s
# Macintosh Applications and Installation binaries (franklsm@tuns.ca)
diff --git a/contrib/file/Magdir/msdos b/contrib/file/Magdir/msdos
index cdd7c931c9858..a44533b43735f 100644
--- a/contrib/file/Magdir/msdos
+++ b/contrib/file/Magdir/msdos
@@ -4,15 +4,15 @@
#
# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
-# updated by Joerg Jenderek
+# updated by Joerg Jenderek at Oct 2008
0 string @
->1 string/cB \ echo\ off MS-DOS batch file text
+>1 string/cB \ echo\ off DOS batch file text
!:mime text/x-msdos-batch
->1 string/cB echo\ off MS-DOS batch file text
+>1 string/cB echo\ off DOS batch file text
!:mime text/x-msdos-batch
->1 string/cB rem\ MS-DOS batch file text
+>1 string/cB rem\ DOS batch file text
!:mime text/x-msdos-batch
->1 string/cB set\ MS-DOS batch file text
+>1 string/cB set\ DOS batch file text
!:mime text/x-msdos-batch
@@ -285,8 +285,9 @@
# Uncommenting only the first two lines will cover about 2/3 of COM files,
# but it isn't feasible to match all COM files since there must be at least
# two dozen different one-byte "magics".
-#0 byte 0xe9 DOS executable (COM)
-#>0x1FE leshort 0xAA55 \b, boot code
+# test too generic ?
+0 byte 0xe9 DOS executable (COM)
+>0x1FE leshort 0xAA55 \b, boot code
>6 string SFX\ of\ LHarc (%s)
0 belong 0xffffffff DOS executable (device driver)
#CMD640X2.SYS
@@ -309,25 +310,38 @@
>>77 string >\x40
>>>77 string <\x5B
>>>>77 string x \b, name: %.8s
-#0 byte 0x8c DOS executable (COM)
-# 0xeb conflicts with "sequent" magic
-#0 byte 0xeb DOS executable (COM)
-#>0x1FE leshort 0xAA55 \b, boot code
-#>85 string UPX \b, UPX compressed
-#>4 string \ $ARX \b, ARX self-extracting archive
-#>4 string \ $LHarc \b, LHarc self-extracting archive
-#>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive
+# test too generic ?
+0 byte 0x8c DOS executable (COM)
+# updated by Joerg Jenderek at Oct 2008
+0 ulelong 0xffff10eb DR-DOS executable (COM)
+# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
+0 ubeshort&0xeb8d >0xeb00
+# DR-DOS STACKER.COM SCREATE.SYS missed
+>0 byte 0xeb DOS executable (COM)
+>>0x1FE leshort 0xAA55 \b, boot code
+>>85 string UPX \b, UPX compressed
+>>4 string \ $ARX \b, ARX self-extracting archive
+>>4 string \ $LHarc \b, LHarc self-extracting archive
+>>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive
+# updated by Joerg Jenderek at Oct 2008
#0 byte 0xb8 COM executable
+0 uleshort&0x80ff 0x00b8
# modified by Joerg Jenderek
->1 lelong !0x21cd4cff for DOS
+>1 lelong !0x21cd4cff COM executable for DOS
# http://syslinux.zytor.com/comboot.php
# (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
# start with assembler instructions mov eax,21cd4cffh
->1 lelong 0x21cd4cff (32-bit COMBOOT)
+0 uleshort&0xc0ff 0xc0b8
+>1 lelong 0x21cd4cff COM executable (32-bit COMBOOT)
0 string \x81\xfc
>4 string \x77\x02\xcd\x20\xb9
>>36 string UPX! FREE-DOS executable (COM), UPX compressed
252 string Must\ have\ DOS\ version DR-DOS executable (COM)
+# added by Joerg Jenderek at Oct 2008
+# GRR search is not working
+#34 search/2 UPX! FREE-DOS executable (COM), UPX compressed
+34 string UPX! FREE-DOS executable (COM), UPX compressed
+35 string UPX! FREE-DOS executable (COM), UPX compressed
# GRR search is not working
#2 search/28 \xcd\x21 COM executable for MS-DOS
#WHICHFAT.cOM
@@ -564,6 +578,7 @@
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0 string MSCF\0\0\0\0 Microsoft Cabinet archive data
+!:mime application/vnd.ms-cab-compressed
>8 lelong x \b, %u bytes
>28 leshort 1 \b, 1 file
>28 leshort >1 \b, %u files
diff --git a/contrib/file/Magdir/perl b/contrib/file/Magdir/perl
index 73fb88ba4b286..4c22ecc9b5b00 100644
--- a/contrib/file/Magdir/perl
+++ b/contrib/file/Magdir/perl
@@ -23,7 +23,6 @@
# by Dmitry V. Levin and Alexey Tourbin
# check the first line
0 search/1 package
-0 regex \^package[\ \t]+[A-Za-z_]
>0 regex \^package[\ \t]+[0-9A-Za-z_:]+\ *; Perl5 module source text
# not 'p', check other lines
0 search/1 !p
diff --git a/contrib/file/Magdir/printer b/contrib/file/Magdir/printer
index 15a175821308f..e25d03e977f5c 100644
--- a/contrib/file/Magdir/printer
+++ b/contrib/file/Magdir/printer
@@ -6,6 +6,7 @@
# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
0 string %! PostScript document text
!:mime application/postscript
+!:apple ASPSTEXT
>2 string PS-Adobe- conforming
>>11 string >\0 DSC level %.3s
>>>15 string EPS \b, type %s
@@ -16,6 +17,7 @@
# Some PCs have the annoying habit of adding a ^D as a document separator
0 string \004%! PostScript document text
!:mime application/postscript
+!:apple ASPSTEXT
>3 string PS-Adobe- conforming
>>12 string >\0 DSC level %.3s
>>>16 string EPS \b, type %s
diff --git a/contrib/file/Magdir/timezone b/contrib/file/Magdir/timezone
index 40e7558f8de10..c9ce02582ce0b 100644
--- a/contrib/file/Magdir/timezone
+++ b/contrib/file/Magdir/timezone
@@ -6,6 +6,23 @@
# this should work on Linux, SunOS, and maybe others
# Added new official magic number for recent versions of the Olson code
0 string TZif timezone data
+# Byte 4 is the format version; each 32-bit big-endian header field below
+# is reported with separate zero / one / many wording.
+>4 byte 0 \b, old version
+>4 byte >0 \b, version %c
+>20 belong 0 \b, no gmt time flags
+>20 belong 1 \b, 1 gmt time flag
+>20 belong >1 \b, %d gmt time flags
+>24 belong 0 \b, no std time flags
+>24 belong 1 \b, 1 std time flag
+>24 belong >1 \b, %d std time flags
+>28 belong 0 \b, no leap seconds
+>28 belong 1 \b, 1 leap second
+>28 belong >1 \b, %d leap seconds
+>32 belong 0 \b, no transition times
+>32 belong 1 \b, 1 transition time
+>32 belong >1 \b, %d transition times
+>36 belong 0 \b, no abbreviation chars
+>36 belong 1 \b, 1 abbreviation char
+>36 belong >1 \b, %d abbreviation chars
0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0 old timezone data
0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0 old timezone data
0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0 old timezone data
diff --git a/contrib/file/Magdir/wireless b/contrib/file/Magdir/wireless
new file mode 100644
index 0000000000000..aaae5a58fb679
--- /dev/null
+++ b/contrib/file/Magdir/wireless
@@ -0,0 +1,5 @@
+#------------------------------------------------------------------------------
+# wireless-regdb: file(1) magic for CRDA wireless-regdb file format
+#
+0 string RGDB CRDA wireless regulatory database file
+>4 belong 19 (Version 1)
diff --git a/contrib/file/Magdir/xwindows b/contrib/file/Magdir/xwindows
index 57f3b083f3b3e..cae45a780dab6 100644
--- a/contrib/file/Magdir/xwindows
+++ b/contrib/file/Magdir/xwindows
@@ -23,3 +23,12 @@
>24 long x %ldx
>28 long 1008 YUV422]
>28 long 1000 RGB24]
+
+# Xcursor data
+# X11 mouse cursor format defined in libXcursor, see
+# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
+# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
+0 string Xcur Xcursor data
+!:mime image/x-xcursor
+>10 leshort x version %hd
+>>8 leshort x \b.%hd
diff --git a/contrib/file/Makefile.am b/contrib/file/Makefile.am
index 104dd8fa08158..5849f40e2f00d 100644
--- a/contrib/file/Makefile.am
+++ b/contrib/file/Makefile.am
@@ -1,5 +1,5 @@
#
-# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
+# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
#
MAGIC_FRAGMENT_BASE = Magdir
MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@@ -209,6 +209,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
$(MAGIC_FRAGMENT_DIR)/warc \
$(MAGIC_FRAGMENT_DIR)/weak \
$(MAGIC_FRAGMENT_DIR)/windows \
+$(MAGIC_FRAGMENT_DIR)/wireless \
$(MAGIC_FRAGMENT_DIR)/wordprocessors \
$(MAGIC_FRAGMENT_DIR)/xdelta \
$(MAGIC_FRAGMENT_DIR)/xenix \
diff --git a/contrib/file/Makefile.am-src b/contrib/file/Makefile.am-src
new file mode 100644
index 0000000000000..db78d96e49d45
--- /dev/null
+++ b/contrib/file/Makefile.am-src
@@ -0,0 +1,18 @@
+MAGIC = $(pkgdatadir)/magic
+lib_LTLIBRARIES = libmagic.la
+include_HEADERS = magic.h
+
+bin_PROGRAMS = file
+
+AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
+AM_CFLAGS = @WARNINGS@
+
+libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
+ encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
+ funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
+ file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
+libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
+libmagic_la_LIBADD = $(LTLIBOBJS)
+
+file_SOURCES = file.c
+file_LDADD = libmagic.la
diff --git a/contrib/file/Makefile.in b/contrib/file/Makefile.in
index d52e20de8514c..b6fabbef0a872 100644
--- a/contrib/file/Makefile.in
+++ b/contrib/file/Makefile.in
@@ -163,7 +163,7 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
#
-# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
+# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
#
MAGIC_FRAGMENT_BASE = Magdir
MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@@ -371,6 +371,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
$(MAGIC_FRAGMENT_DIR)/warc \
$(MAGIC_FRAGMENT_DIR)/weak \
$(MAGIC_FRAGMENT_DIR)/windows \
+$(MAGIC_FRAGMENT_DIR)/wireless \
$(MAGIC_FRAGMENT_DIR)/wordprocessors \
$(MAGIC_FRAGMENT_DIR)/xdelta \
$(MAGIC_FRAGMENT_DIR)/xenix \
diff --git a/contrib/file/README b/contrib/file/README
index 30b9fa832043b..25c3abca34a7f 100644
--- a/contrib/file/README
+++ b/contrib/file/README
@@ -1,5 +1,5 @@
** README for file(1) Command **
-@(#) $File: README,v 1.40 2008/04/23 03:45:20 christos Exp $
+@(#) $File: README,v 1.41 2008/12/02 16:34:46 christos Exp $
E-mail: christos@astron.com
Mailing List: file@mx.gw.com
@@ -48,33 +48,35 @@ in magic(5) format please, to the maintainer, Christos Zoulas.
COPYING - read this first.
README - read this second (you are currently reading this file).
-PORTING - read this only if the program won't compile.
-Makefile - read this next, adapt it as needed (particularly
- the location of the old existing file command and
- the man page layouts), type "make" to compile,
- "make try" to try it out against your old version.
- Expect some diffs, particularly since your original
- file(1) may not grok the embedded-space ("\ ") in
- the current magic file, or may even not use the
- magic file.
-apprentice.c - parses /etc/magic to learn magic
-ascmagic.c - third & last set of tests, based on hardwired assumptions.
-core - not included in distribution due to mailer limitations.
-debug.c - includes -c printout routine
-file.1 - man page for the command
-magic.4 - man page for the magic file, courtesy Guy Harris.
+INSTALL - read on how to install
+
+src/apprentice.c - parses /etc/magic to learn magic
+src/apptype.c - used for OS/2 specific application type magic
+src/asprintf.c - replacement for OS's that don't have it.
+src/ascmagic.c - third & last set of tests, based on hardwired assumptions.
+src/cdf.c - parser for Microsoft Compound Document Files
+src/cdf_time.c - time converter for CDF.
+src/compress.c - handles decompressing files to look inside.
+src/encoding.c - handles unicode encodings
+src/file.c - the main program
+src/file.h - header file
+src/fsmagic.c - first set of tests the program runs, based on filesystem info
+src/funcs.c - utility functions
+src/getopt_long.c - replacement for OS's that don't have it.
+src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
+src/names.h - header file for ascmagic.c
+src/magic.c - the libmagic api
+src/print.c - print results, errors, warnings.
+src/readcdf.c - CDF wrapper.
+src/readelf.[ch] - Stand-alone elf parsing code.
+src/softmagic.c - 2nd set of tests, based on /etc/magic
+src/strlcat.c - replacement for OS's that don't have it.
+src/strlcpy.c - replacement for OS's that don't have it.
+src/vasprintf.c - replacement for OS's that don't have it.
+doc/file.1 - man page for the command
+doc/magic.4 - man page for the magic file, courtesy Guy Harris.
Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile.
-file.c - main program
-file.h - header file
-fsmagic.c - first set of tests the program runs, based on filesystem info
-is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
-magdir - directory of /etc/magic pieces
- magdir/Makefile - ADJUST THIS FOR YOUR CONFIGURATION
-names.h - header file for ascmagic.c
-softmagic.c - 2nd set of tests, based on /etc/magic
-readelf.[ch] - Stand-alone elf parsing code.
-compress.c - on-the-fly decompression.
-print.c - print results, errors, warnings.
+Magdir - directory of /etc/magic pieces
------------------------------------------------------------------------------
diff --git a/contrib/file/TODO b/contrib/file/TODO
index 50fd87fbd2841..4e3e17ee73834 100644
--- a/contrib/file/TODO
+++ b/contrib/file/TODO
@@ -1,3 +1,9 @@
+Fix output so that tests for MIME and APPLE flags are not needed all
+over the place, and actual output is only done in one place. This
+needs a design. Suggestion: push possible outputs on to a list, then
+pick the last-pushed (most specific, one hopes) value at the end, or
+use a default if the list is empty.
+
Continue to squash all magic bugs. See Debian BTS for a good source.
Store arbitrarily long strings, for example for %s patterns, so that
diff --git a/contrib/file/apprentice.c b/contrib/file/apprentice.c
index 714c86c61d287..ba7f783e913c6 100644
--- a/contrib/file/apprentice.c
+++ b/contrib/file/apprentice.c
@@ -30,6 +30,11 @@
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: apprentice.c,v 1.147 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
#include "patchlevel.h"
#include <stdlib.h>
@@ -40,18 +45,11 @@
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/param.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
-#include <sys/types.h>
#include <dirent.h>
-#ifndef lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.140 2008/07/20 04:02:15 christos Exp $")
-#endif /* lint */
-
#define EATAB {while (isascii((unsigned char) *l) && \
isspace((unsigned char) *l)) ++l;}
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
@@ -106,7 +104,7 @@ private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
-private void mkdbname(const char *, char **, int);
+private char *mkdbname(struct magic_set *, const char *, int);
private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
const char *);
private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
@@ -115,8 +113,8 @@ private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
-private int parse_strength(struct magic_set *, struct magic_entry *,
- const char *);
+private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
+private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
private size_t maxmagic = 0;
@@ -131,6 +129,7 @@ private struct {
} bang[] = {
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
DECLARE_FIELD(mime),
+ DECLARE_FIELD(apple),
DECLARE_FIELD(strength),
#undef DECLARE_FIELD
{ NULL, 0, NULL }
@@ -215,6 +214,9 @@ static const struct type_tbl_s {
{ XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
{ XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
{ XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
+ { XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
+ { XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
+ { XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE },
{ XX_NULL, FILE_INVALID, FILE_FMT_NONE },
# undef XX
# undef XX_NULL
@@ -590,7 +592,8 @@ set_test_type(struct magic *mstart, struct magic *m)
case FILE_SEARCH:
#ifndef COMPILE_ONLY
/* binary test if pattern is not text */
- if (file_looks_utf8(m->value.us, m->vallen, NULL, NULL) <= 0)
+ if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
+ NULL) <= 0)
mstart->flag |= BINTEST;
#endif
break;
@@ -706,6 +709,8 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
(void)fprintf(stderr, "%s\n", usg_hdr);
/* load directory or file */
+ /* FIXME: Read file names and sort them to prevent
+ non-determinism. See Debian bug #488562. */
if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
dir = opendir(fn);
if (dir) {
@@ -870,6 +875,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
case FILE_REGEX:
case FILE_SEARCH:
case FILE_DEFAULT:
+ case FILE_INDIRECT:
break;
default:
if (ms->flags & MAGIC_CHECK)
@@ -1186,6 +1192,12 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
case 'G':
m->in_type = FILE_BEDOUBLE;
break;
+ case 'i':
+ m->in_type = FILE_LEID3;
+ break;
+ case 'I':
+ m->in_type = FILE_BEID3;
+ break;
default:
if (ms->flags & MAGIC_CHECK)
file_magwarn(ms,
@@ -1475,6 +1487,38 @@ out:
}
/*
+ * Parse an Apple CREATOR/TYPE annotation ("!:apple") from a magic file and
+ * store it in the apple field of the entry's most recent magic line:
+ * me->mp[me->cont_count - 1], or me->mp[0] when cont_count is 0.
+ *
+ * At most sizeof(m->apple) characters are copied; the copy loop itself never
+ * writes a terminating NUL, so the value is printed with a bounded format.
+ * Returns 0 when at least one character was stored, -1 when the line
+ * already carries an APPLE annotation or no acceptable character was found.
+ */
+private int
+parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
+{
+ size_t i;
+ const char *l = line;
+ struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
+
+ if (m->apple[0] != '\0') {
+ /* Report the existing APPLE value, not the MIME type. */
+ file_magwarn(ms, "Current entry already has an APPLE type `%.8s',"
+ " new type `%s'", m->apple, l);
+ return -1;
+ }
+
+ EATAB;
+ /*
+ * '!' is accepted in addition to the MIME character set because
+ * creator/type codes such as "SIT!SIT!" contain it.
+ */
+ for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
+ || strchr("!-+/.", *l)) && i < sizeof(m->apple); m->apple[i++] = *l++)
+ continue;
+ if (i == sizeof(m->apple) && *l) {
+ if (ms->flags & MAGIC_CHECK)
+ file_magwarn(ms, "APPLE type `%s' truncated %zu",
+ line, i);
+ }
+
+ if (i > 0)
+ return 0;
+ else
+ return -1;
+}
+
+/*
* parse a MIME annotation line from magic file, put into magic[index - 1]
* if valid
*/
@@ -1492,10 +1536,8 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
}
EATAB;
- for (i = 0;
- *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
- || strchr("-+/.", *l)) && i < sizeof(m->mimetype);
- m->mimetype[i++] = *l++)
+ for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
+ || strchr("-+/.", *l)) && i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
continue;
if (i == sizeof(m->mimetype)) {
m->desc[sizeof(m->mimetype) - 1] = '\0';
@@ -2016,7 +2058,7 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
char *dbname = NULL;
void *mm = NULL;
- mkdbname(fn, &dbname, 0);
+ dbname = mkdbname(ms, fn, 0);
if (dbname == NULL)
goto error2;
@@ -2113,7 +2155,7 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp,
char *dbname;
int rv = -1;
- mkdbname(fn, &dbname, 1);
+ dbname = mkdbname(ms, fn, 1);
if (dbname == NULL)
goto out;
@@ -2151,24 +2193,45 @@ private const char ext[] = ".mgc";
/*
* make a dbname
*/
-private void
-mkdbname(const char *fn, char **buf, int strip)
+/*
+ * Build the compiled-database file name for fn.
+ * When strip is non-zero only the part of fn after the last '/' is used.
+ * A trailing ".mgc" on fn is not duplicated.  When MAGIC_MIME is set and a
+ * readable "<name>.mime.mgc" exists, that name is returned and ms->flags is
+ * reduced to MAGIC_MIME_TYPE.
+ * Returns the string allocated by asprintf(), or NULL when asprintf() fails.
+ */
+private char *
+mkdbname(struct magic_set *ms, const char *fn, int strip)
{
- const char *p;
+ const char *p, *q;
+ char *buf;
+
if (strip) {
if ((p = strrchr(fn, '/')) != NULL)
fn = ++p;
}
- if ((p = strstr(fn, ext)) != NULL && p[sizeof(ext) - 1] == '\0')
- *buf = strdup(fn);
- else
- (void)asprintf(buf, "%s%s", fn, ext);
+ for (q = fn; *q; q++)
+ continue;
+ /* Look for .mgc */
+ for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
+ if (*p != *q)
+ break;
- if (buf && *buf && strlen(*buf) > MAXPATHLEN) {
- free(*buf);
- *buf = NULL;
+ /* Did not find .mgc, restore q */
+ if (p >= ext)
+ while (*q)
+ q++;
+
+ q++;
+ /* Compatibility with old code that looked in .mime */
+ if (ms->flags & MAGIC_MIME) {
+ /* buf is indeterminate when asprintf fails; callers test for NULL */
+ if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0)
+ return NULL;
+ if (access(buf, R_OK) != -1) {
+ ms->flags &= MAGIC_MIME_TYPE;
+ return buf;
+ }
+ free(buf);
}
+ if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0)
+ return NULL;
+
+ /*
+ * Compatibility with old code that looked in .mime: search the file
+ * name itself; p only ever points into (or just before) ext here.
+ */
+ if (strstr(fn, ".mime") != NULL)
+ ms->flags &= MAGIC_MIME_TYPE;
+ return buf;
}
/*
diff --git a/contrib/file/apptype.c b/contrib/file/apptype.c
index 42cee40e92b98..6f171ad1e0a49 100644
--- a/contrib/file/apptype.c
+++ b/contrib/file/apptype.c
@@ -26,15 +26,13 @@
#include "file.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
#ifndef lint
-FILE_RCSID("@(#)$File: apptype.c,v 1.7 2007/01/12 17:38:27 christos Exp $")
+FILE_RCSID("@(#)$File: apptype.c,v 1.10 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
+#include <stdlib.h>
+#include <string.h>
+
#ifdef __EMX__
#include <io.h>
#define INCL_DOSSESMGR
diff --git a/contrib/file/ascmagic.c b/contrib/file/ascmagic.c
index c374e02b4b83d..9236fb4a27a33 100644
--- a/contrib/file/ascmagic.c
+++ b/contrib/file/ascmagic.c
@@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -31,14 +31,15 @@
*
* Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
* to handle character codes other than ASCII on a unified basis.
- *
- * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
- * international characters, now subsumed into this file.
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
-#include <stdio.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>
@@ -48,39 +49,71 @@
#endif
#include "names.h"
-#ifndef lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.64 2008/07/16 18:00:57 christos Exp $")
-#endif /* lint */
-
#define MAXLINELEN 300 /* longest sane line length */
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
|| (x) == 0x85 || (x) == '\f')
-private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
- size_t *);
-private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
-private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
private int ascmatch(const unsigned char *, const unichar *, size_t);
private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
+private size_t trim_nuls(const unsigned char *, size_t);
+/*
+ * Undo the NUL-termination kindly provided by process()
+ * but leave at least one byte to look at.
+ * Returns nbytes reduced by the number of trailing '\0' bytes in buf;
+ * an nbytes of 0 or 1 is returned unchanged.
+ */
+private size_t
+trim_nuls(const unsigned char *buf, size_t nbytes)
+{
+ while (nbytes > 1 && buf[nbytes - 1] == '\0')
+ nbytes--;
+
+ return nbytes;
+}
protected int
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
{
- size_t i;
- unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
- unichar *ubuf = NULL;
- size_t ulen, mlen;
- const struct names *p;
- int rv = -1;
- int mime = ms->flags & MAGIC_MIME;
+ unichar *ubuf = NULL;
+ size_t ulen;
+ int rv = 1;
const char *code = NULL;
const char *code_mime = NULL;
const char *type = NULL;
+
+ if (ms->flags & MAGIC_APPLE)
+ return 0;
+
+ nbytes = trim_nuls(buf, nbytes);
+
+ /* If file doesn't look like any sort of text, give up. */
+ if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
+ &type) == 0) {
+ rv = 0;
+ goto done;
+ }
+
+ rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code,
+ type);
+
+ done:
+ if (ubuf)
+ free(ubuf);
+
+ return rv;
+}
+
+protected int
+file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
+ size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
+ const char *type)
+{
+ unsigned char *utf8_buf = NULL, *utf8_end;
+ size_t mlen, i;
+ const struct names *p;
+ int rv = -1;
+ int mime = ms->flags & MAGIC_MIME;
+
const char *subtype = NULL;
const char *subtype_mime = NULL;
@@ -96,82 +129,20 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
size_t last_line_end = (size_t)-1;
int has_long_lines = 0;
- /*
- * Undo the NUL-termination kindly provided by process()
- * but leave at least one byte to look at
- */
- while (nbytes > 1 && buf[nbytes - 1] == '\0')
- nbytes--;
-
- if ((nbuf = CAST(unsigned char *, calloc((size_t)1,
- (nbytes + 1) * sizeof(nbuf[0])))) == NULL)
- goto done;
- if ((ubuf = CAST(unichar *, calloc((size_t)1,
- (nbytes + 1) * sizeof(ubuf[0])))) == NULL)
- goto done;
+ if (ms->flags & MAGIC_APPLE)
+ return 0;
- /*
- * Then try to determine whether it's any character code we can
- * identify. Each of these tests, if it succeeds, will leave
- * the text converted into one-unichar-per-character Unicode in
- * ubuf, and the number of characters converted in ulen.
- */
- if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
- code = "ASCII";
- code_mime = "us-ascii";
- type = "text";
- } else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
- code = "UTF-8 Unicode (with BOM)";
- code_mime = "utf-8";
- type = "text";
- } else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
- code = "UTF-8 Unicode";
- code_mime = "utf-8";
- type = "text";
- } else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
- if (i == 1)
- code = "Little-endian UTF-16 Unicode";
- else
- code = "Big-endian UTF-16 Unicode";
-
- type = "character data";
- code_mime = "utf-16"; /* is this defined? */
- } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
- code = "ISO-8859";
- type = "text";
- code_mime = "iso-8859-1";
- } else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
- code = "Non-ISO extended-ASCII";
- type = "text";
- code_mime = "unknown";
- } else {
- from_ebcdic(buf, nbytes, nbuf);
-
- if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
- code = "EBCDIC";
- type = "character data";
- code_mime = "ebcdic";
- } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
- code = "International EBCDIC";
- type = "character data";
- code_mime = "ebcdic";
- } else {
- rv = 0;
- goto done; /* doesn't look like text at all */
- }
- }
+ nbytes = trim_nuls(buf, nbytes);
+ /* If we have fewer than 2 bytes, give up. */
if (nbytes <= 1) {
rv = 0;
goto done;
}
/* Convert ubuf to UTF-8 and try text soft magic */
- /* If original was ASCII or UTF-8, could use nbuf instead of
- re-converting. */
/* malloc size is a conservative overestimate; could be
- re-converting improved, or at least realloced after
- re-converting conversion. */
+ improved, or at least realloced after conversion. */
mlen = ulen * 6;
if ((utf8_buf = CAST(unsigned char *, malloc(mlen))) == NULL) {
file_oomem(ms, mlen);
@@ -179,10 +150,11 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
}
if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
goto done;
- if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
- rv = 1;
+ if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf),
+ TEXTTEST)) != 0)
goto done;
- }
+ else
+ rv = -1;
/* look for tokens from names.h - this is expensive! */
if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
@@ -255,41 +227,30 @@ subtype_identified:
if (seen_cr && nbytes < HOWMANY)
n_cr++;
+ if (strcmp(type, "binary") == 0) {
+ rv = 0;
+ goto done;
+ }
if (mime) {
- if (mime & MAGIC_MIME_TYPE) {
+ if ((mime & MAGIC_MIME_TYPE) != 0) {
if (subtype_mime) {
- if (file_printf(ms, subtype_mime) == -1)
+ if (file_printf(ms, "%s", subtype_mime) == -1)
goto done;
} else {
if (file_printf(ms, "text/plain") == -1)
goto done;
}
}
-
- if ((mime == 0 || mime == MAGIC_MIME) && code_mime) {
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, " charset=") == -1)
- goto done;
- if (file_printf(ms, code_mime) == -1)
- goto done;
- }
-
- if (mime == MAGIC_MIME_ENCODING)
- file_printf(ms, "binary");
} else {
- if (file_printf(ms, code) == -1)
+ if (file_printf(ms, "%s", code) == -1)
goto done;
if (subtype) {
- if (file_printf(ms, " ") == -1)
- goto done;
- if (file_printf(ms, subtype) == -1)
+ if (file_printf(ms, " %s", subtype) == -1)
goto done;
}
- if (file_printf(ms, " ") == -1)
- goto done;
- if (file_printf(ms, type) == -1)
+ if (file_printf(ms, " %s", type) == -1)
goto done;
if (has_long_lines)
@@ -305,7 +266,7 @@ subtype_identified:
if (file_printf(ms, ", with") == -1)
goto done;
- if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
+ if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
if (file_printf(ms, " no") == -1)
goto done;
} else {
@@ -348,10 +309,6 @@ subtype_identified:
}
rv = 1;
done:
- if (nbuf)
- free(nbuf);
- if (ubuf)
- free(ubuf);
if (utf8_buf)
free(utf8_buf);
@@ -375,144 +332,6 @@ ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
}
/*
- * This table reflects a particular philosophy about what constitutes
- * "text," and there is room for disagreement about it.
- *
- * Version 3.31 of the file command considered a file to be ASCII if
- * each of its characters was approved by either the isascii() or
- * isalpha() function. On most systems, this would mean that any
- * file consisting only of characters in the range 0x00 ... 0x7F
- * would be called ASCII text, but many systems might reasonably
- * consider some characters outside this range to be alphabetic,
- * so the file command would call such characters ASCII. It might
- * have been more accurate to call this "considered textual on the
- * local system" than "ASCII."
- *
- * It considered a file to be "International language text" if each
- * of its characters was either an ASCII printing character (according
- * to the real ASCII standard, not the above test), a character in
- * the range 0x80 ... 0xFF, or one of the following control characters:
- * backspace, tab, line feed, vertical tab, form feed, carriage return,
- * escape. No attempt was made to determine the language in which files
- * of this type were written.
- *
- *
- * The table below considers a file to be ASCII if all of its characters
- * are either ASCII printing characters (again, according to the X3.4
- * standard, not isascii()) or any of the following controls: bell,
- * backspace, tab, line feed, form feed, carriage return, esc, nextline.
- *
- * I include bell because some programs (particularly shell scripts)
- * use it literally, even though it is rare in normal text. I exclude
- * vertical tab because it never seems to be used in real text. I also
- * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
- * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
- * character to. It might be more appropriate to include it in the 8859
- * set instead of the ASCII set, but it's got to be included in *something*
- * we recognize or EBCDIC files aren't going to be considered textual.
- * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
- * and Latin characters, so these should possibly be allowed. But they
- * make a real mess on VT100-style displays if they're not paired properly,
- * so we are probably better off not calling them text.
- *
- * A file is considered to be ISO-8859 text if its characters are all
- * either ASCII, according to the above definition, or printing characters
- * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
- *
- * Finally, a file is considered to be international text from some other
- * character code if its characters are all either ISO-8859 (according to
- * the above definition) or characters in the range 0x80 ... 0x9F, which
- * ISO-8859 considers to be control characters but the IBM PC and Macintosh
- * consider to be printing characters.
- */
-
-#define F 0 /* character never appears in text */
-#define T 1 /* character appears in plain ASCII text */
-#define I 2 /* character appears in ISO-8859 text */
-#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
-
-private char text_chars[256] = {
- /* BEL BS HT LF FF CR */
- F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
- /* ESC */
- F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
- /* NEL */
- X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
- X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
-};
-
-private int
-looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- size_t i;
-
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- int t = text_chars[buf[i]];
-
- if (t != T)
- return 0;
-
- ubuf[(*ulen)++] = buf[i];
- }
-
- return 1;
-}
-
-private int
-looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
-{
- size_t i;
-
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- int t = text_chars[buf[i]];
-
- if (t != T && t != I)
- return 0;
-
- ubuf[(*ulen)++] = buf[i];
- }
-
- return 1;
-}
-
-private int
-looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- size_t i;
-
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- int t = text_chars[buf[i]];
-
- if (t != T && t != I && t != X)
- return 0;
-
- ubuf[(*ulen)++] = buf[i];
- }
-
- return 1;
-}
-
-/*
* Encode Unicode string as UTF-8, returning pointer to character
* after end of string, or NULL if an invalid character is found.
*/
@@ -568,226 +387,3 @@ encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
return buf;
}
-
-/*
- * Decide whether some text looks like UTF-8. Returns:
- *
- * -1: invalid UTF-8
- * 0: uses odd control characters, so doesn't look like text
- * 1: 7-bit text
- * 2: definitely UTF-8 text (valid high-bit set bytes)
- *
- * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
- * ubuf must be big enough!
- */
-protected int
-file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
-{
- size_t i;
- int n;
- unichar c;
- int gotone = 0, ctrl = 0;
-
- if (ubuf)
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
- /*
- * Even if the whole file is valid UTF-8 sequences,
- * still reject it if it uses weird control characters.
- */
-
- if (text_chars[buf[i]] != T)
- ctrl = 1;
-
- if (ubuf)
- ubuf[(*ulen)++] = buf[i];
- } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
- return -1;
- } else { /* 11xxxxxx begins UTF-8 */
- int following;
-
- if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
- c = buf[i] & 0x1f;
- following = 1;
- } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
- c = buf[i] & 0x0f;
- following = 2;
- } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
- c = buf[i] & 0x07;
- following = 3;
- } else if ((buf[i] & 0x04) == 0) { /* 111110xx */
- c = buf[i] & 0x03;
- following = 4;
- } else if ((buf[i] & 0x02) == 0) { /* 1111110x */
- c = buf[i] & 0x01;
- following = 5;
- } else
- return -1;
-
- for (n = 0; n < following; n++) {
- i++;
- if (i >= nbytes)
- goto done;
-
- if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
- return -1;
-
- c = (c << 6) + (buf[i] & 0x3f);
- }
-
- if (ubuf)
- ubuf[(*ulen)++] = c;
- gotone = 1;
- }
- }
-done:
- return ctrl ? 0 : (gotone ? 2 : 1);
-}
-
-/*
- * Decide whether some text looks like UTF-8 with BOM. If there is no
- * BOM, return -1; otherwise return the result of looks_utf8 on the
- * rest of the text.
- */
-private int
-looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
- return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
- else
- return -1;
-}
-
-private int
-looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- int bigend;
- size_t i;
-
- if (nbytes < 2)
- return 0;
-
- if (buf[0] == 0xff && buf[1] == 0xfe)
- bigend = 0;
- else if (buf[0] == 0xfe && buf[1] == 0xff)
- bigend = 1;
- else
- return 0;
-
- *ulen = 0;
-
- for (i = 2; i + 1 < nbytes; i += 2) {
- /* XXX fix to properly handle chars > 65536 */
-
- if (bigend)
- ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
- else
- ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
-
- if (ubuf[*ulen - 1] == 0xfffe)
- return 0;
- if (ubuf[*ulen - 1] < 128 &&
- text_chars[(size_t)ubuf[*ulen - 1]] != T)
- return 0;
- }
-
- return 1 + bigend;
-}
-
-#undef F
-#undef T
-#undef I
-#undef X
-
-/*
- * This table maps each EBCDIC character to an (8-bit extended) ASCII
- * character, as specified in the rationale for the dd(1) command in
- * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
- *
- * Unfortunately it does not seem to correspond exactly to any of the
- * five variants of EBCDIC documented in IBM's _Enterprise Systems
- * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
- * Edition, July, 1999, pp. I-1 - I-4.
- *
- * Fortunately, though, all versions of EBCDIC, including this one, agree
- * on most of the printing characters that also appear in (7-bit) ASCII.
- * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
- *
- * Fortunately too, there is general agreement that codes 0x00 through
- * 0x3F represent control characters, 0x41 a nonbreaking space, and the
- * remainder printing characters.
- *
- * This is sufficient to allow us to identify EBCDIC text and to distinguish
- * between old-style and internationalized examples of text.
- */
-
-private unsigned char ebcdic_to_ascii[] = {
- 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
-128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
-144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
-' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
-'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
-'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
-186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
-195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
-202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
-209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
-216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
-'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
-'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
-'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
-'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
-};
-
-#ifdef notdef
-/*
- * The following EBCDIC-to-ASCII table may relate more closely to reality,
- * or at least to modern reality. It comes from
- *
- * http://ftp.s390.ibm.com/products/oe/bpxqp9.html
- *
- * and maps the characters of EBCDIC code page 1047 (the code used for
- * Unix-derived software on IBM's 390 systems) to the corresponding
- * characters from ISO 8859-1.
- *
- * If this table is used instead of the above one, some of the special
- * cases for the NEL character can be taken out of the code.
- */
-
-private unsigned char ebcdic_1047_to_8859[] = {
-0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
-0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
-0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
-0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
-0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
-0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
-0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
-0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
-0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
-0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
-0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
-0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
-0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
-0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
-0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
-};
-#endif
-
-/*
- * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
- */
-private void
-from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
-{
- size_t i;
-
- for (i = 0; i < nbytes; i++) {
- out[i] = ebcdic_to_ascii[buf[i]];
- }
-}
diff --git a/contrib/file/asprintf.c b/contrib/file/asprintf.c
index c103cf18e553b..0606593ed3b62 100644
--- a/contrib/file/asprintf.c
+++ b/contrib/file/asprintf.c
@@ -26,7 +26,11 @@
* SUCH DAMAGE.
*/
-#include <stdarg.h>
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: asprintf.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
+#endif
int vasprintf(char **ptr, const char *format_string, va_list vargs);
diff --git a/contrib/file/cdf.c b/contrib/file/cdf.c
new file mode 100644
index 0000000000000..0c66d11bdb124
--- /dev/null
+++ b/contrib/file/cdf.c
@@ -0,0 +1,1105 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Parse composite document files, the format used in Microsoft Office
+ * document files before they switched to zipped xml.
+ * Info from: http://sc.openoffice.org/compdocfileformat.pdf
+ */
+
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: cdf.c,v 1.17 2009/02/03 20:27:51 christos Exp $")
+#endif
+
+#include <assert.h>
+#ifdef CDF_DEBUG
+#include <err.h>
+#endif
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+#ifndef EFTYPE
+#define EFTYPE EINVAL
+#endif
+
+#include "cdf.h"
+
+#ifndef __arraycount
+#define __arraycount(a) (sizeof(a) / sizeof(a[0]))
+#endif
+
+#ifdef CDF_DEBUG
+#define DPRINTF(a) printf a
+#else
+#define DPRINTF(a)
+#endif
+
+/*
+ * Byte-order probe.  cdf_read_header() copies the bytes 01 02 03 04 into
+ * cdf_bo.s; reading them back through cdf_bo.u gives 0x01020304 only on a
+ * host that stores the most significant byte first.
+ */
+static union {
+ char s[4];
+ uint32_t u;
+} cdf_bo;
+
+/*
+ * True when the probe reads back as 0x01020304.  In the code visible here
+ * the probe is only filled in by cdf_read_header(), so the result is
+ * meaningful only after that function has run.
+ */
+#define NEED_SWAP (cdf_bo.u == (uint32_t)0x01020304)
+
+/*
+ * Width-specific conversions: byte-swap x when NEED_SWAP is set, otherwise
+ * just cast it to the unsigned type of that width.  Note that x is always
+ * converted to an unsigned type, including when the caller holds a signed
+ * value.
+ */
+#define CDF_TOLE8(x) (NEED_SWAP ? cdf_tole8(x) : (uint64_t)(x))
+#define CDF_TOLE4(x) (NEED_SWAP ? cdf_tole4(x) : (uint32_t)(x))
+#define CDF_TOLE2(x) (NEED_SWAP ? cdf_tole2(x) : (uint16_t)(x))
+
+/*
+ * Return sv with its two bytes exchanged.
+ */
+uint16_t
+cdf_tole2(uint16_t sv)
+{
+	const unsigned int lo = sv & 0x00ffU;
+	const unsigned int hi = (sv >> 8) & 0x00ffU;
+
+	return (uint16_t)((lo << 8) | hi);
+}
+
+/*
+ * Return sv with its four bytes in reverse order.
+ */
+uint32_t
+cdf_tole4(uint32_t sv)
+{
+	return ((sv & 0x000000ffU) << 24) |
+	    ((sv & 0x0000ff00U) << 8) |
+	    ((sv >> 8) & 0x0000ff00U) |
+	    ((sv >> 24) & 0x000000ffU);
+}
+
+/*
+ * Return sv with its eight bytes in reverse order.
+ */
+uint64_t
+cdf_tole8(uint64_t sv)
+{
+	uint64_t out = 0;
+	int n;
+
+	/* Peel bytes off the low end of sv and push them onto out. */
+	for (n = 0; n < 8; n++) {
+		out = (out << 8) | (sv & 0xffU);
+		sv >>= 8;
+	}
+	return out;
+}
+
+/*
+ * Field unpacking helpers.  Both copy sizeof(a) bytes from buf at offset
+ * len and then advance len by that amount, so they rely on variables named
+ * "buf" and "len" being in scope at the point of use.  CDF_UNPACK is for
+ * scalar fields (takes the address of a); CDF_UNPACKA is for array fields
+ * (a is used as the destination directly).  Each expands to a comma
+ * expression and evaluates a more than once.
+ */
+#define CDF_UNPACK(a) \
+ (void)memcpy(&(a), &buf[len], sizeof(a)), len += sizeof(a)
+#define CDF_UNPACKA(a) \
+ (void)memcpy((a), &buf[len], sizeof(a)), len += sizeof(a)
+
+/*
+ * Run every multi-byte field of the header through CDF_TOLE8/4/2, in
+ * place.  The h_unused* fields are left untouched.
+ */
+void
+cdf_swap_header(cdf_header_t *h)
+{
+	size_t left = __arraycount(h->h_master_sat);
+
+	/* Fields converted with CDF_TOLE8. */
+	h->h_magic = CDF_TOLE8(h->h_magic);
+	h->h_uuid[0] = CDF_TOLE8(h->h_uuid[0]);
+	h->h_uuid[1] = CDF_TOLE8(h->h_uuid[1]);
+
+	/* Fields converted with CDF_TOLE2. */
+	h->h_revision = CDF_TOLE2(h->h_revision);
+	h->h_version = CDF_TOLE2(h->h_version);
+	h->h_byte_order = CDF_TOLE2(h->h_byte_order);
+	h->h_sec_size_p2 = CDF_TOLE2(h->h_sec_size_p2);
+	h->h_short_sec_size_p2 = CDF_TOLE2(h->h_short_sec_size_p2);
+
+	/* Fields converted with CDF_TOLE4. */
+	h->h_num_sectors_in_sat = CDF_TOLE4(h->h_num_sectors_in_sat);
+	h->h_secid_first_directory = CDF_TOLE4(h->h_secid_first_directory);
+	h->h_min_size_standard_stream =
+	    CDF_TOLE4(h->h_min_size_standard_stream);
+	h->h_secid_first_sector_in_short_sat =
+	    CDF_TOLE4(h->h_secid_first_sector_in_short_sat);
+	h->h_num_sectors_in_short_sat =
+	    CDF_TOLE4(h->h_num_sectors_in_short_sat);
+	h->h_secid_first_sector_in_master_sat =
+	    CDF_TOLE4(h->h_secid_first_sector_in_master_sat);
+	h->h_num_sectors_in_master_sat =
+	    CDF_TOLE4(h->h_num_sectors_in_master_sat);
+
+	/* Every entry of the master SAT array kept in the header. */
+	while (left-- > 0)
+		h->h_master_sat[left] = CDF_TOLE4(h->h_master_sat[left]);
+}
+
+/*
+ * Fill *h from the raw bytes in buf.  Fields are copied one after another
+ * in the order listed below, with "len" tracking the current read offset,
+ * so the statement order defines the expected byte layout of buf.  No
+ * byte-order conversion happens here; cdf_read_header() calls
+ * cdf_swap_header() afterwards.  buf must hold at least as many bytes as
+ * the fields below add up to (cdf_read_header() passes a 512-byte buffer).
+ */
+void
+cdf_unpack_header(cdf_header_t *h, char *buf)
+{
+ size_t i;
+ size_t len = 0;
+
+ CDF_UNPACK(h->h_magic);
+ CDF_UNPACKA(h->h_uuid);
+ CDF_UNPACK(h->h_revision);
+ CDF_UNPACK(h->h_version);
+ CDF_UNPACK(h->h_byte_order);
+ CDF_UNPACK(h->h_sec_size_p2);
+ CDF_UNPACK(h->h_short_sec_size_p2);
+ CDF_UNPACKA(h->h_unused0);
+ CDF_UNPACK(h->h_num_sectors_in_sat);
+ CDF_UNPACK(h->h_secid_first_directory);
+ CDF_UNPACKA(h->h_unused1);
+ CDF_UNPACK(h->h_min_size_standard_stream);
+ CDF_UNPACK(h->h_secid_first_sector_in_short_sat);
+ CDF_UNPACK(h->h_num_sectors_in_short_sat);
+ CDF_UNPACK(h->h_secid_first_sector_in_master_sat);
+ CDF_UNPACK(h->h_num_sectors_in_master_sat);
+ /* The remaining bytes are the master SAT entries, one per array slot. */
+ for (i = 0; i < __arraycount(h->h_master_sat); i++)
+ CDF_UNPACK(h->h_master_sat[i]);
+}
+
+/*
+ * Run the multi-byte fields of one directory entry through
+ * CDF_TOLE8/4/2, in place.  d_name, d_type, d_color and d_unused0 are not
+ * touched here.
+ */
+void
+cdf_swap_dir(cdf_directory_t *d)
+{
+	/* Fields converted with CDF_TOLE2. */
+	d->d_namelen = CDF_TOLE2(d->d_namelen);
+
+	/* Fields converted with CDF_TOLE4. */
+	d->d_left_child = CDF_TOLE4(d->d_left_child);
+	d->d_right_child = CDF_TOLE4(d->d_right_child);
+	d->d_storage = CDF_TOLE4(d->d_storage);
+	d->d_flags = CDF_TOLE4(d->d_flags);
+	d->d_stream_first_sector = CDF_TOLE4(d->d_stream_first_sector);
+	d->d_size = CDF_TOLE4(d->d_size);
+
+	/* Fields converted with CDF_TOLE8. */
+	d->d_storage_uuid[0] = CDF_TOLE8(d->d_storage_uuid[0]);
+	d->d_storage_uuid[1] = CDF_TOLE8(d->d_storage_uuid[1]);
+	d->d_created = CDF_TOLE8(d->d_created);
+	d->d_modified = CDF_TOLE8(d->d_modified);
+}
+
+/*
+ * Convert cl_dword and the first two cl_word entries of a class id in
+ * place; the remaining members are left as they are.
+ */
+void
+cdf_swap_class(cdf_classid_t *d)
+{
+	size_t w;
+
+	d->cl_dword = CDF_TOLE4(d->cl_dword);
+	for (w = 0; w < 2; w++)
+		d->cl_word[w] = CDF_TOLE2(d->cl_word[w]);
+}
+
+/*
+ * Fill one directory entry *d from the raw bytes in buf.  Fields are
+ * copied consecutively in the order listed below ("len" is the running
+ * read offset), so the statement order defines the expected byte layout.
+ * No byte-order conversion is done here; cdf_read_dir() applies
+ * cdf_swap_dir() afterwards when NEED_SWAP is set.
+ */
+void
+cdf_unpack_dir(cdf_directory_t *d, char *buf)
+{
+ size_t len = 0;
+
+ CDF_UNPACKA(d->d_name);
+ CDF_UNPACK(d->d_namelen);
+ CDF_UNPACK(d->d_type);
+ CDF_UNPACK(d->d_color);
+ CDF_UNPACK(d->d_left_child);
+ CDF_UNPACK(d->d_right_child);
+ CDF_UNPACK(d->d_storage);
+ CDF_UNPACKA(d->d_storage_uuid);
+ CDF_UNPACK(d->d_flags);
+ CDF_UNPACK(d->d_created);
+ CDF_UNPACK(d->d_modified);
+ CDF_UNPACK(d->d_stream_first_sector);
+ CDF_UNPACK(d->d_size);
+ CDF_UNPACK(d->d_unused0);
+}
+
+/*
+ * Read the 512-byte header from the start of fd into *h.
+ *
+ * Also seeds the cdf_bo byte-order probe that NEED_SWAP (and therefore
+ * every CDF_TOLE* conversion) depends on.
+ *
+ * Returns 0 on success.  Returns -1 if the seek fails, if fewer than 512
+ * bytes could be read, or if the magic number does not equal CDF_MAGIC
+ * (errno is set to EFTYPE in that last case).
+ */
+int
+cdf_read_header(int fd, cdf_header_t *h)
+{
+	char buf[512];
+
+	/* Seed the byte-order probe before any conversion macro is used. */
+	(void)memcpy(cdf_bo.s, "\01\02\03\04", 4);
+	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1)
+		return -1;
+	if (read(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
+		return -1;
+	cdf_unpack_header(h, buf);
+	cdf_swap_header(h);
+	if (h->h_magic != CDF_MAGIC) {
+		/* h_magic is converted with CDF_TOLE8, so print 64 bits. */
+		DPRINTF(("Bad magic 0x%llx != 0x%llx\n",
+		    (unsigned long long)h->h_magic,
+		    (unsigned long long)CDF_MAGIC));
+		errno = EFTYPE;
+		return -1;
+	}
+	return 0;
+}
+
+
+/*
+ * Read sector id from fd into buf at byte offset offs.  len must equal
+ * CDF_SEC_SIZE(h) (asserted).  Returns -1 if the seek fails, otherwise
+ * whatever read(2) returns.
+ */
+ssize_t
+cdf_read_sector(int fd, void *buf, size_t offs, size_t len,
+    const cdf_header_t *h, cdf_secid_t id)
+{
+	char *dst = ((char *)buf) + offs;
+	off_t where;
+
+	assert((size_t)CDF_SEC_SIZE(h) == len);
+	where = (off_t)CDF_SEC_POS(h, id);
+	if (lseek(fd, where, SEEK_SET) == (off_t)-1)
+		return -1;
+	return read(fd, dst, len);
+}
+
+/*
+ * Copy short sector id out of the in-memory container stream sst into buf
+ * at byte offset offs.  len must equal CDF_SHORT_SEC_SIZE(h) (asserted).
+ *
+ * The source range is checked against the container size before copying,
+ * so a corrupt sector id (or a container that was never loaded, where
+ * sst_tab is NULL and sst_len is 0) yields -1 with errno set to EFTYPE
+ * instead of reading out of bounds.
+ *
+ * NOTE(review): the container size is taken as sst_len sectors of
+ * CDF_SEC_SIZE(h) bytes, which is how cdf_read_long_sector_chain()
+ * allocates it -- confirm no caller passes a differently sized stream.
+ *
+ * Returns len on success, -1 on error.
+ */
+ssize_t
+cdf_read_short_sector(const cdf_stream_t *sst, void *buf, size_t offs,
+    size_t len, const cdf_header_t *h, cdf_secid_t id)
+{
+	size_t avail = (size_t)CDF_SEC_SIZE(h) * sst->sst_len;
+	size_t pos;
+
+	assert((size_t)CDF_SHORT_SEC_SIZE(h) == len);
+	if (id < 0) {
+		errno = EFTYPE;
+		return -1;
+	}
+	pos = (size_t)CDF_SHORT_SEC_POS(h, id);
+	if (pos > avail || len > avail - pos) {
+		DPRINTF(("Out of bounds short sector read\n"));
+		errno = EFTYPE;
+		return -1;
+	}
+	(void)memcpy(((char *)buf) + offs,
+	    ((const char *)sst->sst_tab) + pos, len);
+	return (ssize_t)len;
+}
+
+/*
+ * Read the sector allocation table.
+ *
+ * SAT sector ids come from two places: the h_master_sat array inside the
+ * header, and then a chain of master-SAT sectors starting at
+ * h_secid_first_sector_in_master_sat.  Each master-SAT sector holds
+ * (sector size / sizeof(cdf_secid_t)) - 1 SAT sector ids followed by the
+ * id of the next master-SAT sector.
+ *
+ * The table is sized for the largest number of sectors those two sources
+ * can name, every write into it is bounds-checked, and a negative id in a
+ * master-SAT sector ends the list.  On success sat->sat_len is the number
+ * of SAT sectors actually loaded and sat->sat_tab owns the data (release
+ * with free()).
+ *
+ * Returns 0 on success, -1 on error; errno is EFTYPE for a malformed
+ * table, otherwise whatever the failing call left behind.
+ */
+int
+cdf_read_sat(int fd, cdf_header_t *h, cdf_sat_t *sat)
+{
+	size_t i, j, k;
+	size_t ss = CDF_SEC_SIZE(h);
+	size_t nsatpersec;
+	size_t nmsat = h->h_num_sectors_in_master_sat;
+	cdf_secid_t *msa, mid, sec;
+
+	/* A sector must hold at least the "next master sector" id. */
+	if (ss < sizeof(mid)) {
+		errno = EFTYPE;
+		return -1;
+	}
+	nsatpersec = (ss / sizeof(mid)) - 1;
+
+	/* Count the ids stored in the header, using the same stop
+	 * condition as the read loop below. */
+	for (i = 0; i < __arraycount(h->h_master_sat); i++)
+		if (h->h_master_sat[i] < 0)
+			break;
+
+	/* Refuse counts whose product would overflow size_t. */
+	if (nsatpersec != 0 && nmsat > ((size_t)-1 - i) / nsatpersec) {
+		DPRINTF(("Too many sectors in master SAT"));
+		errno = EFTYPE;
+		return -1;
+	}
+	sat->sat_len = nmsat * nsatpersec + i;
+	if ((sat->sat_tab = calloc(sat->sat_len, ss)) == NULL)
+		return -1;
+
+	for (i = 0; i < __arraycount(h->h_master_sat); i++) {
+		if (h->h_master_sat[i] < 0)
+			break;
+		if (cdf_read_sector(fd, sat->sat_tab, ss * i, ss, h,
+		    h->h_master_sat[i]) != (ssize_t)ss) {
+			DPRINTF(("Reading sector %d", h->h_master_sat[i]));
+			goto out1;
+		}
+	}
+
+	if ((msa = calloc(1, ss)) == NULL)
+		goto out1;
+
+	mid = h->h_secid_first_sector_in_master_sat;
+	for (j = 0; j < nmsat; j++) {
+		if (j >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Reading master sector loop limit"));
+			errno = EFTYPE;
+			goto out2;
+		}
+		if (cdf_read_sector(fd, msa, 0, ss, h, mid) != (ssize_t)ss) {
+			DPRINTF(("Reading master sector %d", mid));
+			goto out2;
+		}
+		for (k = 0; k < nsatpersec; k++, i++) {
+			sec = (cdf_secid_t)CDF_TOLE4(msa[k]);
+			/* A negative id marks the end of the list. */
+			if (sec < 0)
+				goto done;
+			if (i >= sat->sat_len) {
+				DPRINTF(("Out of bounds reading MSA"));
+				errno = EFTYPE;
+				goto out2;
+			}
+			if (cdf_read_sector(fd, sat->sat_tab, ss * i, ss, h,
+			    sec) != (ssize_t)ss) {
+				DPRINTF(("Reading sector %d", sec));
+				goto out2;
+			}
+		}
+		/* Last slot of the sector links to the next master sector. */
+		mid = (cdf_secid_t)CDF_TOLE4(msa[nsatpersec]);
+	}
+done:
+	/* Record how many SAT sectors were really loaded. */
+	sat->sat_len = i;
+	free(msa);
+	return 0;
+out2:
+	free(msa);
+out1:
+	free(sat->sat_tab);
+	sat->sat_tab = NULL;
+	return -1;
+}
+
+/*
+ * Count the sectors in the chain that starts at sid by following the
+ * links in sat until a negative sector id is reached.
+ *
+ * sat->sat_tab holds sat_len sectors of cdf_secid_t entries, so valid
+ * indices are 0 .. maxsector - 1; an id equal to or above maxsector is
+ * rejected before it is used as an index.
+ *
+ * Returns the chain length, or (size_t)-1 with errno set to EFTYPE when
+ * the chain is longer than CDF_LOOP_LIMIT or names a sector outside the
+ * table.
+ */
+size_t
+cdf_count_chain(const cdf_header_t *h, const cdf_sat_t *sat,
+    cdf_secid_t sid)
+{
+	size_t i, s = CDF_SEC_SIZE(h) / sizeof(cdf_secid_t);
+	cdf_secid_t maxsector = (cdf_secid_t)(sat->sat_len * s);
+
+	DPRINTF(("Chain:"));
+	for (i = 0; sid >= 0; i++) {
+		DPRINTF((" %d", sid));
+		if (i >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Counting chain loop limit"));
+			errno = EFTYPE;
+			return (size_t)-1;
+		}
+		if (sid >= maxsector) {
+			DPRINTF(("Sector %d >= %d\n", sid, maxsector));
+			errno = EFTYPE;
+			return (size_t)-1;
+		}
+		sid = CDF_TOLE4(sat->sat_tab[sid]);
+	}
+	DPRINTF(("\n"));
+	return i;
+}
+
+/*
+ * Load the chain of regular sectors that starts at sid into a freshly
+ * allocated buffer.
+ *
+ * On success scn->sst_tab owns the data (release with free()),
+ * scn->sst_len is the number of sectors in the chain and scn->sst_dirlen
+ * is the caller-supplied len.  A short read on the final sector of the
+ * chain is accepted; the unread tail stays zero from calloc().
+ *
+ * Returns 0 on success and -1 on error; after a failed read or a loop
+ * limit hit, scn->sst_tab has been freed and set to NULL.
+ */
+int
+cdf_read_long_sector_chain(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
+    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
+{
+	size_t ss = CDF_SEC_SIZE(h), i;
+	ssize_t nr;
+
+	scn->sst_len = cdf_count_chain(h, sat, sid);
+	scn->sst_dirlen = len;
+
+	if (scn->sst_len == (size_t)-1)
+		return -1;
+
+	scn->sst_tab = calloc(scn->sst_len, ss);
+	if (scn->sst_tab == NULL)
+		return -1;
+
+	for (i = 0; sid >= 0; i++) {
+		/* Checked before the read, like the sibling chain readers. */
+		if (i >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Read long sector chain loop limit"));
+			errno = EFTYPE;
+			goto out;
+		}
+		if ((nr = cdf_read_sector(fd, scn->sst_tab, i * ss, ss, h,
+		    sid)) != (ssize_t)ss) {
+			if (i == scn->sst_len - 1 && nr > 0) {
+				/* Last sector might be truncated */
+				return 0;
+			}
+			DPRINTF(("Reading long sector chain %d", sid));
+			goto out;
+		}
+		sid = CDF_TOLE4(sat->sat_tab[sid]);
+	}
+	return 0;
+out:
+	free(scn->sst_tab);
+	scn->sst_tab = NULL;
+	return -1;
+}
+
+/*
+ * Load the chain of short sectors that starts at sid, copying each one
+ * out of the in-memory container stream sst, following links in ssat.
+ *
+ * On success scn->sst_tab owns the data (release with free()),
+ * scn->sst_len is the number of short sectors in the chain and
+ * scn->sst_dirlen is the caller-supplied len.
+ *
+ * Returns 0 on success and -1 on error; after a failed copy or a loop
+ * limit hit, scn->sst_tab has been freed and set to NULL.
+ */
+int
+cdf_read_short_sector_chain(const cdf_header_t *h,
+    const cdf_sat_t *ssat, const cdf_stream_t *sst,
+    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
+{
+	size_t ss = CDF_SHORT_SEC_SIZE(h), i;
+
+	scn->sst_len = cdf_count_chain(h, ssat, sid);
+	scn->sst_dirlen = len;
+
+	if (scn->sst_len == (size_t)-1)
+		return -1;
+
+	scn->sst_tab = calloc(scn->sst_len, ss);
+	if (scn->sst_tab == NULL)
+		return -1;
+
+	for (i = 0; sid >= 0; i++) {
+		if (i >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Read short sector chain loop limit"));
+			errno = EFTYPE;
+			goto out;
+		}
+		if (cdf_read_short_sector(sst, scn->sst_tab, i * ss, ss, h,
+		    sid) != (ssize_t)ss) {
+			DPRINTF(("Reading short sector chain %d", sid));
+			goto out;
+		}
+		sid = CDF_TOLE4(ssat->sat_tab[sid]);
+	}
+	return 0;
+out:
+	free(scn->sst_tab);
+	scn->sst_tab = NULL;
+	return -1;
+}
+
+/*
+ * Load the stream that starts at sid, choosing the reader by size:
+ * streams shorter than h_min_size_standard_stream are read from the
+ * short-sector container, everything else from regular sectors.
+ * Returns whatever the selected reader returns.
+ */
+int
+cdf_read_sector_chain(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
+    const cdf_sat_t *ssat, const cdf_stream_t *sst,
+    cdf_secid_t sid, size_t len, cdf_stream_t *scn)
+{
+	if (!(len < h->h_min_size_standard_stream))
+		return cdf_read_long_sector_chain(fd, h, sat, sid, len, scn);
+
+	return cdf_read_short_sector_chain(h, ssat, sst, sid, len, scn);
+}
+
+/*
+ * Read the directory: follow the sector chain that starts at
+ * h_secid_first_directory and unpack every CDF_DIRECTORY_SIZE-byte entry
+ * of every sector into dir->dir_tab.
+ *
+ * On success dir->dir_len is the number of entries and dir->dir_tab owns
+ * them (release with free()).  Entries are byte-swapped when NEED_SWAP is
+ * set.
+ *
+ * Returns 0 on success and -1 on error; after a failed read or a loop
+ * limit hit, dir->dir_tab has been freed and set to NULL.
+ */
+int
+cdf_read_dir(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
+    cdf_dir_t *dir)
+{
+	size_t i, j, k;
+	size_t ss = CDF_SEC_SIZE(h), ns, nd;
+	char *buf;
+	cdf_secid_t sid = h->h_secid_first_directory;
+
+	ns = cdf_count_chain(h, sat, sid);
+	if (ns == (size_t)-1)
+		return -1;
+
+	nd = ss / CDF_DIRECTORY_SIZE;
+
+	dir->dir_len = ns * nd;
+	dir->dir_tab = calloc(dir->dir_len, sizeof(dir->dir_tab[0]));
+	if (dir->dir_tab == NULL)
+		return -1;
+
+	if ((buf = malloc(ss)) == NULL) {
+		free(dir->dir_tab);
+		dir->dir_tab = NULL;
+		return -1;
+	}
+
+	for (j = i = 0; i < ns; i++, j++) {
+		if (j >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Read dir loop limit"));
+			errno = EFTYPE;
+			goto out;
+		}
+		if (cdf_read_sector(fd, buf, 0, ss, h, sid) != (ssize_t)ss) {
+			DPRINTF(("Reading directory sector %d", sid));
+			goto out;
+		}
+		/* Separate index so the loop-limit counter j is not reset. */
+		for (k = 0; k < nd; k++) {
+			cdf_unpack_dir(&dir->dir_tab[i * nd + k],
+			    &buf[k * CDF_DIRECTORY_SIZE]);
+		}
+		sid = CDF_TOLE4(sat->sat_tab[sid]);
+	}
+	if (NEED_SWAP)
+		for (i = 0; i < dir->dir_len; i++)
+			cdf_swap_dir(&dir->dir_tab[i]);
+	free(buf);
+	return 0;
+out:
+	free(dir->dir_tab);
+	dir->dir_tab = NULL;
+	free(buf);
+	return -1;
+}
+
+
+/*
+ * Read the short-sector allocation table: follow the chain in sat that
+ * starts at h_secid_first_sector_in_short_sat and load each sector, in
+ * chain order, into ssat->sat_tab.  ssat->sat_len is set to the number of
+ * sectors in that chain.
+ *
+ * Returns 0 on success, -1 on error (the table is freed after a failed
+ * read or a loop limit hit).
+ */
+int
+cdf_read_ssat(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
+    cdf_sat_t *ssat)
+{
+	const size_t secsize = CDF_SEC_SIZE(h);
+	cdf_secid_t cur = h->h_secid_first_sector_in_short_sat;
+	size_t n = 0;
+
+	ssat->sat_len = cdf_count_chain(h, sat, cur);
+	if (ssat->sat_len == (size_t)-1)
+		return -1;
+
+	ssat->sat_tab = calloc(ssat->sat_len, secsize);
+	if (ssat->sat_tab == NULL)
+		return -1;
+
+	/* n is both the destination slot and the iteration count. */
+	while (cur >= 0) {
+		if (n >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Read short sat sector loop limit"));
+			errno = EFTYPE;
+			goto out;
+		}
+		if (cdf_read_sector(fd, ssat->sat_tab, n * secsize, secsize,
+		    h, cur) != (ssize_t)secsize) {
+			DPRINTF(("Reading short sat sector %d", cur));
+			goto out;
+		}
+		cur = CDF_TOLE4(sat->sat_tab[cur]);
+		n++;
+	}
+	return 0;
+out:
+	free(ssat->sat_tab);
+	return -1;
+}
+
+/*
+ * Load the short-sector container stream, i.e. the stream owned by the
+ * first directory entry of type CDF_DIR_TYPE_ROOT_STORAGE.
+ *
+ * If that entry has no stream (negative first sector), *scn is set to an
+ * empty stream -- sst_tab NULL, sst_len 0, sst_dirlen 0 -- and 0 is
+ * returned.
+ *
+ * Returns 0 on success; -1 with errno set to EFTYPE when there is no root
+ * storage entry, or -1 when reading the chain fails.
+ */
+int
+cdf_read_short_stream(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
+    const cdf_dir_t *dir, cdf_stream_t *scn)
+{
+	size_t i;
+	const cdf_directory_t *d;
+
+	for (i = 0; i < dir->dir_len; i++)
+		if (dir->dir_tab[i].d_type == CDF_DIR_TYPE_ROOT_STORAGE)
+			break;
+
+	if (i == dir->dir_len) {
+		DPRINTF(("Cannot find root storage node\n"));
+		errno = EFTYPE;
+		return -1;
+	}
+	d = &dir->dir_tab[i];
+
+	/* If it is not there, just fake it; some docs don't have it */
+	if (d->d_stream_first_sector < 0) {
+		scn->sst_tab = NULL;
+		scn->sst_len = 0;
+		/* Leave no field of *scn uninitialised for the caller. */
+		scn->sst_dirlen = 0;
+		return 0;
+	}
+
+	return cdf_read_long_sector_chain(fd, h, sat,
+	    d->d_stream_first_sector, d->d_size, scn);
+}
+
+/*
+ * Compare the first l characters of the byte string d with the first l
+ * 16-bit units of s (each converted with CDF_TOLE2).  Returns 0 when they
+ * all match, otherwise the difference at the first mismatch.
+ */
+static int
+cdf_namecmp(const char *d, const uint16_t *s, size_t l)
+{
+	size_t k;
+
+	for (k = 0; k < l; k++) {
+		const uint16_t unit = CDF_TOLE2(s[k]);
+
+		if (d[k] != unit)
+			return (unsigned char)d[k] - unit;
+	}
+	return 0;
+}
+
+/*
+ * Find the first user stream named "\05SummaryInformation" in the
+ * directory and load its contents into *scn via cdf_read_sector_chain().
+ * The name comparison covers sizeof(name) characters, i.e. it includes
+ * the terminating NUL.
+ *
+ * Returns the result of cdf_read_sector_chain(), or -1 with errno set to
+ * EFTYPE when no such entry exists.
+ */
+int
+cdf_read_summary_info(int fd, const cdf_header_t *h,
+    const cdf_sat_t *sat, const cdf_sat_t *ssat, const cdf_stream_t *sst,
+    const cdf_dir_t *dir, cdf_stream_t *scn)
+{
+	static const char name[] = "\05SummaryInformation";
+	const cdf_directory_t *d = NULL;
+	size_t i;
+
+	for (i = 0; i < dir->dir_len; i++) {
+		const cdf_directory_t *cand = &dir->dir_tab[i];
+
+		if (cand->d_type != CDF_DIR_TYPE_USER_STREAM)
+			continue;
+		if (cdf_namecmp(name, cand->d_name, sizeof(name)) != 0)
+			continue;
+		d = cand;
+		break;
+	}
+
+	if (d == NULL) {
+		DPRINTF(("Cannot find summary information section\n"));
+		errno = EFTYPE;
+		return -1;
+	}
+	return cdf_read_sector_chain(fd, h, sat, ssat, sst,
+	    d->d_stream_first_sector, d->d_size, scn);
+}
+
+/*
+ * Decode the property section found at byte offset offs inside the
+ * stream sst and append its properties to the array *info.
+ *
+ * *info, *count and *maxcount describe a growing array owned by the
+ * caller: *maxcount is its capacity (0 means nothing allocated yet) and
+ * *count the number of entries filled in so far.  String properties are
+ * not copied; pi_str.s_buf points into sst->sst_tab.
+ *
+ * Returns 0 on success.  Returns -1 on allocation failure, when a
+ * property lies past the end of the section, or when a property type is
+ * not handled; in that case *info has been freed and set to NULL.
+ */
+int
+cdf_read_property_info(const cdf_stream_t *sst, uint32_t offs,
+    cdf_property_info_t **info, size_t *count, size_t *maxcount)
+{
+	const cdf_section_header_t *shp;
+	cdf_section_header_t sh;
+	const uint32_t *p, *q, *e;
+	int16_t s16;
+	int32_t s32;
+	uint32_t u32;
+	int64_t s64;
+	uint64_t u64;
+	cdf_timestamp_t tp;
+	size_t i, o, nelements, j;
+	cdf_property_info_t *inp;
+
+	shp = (const void *)((const char *)sst->sst_tab + offs);
+	sh.sh_len = CDF_TOLE4(shp->sh_len);
+	sh.sh_properties = CDF_TOLE4(shp->sh_properties);
+	DPRINTF(("section len: %d properties %d\n", sh.sh_len,
+	    sh.sh_properties));
+	if (*maxcount) {
+		*maxcount += sh.sh_properties;
+		inp = realloc(*info, *maxcount * sizeof(*inp));
+	} else {
+		*maxcount = sh.sh_properties;
+		inp = malloc(*maxcount * sizeof(*inp));
+	}
+	if (inp == NULL)
+		goto out;
+	*info = inp;
+	inp += *count;
+	*count += sh.sh_properties;
+	/* p: table of (id, offset) pairs; e: end of this section. */
+	p = (const void *)((const char *)sst->sst_tab + offs + sizeof(sh));
+	e = (const void *)(((const char *)shp) + sh.sh_len);
+	for (i = 0; i < sh.sh_properties; i++) {
+		q = (const uint32_t *)((const char *)p +
+		    CDF_TOLE4(p[(i << 1) + 1])) - 2;
+		if (q > e) {
+			DPRINTF(("Ran of the end %p > %p\n", q, e));
+			goto out;
+		}
+		inp[i].pi_id = CDF_TOLE4(p[i << 1]);
+		inp[i].pi_type = CDF_TOLE4(q[0]);
+		DPRINTF(("%d) id=%x type=%x offs=%x\n", i, inp[i].pi_id,
+		    inp[i].pi_type, (const char *)q - (const char *)p));
+		/* o is the index in q[] where the value payload starts. */
+		if (inp[i].pi_type & CDF_VECTOR) {
+			nelements = CDF_TOLE4(q[1]);
+			o = 2;
+		} else {
+			nelements = 1;
+			o = 1;
+		}
+		if (inp[i].pi_type & (CDF_ARRAY|CDF_BYREF|CDF_RESERVED))
+			goto unknown;
+		switch (inp[i].pi_type & CDF_TYPEMASK) {
+		case CDF_EMPTY:
+			break;
+		case CDF_SIGNED16:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			(void)memcpy(&s16, &q[o], sizeof(s16));
+			inp[i].pi_s16 = CDF_TOLE2(s16);
+			break;
+		case CDF_SIGNED32:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			(void)memcpy(&s32, &q[o], sizeof(s32));
+			inp[i].pi_s32 = CDF_TOLE4(s32);
+			break;
+		case CDF_BOOL:
+		case CDF_UNSIGNED32:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			(void)memcpy(&u32, &q[o], sizeof(u32));
+			inp[i].pi_u32 = CDF_TOLE4(u32);
+			break;
+		case CDF_SIGNED64:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			(void)memcpy(&s64, &q[o], sizeof(s64));
+			/* 64-bit value: the 4-byte macro would truncate it. */
+			inp[i].pi_s64 = CDF_TOLE8(s64);
+			break;
+		case CDF_UNSIGNED64:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			(void)memcpy(&u64, &q[o], sizeof(u64));
+			/* 64-bit value: the 4-byte macro would truncate it. */
+			inp[i].pi_u64 = CDF_TOLE8(u64);
+			break;
+		case CDF_LENGTH32_STRING:
+			if (nelements > 1) {
+				/* A vector of strings takes one slot per
+				 * element, so grow the array first. */
+				size_t nelem = inp - *info;
+				*maxcount += nelements;
+				inp = realloc(*info, *maxcount * sizeof(*inp));
+				if (inp == NULL)
+					goto out;
+				*info = inp;
+				inp = *info + nelem;
+			}
+			DPRINTF(("nelements = %d\n", nelements));
+			for (j = 0; j < nelements; j++, i++) {
+				uint32_t l = CDF_TOLE4(q[o]);
+				inp[i].pi_str.s_len = l;
+				inp[i].pi_str.s_buf = (const char *)(&q[o+1]);
+				DPRINTF(("l = %d, r = %d, s = %s\n", l,
+				    CDF_ROUND(l, sizeof(l)),
+				    inp[i].pi_str.s_buf));
+				/* Skip the length word plus the padded text. */
+				l = 4 + CDF_ROUND(l, sizeof(l));
+				o += l >> 2;
+			}
+			/* Undo the final i++; the outer loop adds it back. */
+			i--;
+			break;
+		case CDF_FILETIME:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			(void)memcpy(&tp, &q[o], sizeof(tp));
+			inp[i].pi_tp = CDF_TOLE8(tp);
+			break;
+		case CDF_CLIPBOARD:
+			if (inp[i].pi_type & CDF_VECTOR)
+				goto unknown;
+			break;
+		default:
+		unknown:
+			DPRINTF(("Don't know how to deal with %x\n",
+			    inp[i].pi_type));
+			goto out;
+		}
+	}
+	return 0;
+out:
+	free(*info);
+	/* Do not leave the caller holding a freed pointer. */
+	*info = NULL;
+	return -1;
+}
+
+/*
+ * Decode the summary-information stream header found at the start of
+ * sst->sst_tab into *ssi (converted to host byte order) and collect the
+ * stream's properties through cdf_read_property_info().
+ *
+ * On success returns 0; *info then holds the property array and *count the
+ * number of entries in it.  On failure returns -1 and *info must not be
+ * used or freed by the caller.
+ */
+int
+cdf_unpack_summary_info(const cdf_stream_t *sst, cdf_summary_info_header_t *ssi,
+    cdf_property_info_t **info, size_t *count)
+{
+	size_t i, maxcount;
+	uint32_t nsections;
+	const cdf_summary_info_header_t *si = sst->sst_tab;
+	const cdf_section_declaration_t *sd = (const void *)
+	    ((const char *)sst->sst_tab + CDF_SECTION_DECLARATION_OFFSET);
+
+	/* si_count is a 32-bit field: convert it with the 32-bit helper. */
+	nsections = CDF_TOLE4(si->si_count);
+
+	ssi->si_byte_order = CDF_TOLE2(si->si_byte_order);
+	ssi->si_os_version = CDF_TOLE2(si->si_os_version);
+	ssi->si_os = CDF_TOLE2(si->si_os);
+	ssi->si_class = si->si_class;
+	cdf_swap_class(&ssi->si_class);
+	ssi->si_count = nsections;
+	*count = 0;
+	maxcount = 0;
+	*info = NULL;
+	for (i = 0; i < nsections; i++) {
+		if (i >= CDF_LOOP_LIMIT) {
+			DPRINTF(("Unpack summary info loop limit"));
+			/*
+			 * Release what earlier iterations collected, the
+			 * same way cdf_read_property_info() does on its own
+			 * failure path, so that no error return leaks.
+			 */
+			free(*info);
+			*info = NULL;
+			errno = EFTYPE;
+			return -1;
+		}
+		if (cdf_read_property_info(sst, CDF_TOLE4(sd->sd_offset),
+		    info, count, &maxcount) == -1)
+			return -1;
+	}
+	return 0;
+}
+
+
+
+/*
+ * Format a class id as text in the 8-4-4-4-12 hexadecimal layout.
+ * Returns whatever snprintf() returns for the given buffer.
+ */
+int
+cdf_print_classid(char *buf, size_t buflen, const cdf_classid_t *id)
+{
+	const uint16_t *w = id->cl_word;
+	const uint8_t *two = id->cl_two;
+	const uint8_t *six = id->cl_six;
+
+	return snprintf(buf, buflen,
+	    "%.8x-%.4x-%.4x-%.2x%.2x-" "%.2x%.2x%.2x%.2x%.2x%.2x",
+	    id->cl_dword, w[0], w[1], two[0], two[1],
+	    six[0], six[1], six[2], six[3], six[4], six[5]);
+}
+
+/*
+ * Table pairing summary-information property ids (v) with the printable
+ * name (n) used for them; scanned linearly by cdf_print_property_name().
+ */
+static const struct {
+ uint32_t v;
+ const char *n;
+} vn[] = {
+ { CDF_PROPERTY_CODE_PAGE, "Code page" },
+ { CDF_PROPERTY_TITLE, "Title" },
+ { CDF_PROPERTY_SUBJECT, "Subject" },
+ { CDF_PROPERTY_AUTHOR, "Author" },
+ { CDF_PROPERTY_KEYWORDS, "Keywords" },
+ { CDF_PROPERTY_COMMENTS, "Comments" },
+ { CDF_PROPERTY_TEMPLATE, "Template" },
+ { CDF_PROPERTY_LAST_SAVED_BY, "Last Saved By" },
+ { CDF_PROPERTY_REVISION_NUMBER, "Revision Number" },
+ { CDF_PROPERTY_TOTAL_EDITING_TIME, "Total Editing Time" },
+ { CDF_PROPERTY_LAST_PRINTED, "Last Printed" },
+ { CDF_PROPERTY_CREATE_TIME, "Create Time/Date" },
+ { CDF_PROPERTY_LAST_SAVED_TIME, "Last Saved Time/Date" },
+ { CDF_PROPERTY_NUMBER_OF_PAGES, "Number of Pages" },
+ { CDF_PROPERTY_NUMBER_OF_WORDS, "Number of Words" },
+ { CDF_PROPERTY_NUMBER_OF_CHARACTERS, "Number of Characters" },
+ { CDF_PROPERTY_THUMBNAIL, "Thumbnail" },
+ { CDF_PROPERTY_NAME_OF_APPLICATION, "Name of Creating Application" },
+ { CDF_PROPERTY_SECURITY, "Security" },
+ { CDF_PROPERTY_LOCALE_ID, "Locale ID" },
+};
+
+/*
+ * Write the printable name of property id `p' into buf.  Ids missing from
+ * the vn[] table are rendered as a hexadecimal number instead.
+ * Returns the snprintf() result.
+ */
+int
+cdf_print_property_name(char *buf, size_t bufsiz, uint32_t p)
+{
+	size_t idx = 0;
+
+	/* Stop at the first table entry whose id matches. */
+	while (idx < __arraycount(vn) && vn[idx].v != p)
+		idx++;
+
+	if (idx == __arraycount(vn))
+		return snprintf(buf, bufsiz, "0x%x", p);
+	return snprintf(buf, bufsiz, "%s", vn[idx].n);
+}
+
+/*
+ * Render a duration given in CDF time units as "[<days>d+][HH:]MM:SS".
+ * The day part is emitted only when non-zero; the hour part only when
+ * either days or hours are non-zero.  Formatting stops as soon as the
+ * accumulated length reaches bufsiz.  Returns the accumulated length.
+ */
+int
+cdf_print_elapsed_time(char *buf, size_t bufsiz, cdf_timestamp_t ts)
+{
+	size_t used = 0;
+	int d, h, m, s;
+
+	/* Peel off seconds, minutes and hours; what is left is days. */
+	ts /= CDF_TIME_PREC;
+	s = ts % 60;
+	ts /= 60;
+	m = ts % 60;
+	ts /= 60;
+	h = ts % 24;
+	ts /= 24;
+	d = ts;
+
+	if (d != 0) {
+		used += snprintf(buf + used, bufsiz - used, "%dd+", d);
+		if (used >= bufsiz)
+			return used;
+	}
+
+	if (d != 0 || h != 0) {
+		used += snprintf(buf + used, bufsiz - used, "%.2d:", h);
+		if (used >= bufsiz)
+			return used;
+	}
+
+	used += snprintf(buf + used, bufsiz - used, "%.2d:", m);
+	if (used < bufsiz)
+		used += snprintf(buf + used, bufsiz - used, "%.2d", s);
+	return used;
+}
+
+
+#ifdef CDF_DEBUG
+/*
+ * Debug helper: print the scalar fields of a CDF header to stdout, then
+ * the master SAT entries up to (not including) the first CDF_SECID_FREE.
+ */
+void
+cdf_dump_header(const cdf_header_t *h)
+{
+ size_t i;
+
+/* DUMP(fmt, field) prints "field = value" for h->h_<field> using fmt. */
+#define DUMP(a, b) printf("%40.40s = " a "\n", # b, h->h_ ## b)
+ DUMP("%d", revision);
+ DUMP("%d", version);
+ DUMP("0x%x", byte_order);
+ DUMP("%d", sec_size_p2);
+ DUMP("%d", short_sec_size_p2);
+ DUMP("%d", num_sectors_in_sat);
+ DUMP("%d", secid_first_directory);
+ DUMP("%d", min_size_standard_stream);
+ DUMP("%d", secid_first_sector_in_short_sat);
+ DUMP("%d", num_sectors_in_short_sat);
+ DUMP("%d", secid_first_sector_in_master_sat);
+ DUMP("%d", num_sectors_in_master_sat);
+ /* NOTE(review): DUMP is never #undef'd, so it stays defined below. */
+ for (i = 0; i < __arraycount(h->h_master_sat); i++) {
+ if (h->h_master_sat[i] == CDF_SECID_FREE)
+ break;
+ printf("%35.35s[%.3zu] = %d\n",
+ "master_sat", i, h->h_master_sat[i]);
+ }
+}
+
+/*
+ * Debug helper: print a sector allocation table, one sector's worth of
+ * sector ids per "<prefix>[n]:" group, ten ids per output line.
+ */
+void
+cdf_dump_sat(const char *prefix, const cdf_header_t *h, const cdf_sat_t *sat)
+{
+	const size_t per_sector = CDF_SEC_SIZE(h) / sizeof(cdf_secid_t);
+	size_t sec, slot;
+
+	for (sec = 0; sec < sat->sat_len; sec++) {
+		printf("%s[%zu]:\n", prefix, sec);
+		slot = 0;
+		while (slot < per_sector) {
+			printf("%5d, ",
+			    CDF_TOLE4(sat->sat_tab[per_sector * sec + slot]));
+			slot++;
+			/* Break the line after every tenth id. */
+			if (slot % 10 == 0)
+				printf("\n");
+		}
+		printf("\n");
+	}
+}
+
+/*
+ * Debug helper: hex dump `len' bytes starting at `v' to stdout, sixteen
+ * bytes per row.  Each completed row is followed by its printable-ASCII
+ * rendering (non-printable bytes shown as '.') and the offset of the next
+ * row.  A trailing partial row gets no ASCII column.
+ */
+void
+cdf_dump(void *v, size_t len)
+{
+	size_t i, j;
+	unsigned char *p = v;
+	/* 16 characters per row plus the terminating NUL. */
+	char abuf[17];
+
+	printf("%.4x: ", 0);
+	for (i = 0, j = 0; i < len; i++, p++) {
+		printf("%.2x ", *p);
+		abuf[j++] = isprint(*p) ? *p : '.';
+		if (j == 16) {
+			j = 0;
+			/* Terminate after the 16th character, not on it. */
+			abuf[16] = '\0';
+			/* %x takes an unsigned int; i is a size_t. */
+			printf("%s\n%.4x: ", abuf, (unsigned int)(i + 1));
+		}
+	}
+	printf("\n");
+}
+
+/*
+ * Debug helper: hex dump a whole stream.  The sector size used to compute
+ * the byte length is the short-sector size when sst_dirlen is below the
+ * header's h_min_size_standard_stream, and the regular sector size
+ * otherwise.
+ */
+void
+cdf_dump_stream(const cdf_header_t *h, const cdf_stream_t *sst)
+{
+	size_t secsize;
+
+	if (sst->sst_dirlen < h->h_min_size_standard_stream)
+		secsize = CDF_SHORT_SEC_SIZE(h);
+	else
+		secsize = CDF_SEC_SIZE(h);
+
+	cdf_dump(sst->sst_tab, secsize * sst->sst_len);
+}
+
+/*
+ * Debug helper: print every entry of a directory to stdout.  For user
+ * stream entries the stream contents are additionally read with
+ * cdf_read_sector_chain() and hex dumped, unless sst is NULL.
+ */
+void
+cdf_dump_dir(int fd, const cdf_header_t *h, const cdf_sat_t *sat,
+ const cdf_sat_t *ssat, const cdf_stream_t *sst,
+ const cdf_dir_t *dir)
+{
+ size_t i, j;
+ cdf_directory_t *d;
+ char name[__arraycount(d->d_name)];
+ cdf_stream_t scn;
+ struct timespec ts;
+
+ /* Printable names, indexed by d_type. */
+ static const char *types[] = { "empty", "user storage",
+ "user stream", "lockbytes", "property", "root storage" };
+
+ for (i = 0; i < dir->dir_len; i++) {
+ d = &dir->dir_tab[i];
+ /* Narrow the 16-bit name characters to plain chars. */
+ /*
+  * NOTE(review): every element of d_name is copied and nothing
+  * forces a terminating NUL, so the "%s" uses of name below rely
+  * on the on-disk name being NUL-terminated -- confirm.
+  */
+ for (j = 0; j < sizeof(name); j++)
+ name[j] = (char)CDF_TOLE2(d->d_name[j]);
+ printf("Directory %zu: %s\n", i, name);
+ if (d->d_type < __arraycount(types))
+ printf("Type: %s\n", types[d->d_type]);
+ else
+ printf("Type: %d\n", d->d_type);
+ printf("Color: %s\n", d->d_color ? "black" : "red");
+ printf("Left child: %d\n", d->d_left_child);
+ printf("Right child: %d\n", d->d_right_child);
+ printf("Flags: 0x%x\n", d->d_flags);
+ /* The result of the timestamp conversion is not checked here. */
+ cdf_timestamp_to_timespec(&ts, d->d_created);
+ printf("Created %s", ctime(&ts.tv_sec));
+ cdf_timestamp_to_timespec(&ts, d->d_modified);
+ printf("Modified %s", ctime(&ts.tv_sec));
+ printf("Stream %d\n", d->d_stream_first_sector);
+ printf("Size %d\n", d->d_size);
+ switch (d->d_type) {
+ case CDF_DIR_TYPE_USER_STORAGE:
+ printf("Storage: %d\n", d->d_storage);
+ break;
+ case CDF_DIR_TYPE_USER_STREAM:
+ if (sst == NULL)
+ break;
+ if (cdf_read_sector_chain(fd, h, sat, ssat, sst,
+ d->d_stream_first_sector, d->d_size, &scn) == -1) {
+ warn("Can't read stream for %s at %d len %d",
+ name, d->d_stream_first_sector, d->d_size);
+ break;
+ }
+ cdf_dump_stream(h, &scn);
+ /* The chain buffer is released here once it has been dumped. */
+ free(scn.sst_tab);
+ break;
+ default:
+ break;
+ }
+
+ }
+}
+
+/*
+ * Debug helper: print `count' decoded properties to stdout, one per line,
+ * each prefixed with its index and property name.
+ */
+void
+cdf_dump_property_info(const cdf_property_info_t *info, size_t count)
+{
+ cdf_timestamp_t tp;
+ struct timespec ts;
+ char buf[64];
+ size_t i;
+
+ for (i = 0; i < count; i++) {
+ cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
+ printf("%zu) %s: ", i, buf);
+ /*
+  * The switch compares the raw pi_type value, so a type word with
+  * extra flag bits set (e.g. CDF_VECTOR) lands in the default case.
+  */
+ switch (info[i].pi_type) {
+ case CDF_SIGNED16:
+ printf("signed 16 [%hd]\n", info[i].pi_s16);
+ break;
+ case CDF_SIGNED32:
+ printf("signed 32 [%d]\n", info[i].pi_s32);
+ break;
+ case CDF_UNSIGNED32:
+ printf("unsigned 32 [%u]\n", info[i].pi_u32);
+ break;
+ case CDF_LENGTH32_STRING:
+ /* Print at most s_len bytes of the string buffer. */
+ printf("string %u [%.*s]\n", info[i].pi_str.s_len,
+ info[i].pi_str.s_len, info[i].pi_str.s_buf);
+ break;
+ case CDF_FILETIME:
+ tp = info[i].pi_tp;
+ /*
+  * Values below this threshold are printed as an elapsed time,
+  * larger ones as a calendar date.
+  */
+ if (tp < 1000000000000000LL) {
+ cdf_print_elapsed_time(buf, sizeof(buf), tp);
+ printf("timestamp %s\n", buf);
+ } else {
+ cdf_timestamp_to_timespec(&ts, tp);
+ printf("timestamp %s", ctime(&ts.tv_sec));
+ }
+ break;
+ case CDF_CLIPBOARD:
+ printf("CLIPBOARD %u\n", info[i].pi_u32);
+ break;
+ default:
+ DPRINTF(("Don't know how to deal with %x\n",
+ info[i].pi_type));
+ break;
+ }
+ }
+}
+
+
+/*
+ * Debug helper: unpack the summary-information stream `sst' and print its
+ * header fields followed by all of its properties.  Prints nothing when
+ * unpacking fails.  The header argument `h' is not used.
+ */
+void
+cdf_dump_summary_info(const cdf_header_t *h, const cdf_stream_t *sst)
+{
+	cdf_summary_info_header_t hdr;
+	cdf_property_info_t *props;
+	size_t nprops;
+	char clsid[128];
+
+	(void)&h;
+	if (cdf_unpack_summary_info(sst, &hdr, &props, &nprops) == -1)
+		return;
+
+	printf("Endian: %x\n", hdr.si_byte_order);
+	printf("Os Version %d.%d\n", hdr.si_os_version & 0xff,
+	    hdr.si_os_version >> 8);
+	printf("Os %d\n", hdr.si_os);
+
+	cdf_print_classid(clsid, sizeof(clsid), &hdr.si_class);
+	printf("Class %s\n", clsid);
+
+	printf("Count %d\n", hdr.si_count);
+	cdf_dump_property_info(props, nprops);
+	free(props);
+}
+
+#endif
+
+#ifdef TEST
+/*
+ * Stand-alone test driver (built only with -DTEST): for every file named
+ * on the command line, read the CDF header, SAT, SSAT, directory, short
+ * stream and summary information, dumping each one when CDF_DEBUG is
+ * also defined.  Any read failure terminates the program through err().
+ */
+int
+main(int argc, char *argv[])
+{
+	int fd, i;
+	cdf_header_t h;
+	cdf_sat_t sat, ssat;
+	cdf_stream_t sst, scn;
+	cdf_dir_t dir;
+
+	if (argc < 2) {
+		(void)fprintf(stderr, "Usage: %s <filename>\n", getprogname());
+		return -1;
+	}
+
+	for (i = 1; i < argc; i++) {
+		/* Open the i-th argument, not always the first one. */
+		if ((fd = open(argv[i], O_RDONLY)) == -1)
+			err(1, "Cannot open `%s'", argv[i]);
+
+		if (cdf_read_header(fd, &h) == -1)
+			err(1, "Cannot read header");
+#ifdef CDF_DEBUG
+		cdf_dump_header(&h);
+#endif
+
+		if (cdf_read_sat(fd, &h, &sat) == -1)
+			err(1, "Cannot read sat");
+#ifdef CDF_DEBUG
+		cdf_dump_sat("SAT", &h, &sat);
+#endif
+
+		if (cdf_read_ssat(fd, &h, &sat, &ssat) == -1)
+			err(1, "Cannot read ssat");
+#ifdef CDF_DEBUG
+		cdf_dump_sat("SSAT", &h, &ssat);
+#endif
+
+		if (cdf_read_dir(fd, &h, &sat, &dir) == -1)
+			err(1, "Cannot read dir");
+
+		if (cdf_read_short_stream(fd, &h, &sat, &dir, &sst) == -1)
+			err(1, "Cannot read short stream");
+#ifdef CDF_DEBUG
+		cdf_dump_stream(&h, &sst);
+#endif
+
+#ifdef CDF_DEBUG
+		cdf_dump_dir(fd, &h, &sat, &ssat, &sst, &dir);
+#endif
+
+
+		if (cdf_read_summary_info(fd, &h, &sat, &ssat, &sst, &dir,
+		    &scn) == -1)
+			err(1, "Cannot read summary info");
+#ifdef CDF_DEBUG
+		cdf_dump_summary_info(&h, &scn);
+#endif
+
+		(void)close(fd);
+	}
+
+	return 0;
+}
+#endif
diff --git a/contrib/file/cdf.h b/contrib/file/cdf.h
new file mode 100644
index 0000000000000..eace7ab714bb6
--- /dev/null
+++ b/contrib/file/cdf.h
@@ -0,0 +1,298 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Info from: http://sc.openoffice.org/compdocfileformat.pdf
+ */
+
+#ifndef _H_CDF_
+#define _H_CDF_
+
+typedef int32_t cdf_secid_t;
+
+#define CDF_LOOP_LIMIT 10000
+
+#define CDF_SECID_NULL 0
+#define CDF_SECID_FREE -1
+#define CDF_SECID_END_OF_CHAIN -2
+#define CDF_SECID_SECTOR_ALLOCATION_TABLE -3
+#define CDF_SECID_MASTER_SECTOR_ALLOCATION_TABLE -4
+
+typedef struct {
+ uint64_t h_magic;
+#define CDF_MAGIC 0xE11AB1A1E011CFD0LL
+ uint64_t h_uuid[2];
+ uint16_t h_revision;
+ uint16_t h_version;
+ uint16_t h_byte_order;
+ uint16_t h_sec_size_p2;
+ uint16_t h_short_sec_size_p2;
+ uint8_t h_unused0[10];
+ uint32_t h_num_sectors_in_sat;
+ uint32_t h_secid_first_directory;
+ uint8_t h_unused1[4];
+ uint32_t h_min_size_standard_stream;
+ cdf_secid_t h_secid_first_sector_in_short_sat;
+ uint32_t h_num_sectors_in_short_sat;
+ cdf_secid_t h_secid_first_sector_in_master_sat;
+ uint32_t h_num_sectors_in_master_sat;
+ cdf_secid_t h_master_sat[436/4];
+} cdf_header_t;
+
+/*
+ * Sector geometry helpers.  Sector sizes are stored in the header as
+ * powers of two, so the *_SIZE macros expand to 1 << exponent (an int
+ * expression).  CDF_SEC_POS adds one extra sector size in front of
+ * secid * size -- presumably to skip the header area at the start of the
+ * file (TODO confirm); CDF_SHORT_SEC_POS has no such lead-in.
+ */
+#define CDF_SEC_SIZE(h) (1 << (h)->h_sec_size_p2)
+#define CDF_SEC_POS(h, secid) (CDF_SEC_SIZE(h) + (secid) * CDF_SEC_SIZE(h))
+#define CDF_SHORT_SEC_SIZE(h) (1 << (h)->h_short_sec_size_p2)
+#define CDF_SHORT_SEC_POS(h, secid) ((secid) * CDF_SHORT_SEC_SIZE(h))
+
+typedef int32_t cdf_dirid_t;
+#define CDF_DIRID_NULL -1
+
+typedef int64_t cdf_timestamp_t;
+#define CDF_BASE_YEAR 1601
+#define CDF_TIME_PREC 10000000
+
+typedef struct {
+ uint16_t d_name[32];
+ uint16_t d_namelen;
+ uint8_t d_type;
+#define CDF_DIR_TYPE_EMPTY 0
+#define CDF_DIR_TYPE_USER_STORAGE 1
+#define CDF_DIR_TYPE_USER_STREAM 2
+#define CDF_DIR_TYPE_LOCKBYTES 3
+#define CDF_DIR_TYPE_PROPERTY 4
+#define CDF_DIR_TYPE_ROOT_STORAGE 5
+ uint8_t d_color;
+#define CDF_DIR_COLOR_READ 0
+#define CDF_DIR_COLOR_BLACK 1
+ cdf_dirid_t d_left_child;
+ cdf_dirid_t d_right_child;
+ cdf_dirid_t d_storage;
+ uint64_t d_storage_uuid[2];
+ uint32_t d_flags;
+ cdf_timestamp_t d_created;
+ cdf_timestamp_t d_modified;
+ cdf_secid_t d_stream_first_sector;
+ uint32_t d_size;
+ uint32_t d_unused0;
+} cdf_directory_t;
+
+#define CDF_DIRECTORY_SIZE 128
+
+typedef struct {
+ cdf_secid_t *sat_tab;
+ size_t sat_len;
+} cdf_sat_t;
+
+typedef struct {
+ cdf_directory_t *dir_tab;
+ size_t dir_len;
+} cdf_dir_t;
+
+typedef struct {
+ void *sst_tab;
+ size_t sst_len;
+ size_t sst_dirlen;
+} cdf_stream_t;
+
+typedef struct {
+ uint32_t cl_dword;
+ uint16_t cl_word[2];
+ uint8_t cl_two[2];
+ uint8_t cl_six[6];
+} cdf_classid_t;
+
+typedef struct {
+ uint16_t si_byte_order;
+ uint16_t si_zero;
+ uint16_t si_os_version;
+ uint16_t si_os;
+ cdf_classid_t si_class;
+ uint32_t si_count;
+} cdf_summary_info_header_t;
+
+#define CDF_SECTION_DECLARATION_OFFSET 0x1c
+
+typedef struct {
+ cdf_classid_t sd_class;
+ uint32_t sd_offset;
+} cdf_section_declaration_t;
+
+typedef struct {
+ uint32_t sh_len;
+ uint32_t sh_properties;
+} cdf_section_header_t;
+
+/*
+ * One decoded property: its id, its type word (one of the CDF_* variant
+ * type values, possibly combined with flag bits such as CDF_VECTOR), and
+ * the value in the union member matching that type.  The pi_* macros
+ * below are shorthands for the union members.
+ */
+typedef struct {
+ uint32_t pi_id;
+ uint32_t pi_type;
+ union {
+ uint16_t _pi_u16;
+ int16_t _pi_s16;
+ uint32_t _pi_u32;
+ int32_t _pi_s32;
+ uint64_t _pi_u64;
+ int64_t _pi_s64;
+ cdf_timestamp_t _pi_tp;
+ /*
+  * String value: s_len bytes at s_buf.  NOTE(review): the reader
+  * appears to point s_buf into the parsed data rather than copy
+  * it -- confirm lifetime against cdf_read_property_info().
+  */
+ struct {
+ uint32_t s_len;
+ const char *s_buf;
+ } _pi_str;
+ } pi_val;
+#define pi_u64 pi_val._pi_u64
+#define pi_s64 pi_val._pi_s64
+#define pi_u32 pi_val._pi_u32
+#define pi_s32 pi_val._pi_s32
+#define pi_u16 pi_val._pi_u16
+#define pi_s16 pi_val._pi_s16
+#define pi_tp pi_val._pi_tp
+#define pi_str pi_val._pi_str
+} cdf_property_info_t;
+
+/*
+ * Round val up to the next multiple of by.  The mask arithmetic is only
+ * correct when by is a power of two; both arguments are evaluated more
+ * than once.
+ */
+#define CDF_ROUND(val, by) (((val) + (by) - 1) & ~((by) - 1))
+
+/* Variant type definitions */
+#define CDF_EMPTY 0x00000000
+#define CDF_NULL 0x00000001
+#define CDF_SIGNED16 0x00000002
+#define CDF_SIGNED32 0x00000003
+#define CDF_FLOAT 0x00000004
+#define CDF_DOUBLE 0x00000005
+#define CDF_CY 0x00000006
+#define CDF_DATE 0x00000007
+#define CDF_BSTR 0x00000008
+#define CDF_DISPATCH 0x00000009
+#define CDF_ERROR 0x0000000a
+#define CDF_BOOL 0x0000000b
+#define CDF_VARIANT 0x0000000c
+#define CDF_UNKNOWN 0x0000000d
+#define CDF_DECIMAL 0x0000000e
+#define CDF_SIGNED8 0x00000010
+#define CDF_UNSIGNED8 0x00000011
+#define CDF_UNSIGNED16 0x00000012
+#define CDF_UNSIGNED32 0x00000013
+#define CDF_SIGNED64 0x00000014
+#define CDF_UNSIGNED64 0x00000015
+#define CDF_INT 0x00000016
+#define CDF_UINT 0x00000017
+#define CDF_VOID 0x00000018
+#define CDF_HRESULT 0x00000019
+#define CDF_PTR 0x0000001a
+#define CDF_SAFEARRAY 0x0000001b
+#define CDF_CARRAY 0x0000001c
+#define CDF_USERDEFINED 0x0000001d
+#define CDF_LENGTH32_STRING 0x0000001e
+#define CDF_LENGTH32_WSTRING 0x0000001f
+#define CDF_FILETIME 0x00000040
+#define CDF_BLOB 0x00000041
+#define CDF_STREAM 0x00000042
+#define CDF_STORAGE 0x00000043
+#define CDF_STREAMED_OBJECT 0x00000044
+#define CDF_STORED_OBJECT 0x00000045
+#define CDF_BLOB_OBJECT 0x00000046
+#define CDF_CLIPBOARD 0x00000047
+#define CDF_CLSID 0x00000048
+#define CDF_VECTOR 0x00001000
+#define CDF_ARRAY 0x00002000
+#define CDF_BYREF 0x00004000
+#define CDF_RESERVED 0x00008000
+#define CDF_ILLEGAL 0x0000ffff
+#define CDF_ILLEGALMASKED 0x00000fff
+#define CDF_TYPEMASK 0x00000fff
+
+#define CDF_PROPERTY_CODE_PAGE 0x00000001
+#define CDF_PROPERTY_TITLE 0x00000002
+#define CDF_PROPERTY_SUBJECT 0x00000003
+#define CDF_PROPERTY_AUTHOR 0x00000004
+#define CDF_PROPERTY_KEYWORDS 0x00000005
+#define CDF_PROPERTY_COMMENTS 0x00000006
+#define CDF_PROPERTY_TEMPLATE 0x00000007
+#define CDF_PROPERTY_LAST_SAVED_BY 0x00000008
+#define CDF_PROPERTY_REVISION_NUMBER 0x00000009
+#define CDF_PROPERTY_TOTAL_EDITING_TIME 0x0000000a
+#define CDF_PROPERTY_LAST_PRINTED 0X0000000b
+#define CDF_PROPERTY_CREATE_TIME 0x0000000c
+#define CDF_PROPERTY_LAST_SAVED_TIME 0x0000000d
+#define CDF_PROPERTY_NUMBER_OF_PAGES 0x0000000e
+#define CDF_PROPERTY_NUMBER_OF_WORDS 0x0000000f
+#define CDF_PROPERTY_NUMBER_OF_CHARACTERS 0x00000010
+#define CDF_PROPERTY_THUMBNAIL 0x00000011
+#define CDF_PROPERTY_NAME_OF_APPLICATION 0x00000012
+#define CDF_PROPERTY_SECURITY 0x00000013
+#define CDF_PROPERTY_LOCALE_ID 0x80000000
+
+struct timespec;
+int cdf_timestamp_to_timespec(struct timespec *, cdf_timestamp_t);
+int cdf_timespec_to_timestamp(cdf_timestamp_t *, const struct timespec *);
+int cdf_read_header(int, cdf_header_t *);
+void cdf_swap_header(cdf_header_t *);
+void cdf_unpack_header(cdf_header_t *, char *);
+void cdf_swap_dir(cdf_directory_t *);
+void cdf_unpack_dir(cdf_directory_t *, char *);
+void cdf_swap_class(cdf_classid_t *);
+ssize_t cdf_read_sector(int, void *, size_t, size_t, const cdf_header_t *,
+ cdf_secid_t);
+ssize_t cdf_read_short_sector(const cdf_stream_t *, void *, size_t, size_t,
+ const cdf_header_t *, cdf_secid_t);
+int cdf_read_sat(int, cdf_header_t *, cdf_sat_t *);
+size_t cdf_count_chain(const cdf_header_t *, const cdf_sat_t *,
+ cdf_secid_t);
+int cdf_read_long_sector_chain(int, const cdf_header_t *,
+ const cdf_sat_t *, cdf_secid_t, size_t, cdf_stream_t *);
+int cdf_read_short_sector_chain(const cdf_header_t *, const cdf_sat_t *,
+ const cdf_stream_t *, cdf_secid_t, size_t, cdf_stream_t *);
+int cdf_read_sector_chain(int, const cdf_header_t *,
+ const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, cdf_secid_t,
+ size_t, cdf_stream_t *);
+int cdf_read_dir(int, const cdf_header_t *, const cdf_sat_t *, cdf_dir_t *);
+int cdf_read_ssat(int, const cdf_header_t *, const cdf_sat_t *, cdf_sat_t *);
+int cdf_read_short_stream(int, const cdf_header_t *, const cdf_sat_t *,
+ const cdf_dir_t *, cdf_stream_t *);
+int cdf_read_property_info(const cdf_stream_t *, uint32_t,
+ cdf_property_info_t **, size_t *, size_t *);
+int cdf_read_summary_info(int, const cdf_header_t *, const cdf_sat_t *,
+ const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *,
+ cdf_stream_t *);
+int cdf_unpack_summary_info(const cdf_stream_t *, cdf_summary_info_header_t *,
+ cdf_property_info_t **, size_t *);
+int cdf_print_classid(char *, size_t, const cdf_classid_t *);
+int cdf_print_property_name(char *, size_t, uint32_t);
+int cdf_print_elapsed_time(char *, size_t, cdf_timestamp_t);
+uint16_t cdf_tole2(uint16_t);
+uint32_t cdf_tole4(uint32_t);
+uint64_t cdf_tole8(uint64_t);
+
+#ifdef CDF_DEBUG
+void cdf_dump_header(const cdf_header_t *);
+void cdf_dump_sat(const char *, const cdf_header_t *, const cdf_sat_t *);
+void cdf_dump(void *, size_t);
+void cdf_dump_stream(const cdf_header_t *, const cdf_stream_t *);
+void cdf_dump_dir(int, const cdf_header_t *, const cdf_sat_t *,
+ const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *);
+void cdf_dump_property_info(const cdf_property_info_t *, size_t);
+void cdf_dump_summary_info(const cdf_header_t *, const cdf_stream_t *);
+#endif
+
+
+#endif /* _H_CDF_ */
diff --git a/contrib/file/cdf_time.c b/contrib/file/cdf_time.c
new file mode 100644
index 0000000000000..e531b2dee7166
--- /dev/null
+++ b/contrib/file/cdf_time.c
@@ -0,0 +1,182 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: cdf_time.c,v 1.5 2009/02/03 20:27:51 christos Exp $")
+#endif
+
+#include <time.h>
+#ifdef TEST
+#include <err.h>
+#endif
+#include <string.h>
+
+#include "cdf.h"
+
+/* Gregorian leap-year test; evaluates to 1 for leap years and 0 otherwise. */
+#define isleap(y) ((((y) % 4) == 0) && \
+ ((((y) % 100) != 0) || (((y) % 400) == 0)))
+
+/* Days per month for a non-leap year, January first. */
+static const int mdays[] = {
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+};
+
+/*
+ * Count the days from January 1st of CDF_BASE_YEAR up to, but not
+ * including, January 1st of `year'.
+ */
+static int
+cdf_getdays(int year)
+{
+	int total = 0;
+	int yr = CDF_BASE_YEAR;
+
+	while (yr < year) {
+		total += 365;
+		if (isleap(yr))
+			total++;
+		yr++;
+	}
+	return total;
+}
+
+/*
+ * Reduce a day offset within `year' to an offset within its month: whole
+ * months are subtracted from `days' until the remainder is smaller than
+ * the length of the month reached.  The value returned is that remainder.
+ */
+static int
+cdf_getday(int year, int days)
+{
+	const size_t nmonths = sizeof(mdays) / sizeof(mdays[0]);
+	size_t mon;
+
+	for (mon = 0; mon < nmonths; mon++) {
+		int monlen = mdays[mon];
+
+		/* February gains a day in leap years. */
+		if (mon == 1 && isleap(year))
+			monlen++;
+		if (days < monlen)
+			break;
+		days -= monlen;
+	}
+	return days;
+}
+
+/*
+ * Map a day offset within `year' to a month number (0 = January): month
+ * lengths are subtracted from `days' in turn and the first month that
+ * brings the remainder to zero or below is returned.  If none does, the
+ * number of months in the table is returned.
+ */
+static int
+cdf_getmonth(int year, int days)
+{
+	const size_t nmonths = sizeof(mdays) / sizeof(mdays[0]);
+	size_t mon = 0;
+
+	while (mon < nmonths) {
+		days -= mdays[mon];
+		/* February gains a day in leap years. */
+		if (mon == 1 && isleap(year))
+			days -= 1;
+		if (days <= 0)
+			break;
+		mon++;
+	}
+	return (int)mon;
+}
+
+/*
+ * Convert a CDF timestamp (a count of 100ns units; the day arithmetic
+ * below is anchored at CDF_BASE_YEAR) into a struct timespec.
+ *
+ * Returns 0 on success.  If mktime() yields -1, sets errno to EINVAL and
+ * returns -1.
+ */
+int
+cdf_timestamp_to_timespec(struct timespec *ts, cdf_timestamp_t t)
+{
+ struct tm tm;
+#ifdef HAVE_STRUCT_TM_TM_ZONE
+ static char UTC[] = "UTC";
+#endif
+
+ /* Unit is 100's of nanoseconds */
+ ts->tv_nsec = (t % CDF_TIME_PREC) * 100;
+
+ /* Peel off seconds, minutes and hours; t ends up as a day count. */
+ t /= CDF_TIME_PREC;
+ tm.tm_sec = t % 60;
+ t /= 60;
+
+ tm.tm_min = t % 60;
+ t /= 60;
+
+ tm.tm_hour = t % 24;
+ t /= 24;
+
+ /*
+  * XXX: Approximation -- the year is estimated with 365-day years, so
+  * leap days are ignored when picking it.
+  */
+ tm.tm_year = CDF_BASE_YEAR + (t / 365);
+
+ /* Turn t into the day offset within the estimated year. */
+ int rdays = cdf_getdays(tm.tm_year);
+ t -= rdays;
+ /*
+  * NOTE(review): cdf_getday() returns a remainder that can be 0, while
+  * tm_mday is conventionally 1-based -- confirm the intended day value.
+  */
+ tm.tm_mday = cdf_getday(tm.tm_year, t);
+ tm.tm_mon = cdf_getmonth(tm.tm_year, t);
+ tm.tm_wday = 0;
+ tm.tm_yday = 0;
+ tm.tm_isdst = 0;
+#ifdef HAVE_STRUCT_TM_TM_GMTOFF
+ tm.tm_gmtoff = 0;
+#endif
+#ifdef HAVE_STRUCT_TM_TM_ZONE
+ tm.tm_zone = UTC;
+#endif
+ /* struct tm counts years from 1900. */
+ tm.tm_year -= 1900;
+ /*
+  * NOTE(review): mktime() interprets its argument as local time, so
+  * the result depends on the process time zone -- confirm this is
+  * intended given the "UTC" zone name set above.
+  */
+ ts->tv_sec = mktime(&tm);
+ if (ts->tv_sec == -1) {
+ errno = EINVAL;
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Placeholder for the reverse conversion.  Unless `notyet' is defined the
+ * function does nothing: *t is left untouched and 0 is returned.
+ *
+ * NOTE(review): the disabled code is unfinished -- it names ts_sec/ts_nsec
+ * where the rest of this file uses tv_sec/tv_nsec, and the second
+ * assignment to *t overwrites the first.
+ */
+int
+cdf_timespec_to_timestamp(cdf_timestamp_t *t, const struct timespec *ts)
+{
+ /* Reference the parameters so they do not count as unused. */
+ (void)&t;
+ (void)&ts;
+#ifdef notyet
+ struct tm tm;
+ if (gmtime_r(&ts->ts_sec, &tm) == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+ *t = (ts->ts_nsec / 100) * CDF_TIME_PREC;
+ *t = tm.tm_sec;
+ *t += tm.tm_min * 60;
+ *t += tm.tm_hour * 60 * 60;
+ *t += tm.tm_mday * 60 * 60 * 24;
+#endif
+ return 0;
+}
+
+
+#ifdef TEST
+/*
+ * Self-test (built only with -DTEST): convert one fixed timestamp and
+ * compare its ctime() rendering, minus the trailing newline, with the
+ * expected reference string.  Exits through errx() on mismatch.
+ */
+int
+main(int argc, char *argv[])
+{
+	static const cdf_timestamp_t tst = 0x01A5E403C2D59C00ULL;
+	static const char *ref = "Sat Apr 23 01:30:00 1977";
+	struct timespec ts;
+	char *stamp, *nl;
+
+	cdf_timestamp_to_timespec(&ts, tst);
+	stamp = ctime(&ts.tv_sec);
+
+	/* ctime() output ends in a newline; cut it off before comparing. */
+	nl = strchr(stamp, '\n');
+	if (nl != NULL)
+		*nl = '\0';
+
+	if (strcmp(ref, stamp) != 0)
+		errx(1, "Error date %s != %s\n", ref, stamp);
+	return 0;
+}
+#endif
diff --git a/contrib/file/compress.c b/contrib/file/compress.c
index 5867ac94a8f5a..0a30803c40d9b 100644
--- a/contrib/file/compress.c
+++ b/contrib/file/compress.c
@@ -33,15 +33,18 @@
* using method, return sizeof new
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: compress.c,v 1.61 2009/02/03 20:27:51 christos Exp $")
+#endif
+
#include "magic.h"
-#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <string.h>
#include <errno.h>
-#include <sys/types.h>
#include <sys/ioctl.h>
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
@@ -54,11 +57,6 @@
#include <zlib.h>
#endif
-
-#ifndef lint
-FILE_RCSID("@(#)$File: compress.c,v 1.57 2008/07/16 18:00:57 christos Exp $")
-#endif
-
private const struct {
const char magic[8];
size_t maglen;
@@ -77,6 +75,7 @@ private const struct {
{ "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */
/* ...only first file examined */
{ "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */
+ { "LZIP", 4, { "lzip", "-cdq", NULL }, 1 },
};
private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
@@ -237,7 +236,7 @@ file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
char buf[4096];
int r, tfd;
- (void)strcpy(buf, "/tmp/file.XXXXXX");
+ (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
#ifndef HAVE_MKSTEMP
{
char *ptr = mktemp(buf);
diff --git a/contrib/file/config.h.in b/contrib/file/config.h.in
index 26585d825beeb..1b7ac287db547 100644
--- a/contrib/file/config.h.in
+++ b/contrib/file/config.h.in
@@ -78,6 +78,12 @@
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
+/* Define to 1 if you have the `strlcat' function. */
+#undef HAVE_STRLCAT
+
+/* Define to 1 if you have the `strlcpy' function. */
+#undef HAVE_STRLCPY
+
/* Define to 1 if you have the `strndup' function. */
#undef HAVE_STRNDUP
@@ -90,6 +96,12 @@
/* Define to 1 if `st_rdev' is member of `struct stat'. */
#undef HAVE_STRUCT_STAT_ST_RDEV
+/* Define to 1 if `tm_gmtoff' is member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_GMTOFF
+
+/* Define to 1 if `tm_zone' is member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_ZONE
+
/* Define to 1 if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
diff --git a/contrib/file/configure b/contrib/file/configure
index bf5ba58916fff..6b51d2925153b 100755
--- a/contrib/file/configure
+++ b/contrib/file/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for file 4.26.
+# Generated by GNU Autoconf 2.61 for file 5.00.
#
# Report bugs to <christos@astron.com>.
#
@@ -728,8 +728,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='file'
PACKAGE_TARNAME='file'
-PACKAGE_VERSION='4.26'
-PACKAGE_STRING='file 4.26'
+PACKAGE_VERSION='5.00'
+PACKAGE_STRING='file 5.00'
PACKAGE_BUGREPORT='christos@astron.com'
# Factoring default headers for most tests.
@@ -1395,7 +1395,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures file 4.26 to adapt to many kinds of systems.
+\`configure' configures file 5.00 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1465,7 +1465,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of file 4.26:";;
+ short | recursive ) echo "Configuration of file 5.00:";;
esac
cat <<\_ACEOF
@@ -1572,7 +1572,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-file configure 4.26
+file configure 5.00
generated by GNU Autoconf 2.61
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1586,7 +1586,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by file $as_me 4.26, which was
+It was created by file $as_me 5.00, which was
generated by GNU Autoconf 2.61. Invocation command line was
$ $0 $@
@@ -2276,7 +2276,7 @@ fi
# Define the identity of the package.
PACKAGE='file'
- VERSION='4.26'
+ VERSION='5.00'
cat >>confdefs.h <<_ACEOF
@@ -22166,6 +22166,209 @@ _ACEOF
fi
+{ echo "$as_me:$LINENO: checking for struct tm.tm_gmtoff" >&5
+echo $ECHO_N "checking for struct tm.tm_gmtoff... $ECHO_C" >&6; }
+if test "${ac_cv_member_struct_tm_tm_gmtoff+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (ac_aggr.tm_gmtoff)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_member_struct_tm_tm_gmtoff=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (sizeof ac_aggr.tm_gmtoff)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_member_struct_tm_tm_gmtoff=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_member_struct_tm_tm_gmtoff=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_gmtoff" >&5
+echo "${ECHO_T}$ac_cv_member_struct_tm_tm_gmtoff" >&6; }
+if test $ac_cv_member_struct_tm_tm_gmtoff = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_GMTOFF 1
+_ACEOF
+
+
+fi
+{ echo "$as_me:$LINENO: checking for struct tm.tm_zone" >&5
+echo $ECHO_N "checking for struct tm.tm_zone... $ECHO_C" >&6; }
+if test "${ac_cv_member_struct_tm_tm_zone+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (ac_aggr.tm_zone)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_member_struct_tm_tm_zone=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static struct tm ac_aggr;
+if (sizeof ac_aggr.tm_zone)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_member_struct_tm_tm_zone=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_member_struct_tm_tm_zone=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_zone" >&5
+echo "${ECHO_T}$ac_cv_member_struct_tm_tm_zone" >&6; }
+if test $ac_cv_member_struct_tm_tm_zone = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_ZONE 1
+_ACEOF
+
+
+fi
+
{ echo "$as_me:$LINENO: checking for tm_zone in struct tm" >&5
echo $ECHO_N "checking for tm_zone in struct tm... $ECHO_C" >&6; }
if test "${ac_cv_struct_tm_zone+set}" = set; then
@@ -23853,7 +24056,9 @@ done
-for ac_func in getopt_long asprintf vasprintf
+
+
+for ac_func in getopt_long asprintf vasprintf strlcpy strlcat
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -24469,7 +24674,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by file $as_me 4.26, which was
+This file was extended by file $as_me 5.00, which was
generated by GNU Autoconf 2.61. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -24522,7 +24727,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-file config.status 4.26
+file config.status 5.00
configured by $0, generated by GNU Autoconf 2.61,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/contrib/file/configure.ac b/contrib/file/configure.ac
index ae674ffee4825..071ee28124ce3 100644
--- a/contrib/file/configure.ac
+++ b/contrib/file/configure.ac
@@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(file, 4.26, christos@astron.com)
+AC_INIT(file, 5.00, christos@astron.com)
AM_INIT_AUTOMAKE
AM_CONFIG_HEADER(config.h)
@@ -75,6 +75,8 @@ AC_TYPE_OFF_T
AC_TYPE_SIZE_T
AC_CHECK_MEMBERS([struct stat.st_rdev])
+AC_STRUCT_TM
+AC_CHECK_MEMBERS([struct tm.tm_gmtoff, struct tm.tm_zone])
AC_STRUCT_TIMEZONE_DAYLIGHT
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
@@ -139,7 +141,7 @@ dnl Checks for functions
AC_CHECK_FUNCS(mmap strerror strndup strtoul mbrtowc mkstemp utimes utime wcwidth strtof)
dnl Provide implementation of some required functions if necessary
-AC_REPLACE_FUNCS(getopt_long asprintf vasprintf)
+AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat)
dnl Checks for libraries
AC_CHECK_LIB(z,gzopen)
diff --git a/contrib/file/encoding.c b/contrib/file/encoding.c
new file mode 100644
index 0000000000000..4e94f9b868f0a
--- /dev/null
+++ b/contrib/file/encoding.c
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) Ian F. Darwin 1986-1995.
+ * Software written by Ian F. Darwin and others;
+ * maintained 1995-present by Christos Zoulas and others.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice immediately at the beginning of the file, without modification,
+ * this list of conditions, and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Encoding -- determine the character encoding of a text file.
+ *
+ * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
+ * international characters.
+ */
+
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
+#include "magic.h"
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+
+/*
+ * Forward declarations of the file-local helpers defined further down.
+ * Every looks_*() detector takes the raw bytes and their count, an output
+ * unichar buffer, and a pointer that receives the number of characters
+ * stored in that buffer.
+ */
+private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
+ size_t *);
+private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
+private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
+
+/*
+ * Try to determine whether text is in some character code we can
+ * identify.  Each of these tests, if it succeeds, will leave
+ * the text converted into one-unichar-per-character Unicode in
+ * ubuf, and the number of characters converted in ulen.
+ *
+ * The detectors are tried in a fixed order: ASCII, UTF-8 with BOM,
+ * UTF-8, UTF-16 (either byte order), ISO-8859, non-ISO extended ASCII
+ * and finally, after translating the input with from_ebcdic(), EBCDIC
+ * and international EBCDIC.
+ *
+ * ms         magic set handed to file_oomem() on allocation failure
+ * buf/nbytes bytes to classify
+ * ubuf       receives NULL or a calloc()ed array of nbytes + 1 unichars;
+ *            this function never frees it, so the caller presumably owns
+ *            it (confirm against the callers)
+ * ulen       receives the number of unichars stored in *ubuf
+ * code       receives a descriptive encoding name; written only when an
+ *            encoding is recognized
+ * code_mime  receives the matching MIME charset name; written only when
+ *            an encoding is recognized
+ * type       receives "text" or "binary"
+ *
+ * Returns 1 when an encoding was recognized and 0 when the data does not
+ * look like text.  An allocation failure also returns 0, with
+ * *ubuf == NULL, *ulen == 0 and *type == "binary", so that a caller never
+ * sees a successful return paired with unset outputs.
+ */
+protected int
+file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
+    unichar **ubuf, size_t *ulen, const char **code, const char **code_mime,
+    const char **type)
+{
+	size_t mlen;
+	int rv = 1, ucs_type;
+	unsigned char *nbuf = NULL;
+
+	/* Give the outputs defined values before any early exit. */
+	*ubuf = NULL;
+	*ulen = 0;
+	*type = "binary";
+
+	/*
+	 * calloc() is given the element count and size separately so that
+	 * it can reject a product that does not fit in size_t; mlen is kept
+	 * only for the out-of-memory report.
+	 */
+	mlen = (nbytes + 1) * sizeof(nbuf[0]);
+	nbuf = CAST(unsigned char *, calloc(nbytes + 1, sizeof(nbuf[0])));
+	if (nbuf == NULL) {
+		file_oomem(ms, mlen);
+		rv = 0;
+		goto done;
+	}
+	mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
+	*ubuf = CAST(unichar *, calloc(nbytes + 1, sizeof((*ubuf)[0])));
+	if (*ubuf == NULL) {
+		file_oomem(ms, mlen);
+		rv = 0;
+		goto done;
+	}
+
+	*type = "text";
+	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+		*code = "ASCII";
+		*code_mime = "us-ascii";
+	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+		*code = "UTF-8 Unicode (with BOM)";
+		*code_mime = "utf-8";
+	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+		/* > 1: only when real multi-byte sequences were seen. */
+		*code = "UTF-8 Unicode";
+		*code_mime = "utf-8";
+	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
+		if (ucs_type == 1) {
+			*code = "Little-endian UTF-16 Unicode";
+			*code_mime = "utf-16le";
+		} else {
+			*code = "Big-endian UTF-16 Unicode";
+			*code_mime = "utf-16be";
+		}
+	} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+		*code = "ISO-8859";
+		*code_mime = "iso-8859-1";
+	} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+		*code = "Non-ISO extended-ASCII";
+		*code_mime = "unknown-8bit";
+	} else {
+		/* Last resort: translate from EBCDIC and re-test the result. */
+		from_ebcdic(buf, nbytes, nbuf);
+
+		if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+			*code = "EBCDIC";
+			*code_mime = "ebcdic";
+		} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+			*code = "International EBCDIC";
+			*code_mime = "ebcdic";
+		} else { /* Doesn't look like text at all */
+			rv = 0;
+			*type = "binary";
+		}
+	}
+
+ done:
+	/* nbuf is scratch space for the EBCDIC pass; free(NULL) is a no-op. */
+	free(nbuf);
+
+	return rv;
+}
+
+/*
+ * This table reflects a particular philosophy about what constitutes
+ * "text," and there is room for disagreement about it.
+ *
+ * Version 3.31 of the file command considered a file to be ASCII if
+ * each of its characters was approved by either the isascii() or
+ * isalpha() function. On most systems, this would mean that any
+ * file consisting only of characters in the range 0x00 ... 0x7F
+ * would be called ASCII text, but many systems might reasonably
+ * consider some characters outside this range to be alphabetic,
+ * so the file command would call such characters ASCII. It might
+ * have been more accurate to call this "considered textual on the
+ * local system" than "ASCII."
+ *
+ * It considered a file to be "International language text" if each
+ * of its characters was either an ASCII printing character (according
+ * to the real ASCII standard, not the above test), a character in
+ * the range 0x80 ... 0xFF, or one of the following control characters:
+ * backspace, tab, line feed, vertical tab, form feed, carriage return,
+ * escape. No attempt was made to determine the language in which files
+ * of this type were written.
+ *
+ *
+ * The table below considers a file to be ASCII if all of its characters
+ * are either ASCII printing characters (again, according to the X3.4
+ * standard, not isascii()) or any of the following controls: bell,
+ * backspace, tab, line feed, form feed, carriage return, esc, nextline.
+ *
+ * I include bell because some programs (particularly shell scripts)
+ * use it literally, even though it is rare in normal text. I exclude
+ * vertical tab because it never seems to be used in real text. I also
+ * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
+ * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
+ * character to. It might be more appropriate to include it in the 8859
+ * set instead of the ASCII set, but it's got to be included in *something*
+ * we recognize or EBCDIC files aren't going to be considered textual.
+ * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
+ * and Latin characters, so these should possibly be allowed. But they
+ * make a real mess on VT100-style displays if they're not paired properly,
+ * so we are probably better off not calling them text.
+ *
+ * A file is considered to be ISO-8859 text if its characters are all
+ * either ASCII, according to the above definition, or printing characters
+ * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
+ *
+ * Finally, a file is considered to be international text from some other
+ * character code if its characters are all either ISO-8859 (according to
+ * the above definition) or characters in the range 0x80 ... 0x9F, which
+ * ISO-8859 considers to be control characters but the IBM PC and Macintosh
+ * consider to be printing characters.
+ */
+
+/* Classification codes stored in text_chars[]; one code per byte value. */
+#define F 0 /* character never appears in text */
+#define T 1 /* character appears in plain ASCII text */
+#define I 2 /* character appears in ISO-8859 text */
+#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
+
+/*
+ * Lookup table indexed by the byte value itself (0x00 ... 0xff); each row
+ * below covers sixteen consecutive values and every entry is one of the
+ * F, T, I or X codes defined above.
+ */
+private char text_chars[256] = {
+ /* BEL BS HT LF FF CR */
+ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
+ /* ESC */
+ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
+ /* NEL */
+ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
+ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
+ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
+};
+
+/*
+ * Accept buf only if every one of its nbytes bytes is classified T
+ * (plain ASCII text) in text_chars.  Accepted bytes are widened into
+ * ubuf in order, and *ulen is set to how many were stored; on failure
+ * that is the number of bytes accepted before the first offending one.
+ *
+ * Returns 1 if all bytes qualify (including when nbytes is 0), else 0.
+ */
+private int
+looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	size_t count;
+
+	for (count = 0; count < nbytes; count++) {
+		const unsigned char ch = buf[count];
+
+		if (text_chars[ch] != T)
+			break;
+		ubuf[count] = ch;
+	}
+
+	*ulen = count;
+	return count == nbytes;
+}
+
+/*
+ * Accept buf only if every one of its nbytes bytes is classified either
+ * T (plain ASCII text) or I (ISO-8859 text) in text_chars.  Accepted
+ * bytes are widened into ubuf in order, and *ulen is set to how many
+ * were stored; on failure that is the number of bytes accepted before
+ * the first offending one.
+ *
+ * Returns 1 if all bytes qualify (including when nbytes is 0), else 0.
+ */
+private int
+looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+	size_t count;
+
+	for (count = 0; count < nbytes; count++) {
+		const unsigned char ch = buf[count];
+		const int cls = text_chars[ch];
+
+		if (!(cls == T || cls == I))
+			break;
+		ubuf[count] = ch;
+	}
+
+	*ulen = count;
+	return count == nbytes;
+}
+
+/*
+ * Accept buf only if every one of its nbytes bytes is classified T, I
+ * or X in text_chars, i.e. plain ASCII, ISO-8859 or non-ISO extended
+ * ASCII.  Accepted bytes are widened into ubuf in order, and *ulen is
+ * set to how many were stored; on failure that is the number of bytes
+ * accepted before the first offending one.
+ *
+ * Returns 1 if all bytes qualify (including when nbytes is 0), else 0.
+ */
+private int
+looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	size_t count;
+
+	for (count = 0; count < nbytes; count++) {
+		const unsigned char ch = buf[count];
+		const int cls = text_chars[ch];
+
+		if (!(cls == T || cls == I || cls == X))
+			break;
+		ubuf[count] = ch;
+	}
+
+	*ulen = count;
+	return count == nbytes;
+}
+
+/*
+ * Decide whether some text looks like UTF-8.  Returns:
+ *
+ *     -1: invalid UTF-8
+ *      0: uses odd control characters, so doesn't look like text
+ *      1: 7-bit text
+ *      2: definitely UTF-8 text (valid high-bit set bytes)
+ *
+ * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
+ * ubuf must be big enough!  When ubuf is NULL, ulen is not touched.
+ *
+ * Lead bytes announcing up to five continuation bytes are accepted.  A
+ * multi-byte sequence cut short by the end of the buffer is not treated
+ * as an error: scanning simply stops and the verdict is based on what
+ * was seen before it.
+ */
+protected int
+file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+	size_t pos;
+	int saw_multibyte = 0, saw_ctrl = 0;
+
+	if (ubuf)
+		*ulen = 0;
+
+	for (pos = 0; pos < nbytes; pos++) {
+		const unsigned char lead = buf[pos];
+		unichar cp;
+		int trail, k;
+
+		if ((lead & 0x80) == 0) {	/* 0xxxxxxx is plain ASCII */
+			/*
+			 * Remember unusual control characters but keep
+			 * scanning: an invalid sequence later on must still
+			 * be reported as -1.
+			 */
+			if (text_chars[lead] != T)
+				saw_ctrl = 1;
+			if (ubuf)
+				ubuf[(*ulen)++] = lead;
+			continue;
+		}
+
+		if ((lead & 0x40) == 0)		/* 10xxxxxx never 1st byte */
+			return -1;
+
+		/* 11xxxxxx begins UTF-8: work out the sequence length. */
+		if ((lead & 0xe0) == 0xc0) {		/* 110xxxxx */
+			cp = lead & 0x1f;
+			trail = 1;
+		} else if ((lead & 0xf0) == 0xe0) {	/* 1110xxxx */
+			cp = lead & 0x0f;
+			trail = 2;
+		} else if ((lead & 0xf8) == 0xf0) {	/* 11110xxx */
+			cp = lead & 0x07;
+			trail = 3;
+		} else if ((lead & 0xfc) == 0xf8) {	/* 111110xx */
+			cp = lead & 0x03;
+			trail = 4;
+		} else if ((lead & 0xfe) == 0xfc) {	/* 1111110x */
+			cp = lead & 0x01;
+			trail = 5;
+		} else {
+			return -1;
+		}
+
+		for (k = 0; k < trail; k++) {
+			if (++pos >= nbytes)
+				goto done;	/* truncated sequence */
+
+			/* Continuation bytes must look like 10xxxxxx. */
+			if ((buf[pos] & 0xc0) != 0x80)
+				return -1;
+
+			cp = (cp << 6) + (buf[pos] & 0x3f);
+		}
+
+		if (ubuf)
+			ubuf[(*ulen)++] = cp;
+		saw_multibyte = 1;
+	}
+done:
+	if (saw_ctrl)
+		return 0;
+	return saw_multibyte ? 2 : 1;
+}
+
+/*
+ * Decide whether some text looks like UTF-8 with BOM.  If the buffer
+ * does not start with the byte sequence EF BB BF, or holds nothing
+ * beyond those three bytes, return -1; otherwise return the result of
+ * file_looks_utf8() on the rest of the text.
+ */
+private int
+looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	/* Need the three BOM bytes plus at least one byte of payload. */
+	if (nbytes <= 3)
+		return -1;
+
+	if (buf[0] != 0xef || buf[1] != 0xbb || buf[2] != 0xbf)
+		return -1;
+
+	return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
+}
+
+/*
+ * Decide whether some text looks like UTF-16 introduced by a byte order
+ * mark.  Returns:
+ *
+ *      0: no BOM, or the text contains the code unit 0xfffe or a 7-bit
+ *         value that text_chars does not classify as plain text
+ *      1: little-endian (buffer starts FF FE)
+ *      2: big-endian (buffer starts FE FF)
+ *
+ * Each 16-bit unit after the BOM is stored in ubuf and counted in *ulen;
+ * a trailing odd byte is ignored.  *ulen is left untouched when the BOM
+ * test itself fails.
+ */
+private int
+looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
+    size_t *ulen)
+{
+	int bigend;
+	size_t off;
+
+	if (nbytes < 2)
+		return 0;
+
+	if (buf[0] == 0xfe && buf[1] == 0xff)
+		bigend = 1;
+	else if (buf[0] == 0xff && buf[1] == 0xfe)
+		bigend = 0;
+	else
+		return 0;
+
+	*ulen = 0;
+
+	for (off = 2; off + 1 < nbytes; off += 2) {
+		/* XXX fix to properly handle chars > 65536 */
+		const unsigned char hi = bigend ? buf[off] : buf[off + 1];
+		const unsigned char lo = bigend ? buf[off + 1] : buf[off];
+		const unichar unit = lo + 256 * hi;
+
+		/* The unit is stored before it is vetted. */
+		ubuf[(*ulen)++] = unit;
+
+		if (unit == 0xfffe)
+			return 0;
+		if (unit < 128 && text_chars[(size_t)unit] != T)
+			return 0;
+	}
+
+	return 1 + bigend;
+}
+
+/*
+ * The single-letter classification codes are only needed by text_chars
+ * and the detectors above; undefine them so the short names are not
+ * visible to the rest of the file.
+ */
+#undef F
+#undef T
+#undef I
+#undef X
+
+/*
+ * This table maps each EBCDIC character to an (8-bit extended) ASCII
+ * character, as specified in the rationale for the dd(1) command in
+ * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
+ *
+ * Unfortunately it does not seem to correspond exactly to any of the
+ * five variants of EBCDIC documented in IBM's _Enterprise Systems
+ * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
+ * Edition, July, 1999, pp. I-1 - I-4.
+ *
+ * Fortunately, though, all versions of EBCDIC, including this one, agree
+ * on most of the printing characters that also appear in (7-bit) ASCII.
+ * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
+ *
+ * Fortunately too, there is general agreement that codes 0x00 through
+ * 0x3F represent control characters, 0x41 a nonbreaking space, and the
+ * remainder printing characters.
+ *
+ * This is sufficient to allow us to identify EBCDIC text and to distinguish
+ * between old-style and internationalized examples of text.
+ *
+ * The table is indexed by the EBCDIC byte value and laid out sixteen
+ * entries per row; from_ebcdic() is the routine that consults it.
+ */
+
+private unsigned char ebcdic_to_ascii[] = {
+ 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
+128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
+144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
+' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
+'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
+'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
+186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
+195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
+202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
+209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
+216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
+'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
+'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
+'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
+'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
+};
+
+#ifdef notdef
+/*
+ * The following EBCDIC-to-ASCII table may relate more closely to reality,
+ * or at least to modern reality. It comes from
+ *
+ * http://ftp.s390.ibm.com/products/oe/bpxqp9.html
+ *
+ * and maps the characters of EBCDIC code page 1047 (the code used for
+ * Unix-derived software on IBM's 390 systems) to the corresponding
+ * characters from ISO 8859-1.
+ *
+ * If this table is used instead of the above one, some of the special
+ * cases for the NEL character can be taken out of the code.
+ *
+ * The table is only compiled when notdef is defined.
+ * NOTE(review): nothing in this file appears to define notdef or to
+ * reference this table, so it looks like reference material only --
+ * confirm before relying on it or removing it.
+ */
+
+private unsigned char ebcdic_1047_to_8859[] = {
+0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
+0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
+0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
+0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
+0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
+0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
+0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
+0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
+0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
+0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
+0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
+0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
+0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
+0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
+};
+#endif
+
+/*
+ * Translate the nbytes bytes of buf from EBCDIC to ASCII, one byte at a
+ * time through the ebcdic_to_ascii table, storing the results in
+ * out[0 ... nbytes-1].  The input is not modified; out must have room
+ * for at least nbytes bytes.
+ */
+private void
+from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
+{
+	size_t remaining = nbytes;
+
+	while (remaining-- > 0)
+		*out++ = ebcdic_to_ascii[*buf++];
+}
diff --git a/contrib/file/file.c b/contrib/file/file.c
index 2f518da99e571..9b4747b2ec6d2 100644
--- a/contrib/file/file.c
+++ b/contrib/file/file.c
@@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -30,15 +30,16 @@
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: file.c,v 1.130 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
-#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
-#include <sys/types.h>
-#include <sys/param.h> /* for MAXPATHLEN */
-#include <sys/stat.h>
#ifdef RESTORE_TIME
# if (__COHERENT__ >= 0x420)
# include <sys/utime.h>
@@ -73,11 +74,6 @@ int getopt_long(int argc, char * const *argv, const char *optstring, const struc
#include "patchlevel.h"
-#ifndef lint
-FILE_RCSID("@(#)$File: file.c,v 1.121 2008/07/03 15:48:18 christos Exp $")
-#endif /* lint */
-
-
#ifdef S_IFLNK
#define SYMLINKFLAG "Lh"
#else
@@ -87,7 +83,7 @@ FILE_RCSID("@(#)$File: file.c,v 1.121 2008/07/03 15:48:18 christos Exp $")
# define USAGE "Usage: %s [-bcik" SYMLINKFLAG "nNrsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n %s -C -m magicfiles\n"
#ifndef MAXPATHLEN
-#define MAXPATHLEN 512
+#define MAXPATHLEN 1024
#endif
private int /* Global command-line options */
@@ -96,21 +92,45 @@ private int /* Global command-line options */
nobuffer = 0, /* Do not buffer stdout */
nulsep = 0; /* Append '\0' to the separator */
-private const char *magicfile = 0; /* where the magic is */
private const char *default_magicfile = MAGIC;
private const char *separator = ":"; /* Default field separator */
+private const char hmagic[] = "/.magic";
+private const struct option long_options[] = {
+#define OPT(shortname, longname, opt, doc) \
+ {longname, opt, NULL, shortname},
+#define OPT_LONGONLY(longname, opt, doc) \
+ {longname, opt, NULL, 0},
+#include "file_opts.h"
+#undef OPT
+#undef OPT_LONGONLY
+ {0, 0, NULL, 0}
+};
+#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0"
-private char *progname; /* used throughout */
+private const struct {
+ const char *name;
+ int value;
+} nv[] = {
+ { "apptype", MAGIC_NO_CHECK_APPTYPE },
+ { "ascii", MAGIC_NO_CHECK_ASCII },
+ { "cdf", MAGIC_NO_CHECK_CDF },
+ { "compress", MAGIC_NO_CHECK_COMPRESS },
+ { "elf", MAGIC_NO_CHECK_ELF },
+ { "encoding", MAGIC_NO_CHECK_ENCODING },
+ { "soft", MAGIC_NO_CHECK_SOFT },
+ { "tar", MAGIC_NO_CHECK_TAR },
+ { "tokens", MAGIC_NO_CHECK_TOKENS },
+};
-private struct magic_set *magic;
+private char *progname; /* used throughout */
-private void unwrap(char *);
private void usage(void);
private void help(void);
-
int main(int, char *[]);
-private void process(const char *, int);
-private void load(const char *, int);
+
+private int unwrap(struct magic_set *, const char *);
+private int process(struct magic_set *ms, const char *, int);
+private struct magic_set *load(const char *, int);
/*
@@ -122,36 +142,12 @@ main(int argc, char *argv[])
int c;
size_t i;
int action = 0, didsomefiles = 0, errflg = 0;
- int flags = 0;
+ int flags = 0, e = 0;
char *home, *usermagic;
- struct stat sb;
- static const char hmagic[] = "/.magic";
-#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0"
+ struct magic_set *magic = NULL;
+ char magicpath[2 * MAXPATHLEN + 2];
int longindex;
- static const struct option long_options[] =
- {
-#define OPT(shortname, longname, opt, doc) \
- {longname, opt, NULL, shortname},
-#define OPT_LONGONLY(longname, opt, doc) \
- {longname, opt, NULL, 0},
-#include "file_opts.h"
-#undef OPT
-#undef OPT_LONGONLY
- {0, 0, NULL, 0}
-};
-
- static const struct {
- const char *name;
- int value;
- } nv[] = {
- { "apptype", MAGIC_NO_CHECK_APPTYPE },
- { "ascii", MAGIC_NO_CHECK_ASCII },
- { "compress", MAGIC_NO_CHECK_COMPRESS },
- { "elf", MAGIC_NO_CHECK_ELF },
- { "soft", MAGIC_NO_CHECK_SOFT },
- { "tar", MAGIC_NO_CHECK_TAR },
- { "tokens", MAGIC_NO_CHECK_TOKENS },
- };
+ const char *magicfile; /* where the magic is */
/* makes islower etc work for other langs */
(void)setlocale(LC_CTYPE, "");
@@ -171,14 +167,12 @@ main(int argc, char *argv[])
magicfile = usermagic;
else
if ((home = getenv("HOME")) != NULL) {
- if ((usermagic = malloc(strlen(home)
- + sizeof(hmagic))) != NULL) {
- (void)strcpy(usermagic, home);
- (void)strcat(usermagic, hmagic);
- if (stat(usermagic, &sb)<0)
- free(usermagic);
- else
- magicfile = usermagic;
+ (void)snprintf(magicpath, sizeof(magicpath), "%s%s",
+ home, hmagic);
+ if (access(magicpath, R_OK) == 0) {
+ (void)snprintf(magicpath, sizeof(magicpath),
+ "%s%s:%s", home, hmagic, magicfile);
+ magicfile = magicpath;
}
}
@@ -194,9 +188,12 @@ main(int argc, char *argv[])
help();
break;
case 10:
- flags |= MAGIC_MIME_TYPE;
+ flags |= MAGIC_APPLE;
break;
case 11:
+ flags |= MAGIC_MIME_TYPE;
+ break;
+ case 12:
flags |= MAGIC_MIME_ENCODING;
break;
}
@@ -226,12 +223,14 @@ main(int argc, char *argv[])
else
flags |= nv[i].value;
break;
-
+
case 'f':
if(action)
usage();
- load(magicfile, flags);
- unwrap(optarg);
+ if (magic == NULL)
+ if ((magic = load(magicfile, flags)) == NULL)
+ return 1;
+ e |= unwrap(magic, optarg);
++didsomefiles;
break;
case 'F':
@@ -289,10 +288,18 @@ main(int argc, char *argv[])
if (errflg) {
usage();
}
+ if (e)
+ return e;
switch(action) {
case FILE_CHECK:
case FILE_COMPILE:
+ /*
+ * Don't try to check/compile ~/.magic unless we explicitly
+ * ask for it.
+ */
+ if (magicfile == magicpath)
+ magicfile = default_magicfile;
magic = magic_open(flags|MAGIC_CHECK);
if (magic == NULL) {
(void)fprintf(stderr, "%s: %s\n", progname,
@@ -304,18 +311,19 @@ main(int argc, char *argv[])
if (c == -1) {
(void)fprintf(stderr, "%s: %s\n", progname,
magic_error(magic));
- return -1;
+ return 1;
}
return 0;
default:
- load(magicfile, flags);
+ if (magic == NULL)
+ if ((magic = load(magicfile, flags)) == NULL)
+ return 1;
break;
}
if (optind == argc) {
- if (!didsomefiles) {
+ if (!didsomefiles)
usage();
- }
}
else {
size_t j, wid, nw;
@@ -332,42 +340,43 @@ main(int argc, char *argv[])
bflag = optind >= argc - 1;
}
for (; optind < argc; optind++)
- process(argv[optind], wid);
+ e |= process(magic, argv[optind], wid);
}
- c = magic->haderr ? 1 : 0;
- magic_close(magic);
- return c;
+ if (magic)
+ magic_close(magic);
+ return e;
}
-private void
+private struct magic_set *
/*ARGSUSED*/
-load(const char *m, int flags)
+load(const char *magicfile, int flags)
{
- if (magic || m == NULL)
- return;
- magic = magic_open(flags);
+ struct magic_set *magic = magic_open(flags);
if (magic == NULL) {
(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
- exit(1);
+ return NULL;
}
if (magic_load(magic, magicfile) == -1) {
(void)fprintf(stderr, "%s: %s\n",
progname, magic_error(magic));
- exit(1);
+ magic_close(magic);
+ return NULL;
}
+ return magic;
}
/*
* unwrap -- read a file of filenames, do each one.
*/
-private void
-unwrap(char *fn)
+private int
+unwrap(struct magic_set *ms, const char *fn)
{
char buf[MAXPATHLEN];
FILE *f;
int wid = 0, cwid;
+ int e = 0;
if (strcmp("-", fn) == 0) {
f = stdin;
@@ -376,7 +385,7 @@ unwrap(char *fn)
if ((f = fopen(fn, "r")) == NULL) {
(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
progname, fn, strerror(errno));
- exit(1);
+ return 1;
}
while (fgets(buf, sizeof(buf), f) != NULL) {
@@ -391,19 +400,20 @@ unwrap(char *fn)
while (fgets(buf, sizeof(buf), f) != NULL) {
buf[strcspn(buf, "\n")] = '\0';
- process(buf, wid);
+ e |= process(ms, buf, wid);
if(nobuffer)
(void)fflush(stdout);
}
(void)fclose(f);
+ return e;
}
/*
* Called for each input file on the command line (or in a list of files)
*/
-private void
-process(const char *inname, int wid)
+private int
+process(struct magic_set *ms, const char *inname, int wid)
{
const char *type;
int std_in = strcmp(inname, "-") == 0;
@@ -418,11 +428,14 @@ process(const char *inname, int wid)
(int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
}
- type = magic_file(magic, std_in ? NULL : inname);
- if (type == NULL)
- (void)printf("ERROR: %s\n", magic_error(magic));
- else
+ type = magic_file(ms, std_in ? NULL : inname);
+ if (type == NULL) {
+ (void)printf("ERROR: %s\n", magic_error(ms));
+ return 1;
+ } else {
(void)printf("%s\n", type);
+ return 0;
+ }
}
size_t
@@ -475,9 +488,9 @@ help(void)
"Determine type of FILEs.\n"
"\n", stderr);
#define OPT(shortname, longname, opt, doc) \
- fprintf(stderr, " -%c, --" longname doc, shortname);
+ fprintf(stderr, " -%c, --" longname doc, shortname);
#define OPT_LONGONLY(longname, opt, doc) \
- fprintf(stderr, " --" longname doc);
+ fprintf(stderr, " --" longname doc);
#include "file_opts.h"
#undef OPT
#undef OPT_LONGONLY
diff --git a/contrib/file/file.h b/contrib/file/file.h
index aab1137b342a1..204f49871380e 100644
--- a/contrib/file/file.h
+++ b/contrib/file/file.h
@@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -27,7 +27,7 @@
*/
/*
* file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.108 2008/07/16 18:00:57 christos Exp $
+ * @(#)$File: file.h,v 1.118 2009/02/03 20:27:51 christos Exp $
*/
#ifndef __file_h__
@@ -48,6 +48,7 @@
#endif
#include <regex.h>
#include <sys/types.h>
+#include <sys/param.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>
@@ -103,8 +104,8 @@
#define MAXstring 32 /* max leng of "string" types */
#define MAGICNO 0xF11E041C
-#define VERSIONNO 6
-#define FILE_MAGICSIZE (32 * 6)
+#define VERSIONNO 7
+#define FILE_MAGICSIZE 200
#define FILE_LOAD 0
#define FILE_CHECK 1
@@ -122,7 +123,7 @@ union VALUETYPE {
unsigned char us[MAXstring];
float f;
double d;
-};
+};
struct magic {
/* Word 1 */
@@ -134,7 +135,7 @@ struct magic {
#define UNSIGNED 0x08 /* comparison is unsigned */
#define NOSPACE 0x10 /* suppress space character before output */
#define BINTEST 0x20 /* test is for a binary type (set only
- for top-level tests) */
+ for top-level tests) */
#define TEXTTEST 0 /* for passing to file_softmagic */
uint8_t factor;
@@ -183,7 +184,10 @@ struct magic {
#define FILE_DOUBLE 36
#define FILE_BEDOUBLE 37
#define FILE_LEDOUBLE 38
-#define FILE_NAMES_SIZE 39/* size of array to contain all names */
+#define FILE_BEID3 39
+#define FILE_LEID3 40
+#define FILE_INDIRECT 41
+#define FILE_NAMES_SIZE 42/* size of array to contain all names */
#define IS_STRING(t) \
((t) == FILE_STRING || \
@@ -209,7 +213,7 @@ struct magic {
#else
uint8_t dummy;
#endif
- uint8_t factor_op;
+ uint8_t factor_op;
#define FILE_FACTOR_OP_PLUS '+'
#define FILE_FACTOR_OP_MINUS '-'
#define FILE_FACTOR_OP_TIMES '*'
@@ -257,11 +261,13 @@ struct magic {
#define str_range _u._s._count
#define str_flags _u._s._flags
/* Words 9-16 */
- union VALUETYPE value; /* either number or string */
- /* Words 17..31 */
+ union VALUETYPE value; /* either number or string */
+ /* Words 17-24 */
char desc[MAXDESC]; /* description */
- /* Words 32..47 */
+ /* Words 25-32 */
char mimetype[MAXDESC]; /* MIME type */
+ /* Words 33-34 */
+ char apple[8];
};
#define BIT(A) (1 << (A))
@@ -302,7 +308,7 @@ struct level_info {
int last_match;
int last_cond; /* used for error checking by parse() */
#endif
-} *li;
+};
struct magic_set {
struct mlist *mlist;
struct cont {
@@ -315,8 +321,9 @@ struct magic_set {
} o;
uint32_t offset;
int error;
- int flags;
- int haderr;
+ int flags; /* Control magic tests. */
+ int event_flags; /* Note things that happened. */
+#define EVENT_HAD_ERR 0x01
const char *file;
size_t line; /* current magic line number */
@@ -348,11 +355,19 @@ protected int file_printf(struct magic_set *, const char *, ...)
protected int file_reset(struct magic_set *);
protected int file_tryelf(struct magic_set *, int, const unsigned char *,
size_t);
+protected int file_trycdf(struct magic_set *, int, const unsigned char *,
+ size_t);
protected int file_zmagic(struct magic_set *, int, const char *,
const unsigned char *, size_t);
protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
+protected int file_ascmagic_with_encoding(struct magic_set *,
+ const unsigned char *, size_t, unichar *, size_t, const char *,
+ const char *);
+protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
+ unichar **, size_t *, const char **, const char **, const char **);
protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
-protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
+protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
+ int);
protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
protected uint64_t file_signextend(struct magic_set *, struct magic *,
uint64_t);
@@ -397,6 +412,13 @@ int vasprintf(char **, const char *, va_list);
int asprintf(char **ptr, const char *format_string, ...);
#endif
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t siz);
+#endif
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t siz);
+#endif
+
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif
@@ -407,12 +429,14 @@ int asprintf(char **ptr, const char *format_string, ...);
#ifndef __cplusplus
#ifdef __GNUC__
-static const char *rcsid(const char *) __attribute__((__used__));
-#endif
+#define FILE_RCSID(id) \
+static const char rcsid[] __attribute__((__used__)) = id;
+#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
return rcsid(p = id); \
}
+#endif
#else
#define FILE_RCSID(id)
#endif
diff --git a/contrib/file/file.man b/contrib/file/file.man
index 197700f69ad2b..1215e69e16454 100644
--- a/contrib/file/file.man
+++ b/contrib/file/file.man
@@ -1,5 +1,5 @@
-.\" $File: file.man,v 1.73 2008/02/19 17:58:00 rrt Exp $
-.Dd February 19, 2008
+.\" $File: file.man,v 1.79 2008/11/06 22:49:08 rrt Exp $
+.Dd October 9, 2008
.Dt FILE __CSECTION__
.Os
.Sh NAME
@@ -41,12 +41,12 @@ characters and is probably safe to read on an
terminal),
.Em executable
(the file contains the result of compiling a program
-in a form understandable to some
+in a form understandable to some
.Dv UNIX
kernel or another),
or
.Em data
-meaning anything else (data is usually
+meaning anything else (data is usually
.Sq binary
or non-printable).
Exceptions are well-known file formats (core files, tar archives)
@@ -54,13 +54,13 @@ that are known to contain binary data.
When modifying magic files or the program itself, make sure to
.Em "preserve these keywords" .
Users depend on knowing that all the readable files in a directory
-have the word
-.Dq text
+have the word
+.Sq text
printed.
-Don't do as Berkeley did and change
-.Dq shell commands text
-to
-.Dq shell script .
+Don't do as Berkeley did and change
+.Sq shell commands text
+to
+.Sq shell script .
.Pp
The filesystem tests are based on examining the return from a
.Xr stat 2
@@ -78,16 +78,16 @@ The magic tests are used to check for files with data in
particular fixed formats.
The canonical example of this is a binary executable (compiled program)
.Dv a.out
-file, whose format is defined in
+file, whose format is defined in
.In elf.h ,
.In a.out.h
and possibly
.In exec.h
in the standard include directory.
-These files have a
+These files have a
.Sq "magic number"
stored in a particular place
-near the beginning of the file that tells the
+near the beginning of the file that tells the
.Dv UNIX operating system
that the file is a binary executable, and which of several types thereof.
The concept of a
@@ -116,11 +116,11 @@ ranges and sequences of bytes that constitute printable text
in each set.
If a file passes any of these tests, its character set is reported.
ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
-as
-.Dq text
+as
+.Sq text
because they will be mostly readable on nearly any terminal;
-UTF-16 and EBCDIC are only
-.Dq character data
+UTF-16 and EBCDIC are only
+.Sq character data
because, while
they contain text, it is text that will require translation
before it can be read.
@@ -144,19 +144,19 @@ For example, the keyword
.Em .br
indicates that the file is most likely a
.Xr troff 1
-input file, just as the keyword
+input file, just as the keyword
.Em struct
indicates a C program.
These tests are less reliable than the previous
two groups, so they are performed last.
The language test routines also test for some miscellany
-(such as
+(such as
.Xr tar 1
archives).
.Pp
Any file that cannot be identified as having been written
in any of the character sets listed above is simply said to be
-.Dq data .
+.Sq data .
.Sh OPTIONS
.Bl -tag -width indent
.It Fl b , -brief
@@ -177,40 +177,41 @@ from the list of tests made to determine the file type. Valid test names
are:
.Bl -tag -width
.It apptype
-Check for
.Dv EMX
application type (only on EMX).
-.It ascii
-Check for various types of ascii files.
+.It text
+Various types of text files (this test will try to guess the text encoding, irrespective of the setting of the
+.Sq encoding
+option).
+.It encoding
+Different text encodings for soft magic tests.
+.It tokens
+Looks for known tokens inside text files.
+.It cdf
+Prints details of Compound Document Files.
.It compress
-Don't look for, or inside compressed files.
+Checks for, and looks inside, compressed files.
.It elf
-Don't print elf details.
-.It fortran
-Don't look for fortran sequences inside ascii files.
+Prints ELF file details.
.It soft
-Don't consult magic files.
+Consults magic files.
.It tar
-Don't examine tar files.
-.It token
-Don't look for known tokens inside ascii files.
-.It troff
-Don't look for troff sequences inside ascii files.
+Examines tar files.
.El
.It Fl f , -files-from Ar namefile
-Read the names of the files to be examined from
+Read the names of the files to be examined from
.Ar namefile
-(one per line)
+(one per line)
before the argument list.
-Either
+Either
.Ar namefile
or at least one filename argument must be present;
-to test the standard input, use
+to test the standard input, use
.Sq -
as a filename argument.
.It Fl F , -separator Ar separator
Use the specified string as the separator between the filename and the
-file result returned. Defaults to
+file result returned. Defaults to
.Sq \&: .
.It Fl h , -no-dereference
option causes symlinks not to be followed
@@ -221,17 +222,15 @@ is not defined.
.It Fl i , -mime
Causes the file command to output mime type strings rather than the more
traditional human readable ones. Thus it may say
-.Dq text/plain charset=us-ascii
+.Sq text/plain; charset=us-ascii
rather than
-.Dq ASCII text .
+.Sq ASCII text .
In order for this option to work, file changes the way
it handles files recognized by the command itself (such as many of the
text file types, directories etc), and makes use of an alternative
-.Dq magic
+.Sq magic
file.
-(See
-.Dq FILES
-section, below).
+(See the FILES section, below).
.It Fl -mime-type , -mime-encoding
Like
.Fl i ,
@@ -239,10 +238,10 @@ but print only the specified element(s).
.It Fl k , -keep-going
Don't stop at the first match, keep going. Subsequent matches will be
have the string
-.Dq "\[rs]012\- "
+.Sq "\[rs]012\- "
prepended.
(If you want a newline, see the
-.Dq "\-r"
+.Sq "\-r"
option.)
.It Fl L , -dereference
option causes symlinks to be followed, as the like-named option in
@@ -324,7 +323,7 @@ will not attempt to open
.Pa $HOME/.magic .
.Nm
adds
-.Dq .mgc
+.Sq .mgc
to the value of this variable as appropriate.
The environment variable
.Dv POSIXLY_CORRECT
@@ -347,47 +346,47 @@ options.
.Sh STANDARDS CONFORMANCE
This program is believed to exceed the System V Interface Definition
of FILE(CMD), as near as one can determine from the vague language
-contained therein.
+contained therein.
Its behavior is mostly compatible with the System V program of the same name.
This version knows more magic, however, so it will produce
-different (albeit more accurate) output in many cases.
+different (albeit more accurate) output in many cases.
.\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html
.Pp
-The one significant difference
+The one significant difference
between this version and System V
is that this version treats any white space
as a delimiter, so that spaces in pattern strings must be escaped.
For example,
-.Bd -literal -offset indent
+.Bd -literal -offset indent
>10 string language impress\ (imPRESS data)
.Ed
.Pp
in an existing magic file would have to be changed to
-.Bd -literal -offset indent
+.Bd -literal -offset indent
>10 string language\e impress (imPRESS data)
.Ed
.Pp
In addition, in this version, if a pattern string contains a backslash,
it must be escaped.
For example
-.Bd -literal -offset indent
+.Bd -literal -offset indent
0 string \ebegindata Andrew Toolkit document
.Ed
.Pp
in an existing magic file would have to be changed to
-.Bd -literal -offset indent
+.Bd -literal -offset indent
0 string \e\ebegindata Andrew Toolkit document
.Ed
.Pp
SunOS releases 3.2 and later from Sun Microsystems include a
-.Nm
+.Nm
command derived from the System V one, but with some extensions.
My version differs from Sun's only in minor ways.
-It includes the extension of the
+It includes the extension of the
.Sq &
operator, used as,
for example,
-.Bd -literal -offset indent
+.Bd -literal -offset indent
>16 long&0x7fffffff >0 not stripped
.Ed
.Sh MAGIC DIRECTORY
@@ -395,7 +394,7 @@ The magic file entries have been collected from various sources,
mainly USENET, and contributed by various authors.
Christos Zoulas (address below) will collect additional
or corrected magic file entries.
-A consolidation of magic file entries
+A consolidation of magic file entries
will be distributed periodically.
.Pp
The order of entries in the magic file is significant.
@@ -405,14 +404,14 @@ If your old
.Nm
command uses a magic file,
keep the old magic file around for comparison purposes
-(rename it to
+(rename it to
.Pa __MAGIC__.orig ).
.Sh EXAMPLES
-.Bd -literal -offset indent
+.Bd -literal -offset indent
$ file file.c file /dev/{wd0a,hda}
file.c: C program text
file: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV),
- dynamically linked (uses shared libs), stripped
+ dynamically linked (uses shared libs), stripped
/dev/wd0a: block special (0/0)
/dev/hda: block special (3/0)
@@ -441,9 +440,9 @@ file: application/x-executable
.Ed
.Sh HISTORY
-There has been a
-.Nm
-command in every
+There has been a
+.Nm
+command in every
.Dv UNIX since at least Research Version 4
(man page dated November, 1973).
The System V version introduced one significant major change:
@@ -466,7 +465,7 @@ Primary development and maintenance from 1990 to the present by
Christos Zoulas (christos@astron.com).
.Pp
Altered by Chris Lowth, chris@lowth.com, 2000:
-Handle the
+Handle the
.Fl i
option to output mime type strings, using an alternative
magic file and internal logic.
@@ -480,7 +479,7 @@ support and merge MIME and non-MIME magic, support directories as well
as files of magic, apply many bug fixes and improve the build system.
.Pp
The list of contributors to the
-.Dq magic
+.Sq magic
directory (magic files)
is too long to include here.
You know who you are; thank you.
@@ -512,10 +511,10 @@ files.
The support for text files (primarily for programming languages)
is simplistic, inefficient and requires recompilation to update.
.Pp
-The list of keywords in
+The list of keywords in
.Dv ascmagic
probably belongs in the Magic file.
-This could be done by using some keyword like
+This could be done by using some keyword like
.Sq *
for the offset value.
.Pp
@@ -523,20 +522,20 @@ Complain about conflicts in the magic file entries.
Make a rule that the magic entries sort based on file offset rather
than position within the magic file?
.Pp
-The program should provide a way to give an estimate
-of
-.Dq how good
+The program should provide a way to give an estimate
+of
+.Sq how good
a guess is.
-We end up removing guesses (e.g.
-.Dq From\
+We end up removing guesses (e.g.
+.Sq From\
as first 5 chars of file) because
-they are not as good as other guesses (e.g.
-.Dq Newsgroups:
+they are not as good as other guesses (e.g.
+.Sq Newsgroups:
versus
-.Dq Return-Path:
+.Sq Return-Path:
).
Still, if the others don't pan out, it should be possible to use the
-first guess.
+first guess.
.Pp
This manual page, and particularly this section, is too long.
.Sh RETURN CODE
diff --git a/contrib/file/file_opts.h b/contrib/file/file_opts.h
index 46bc08ae4b4e4..1a73e8732b14f 100644
--- a/contrib/file/file_opts.h
+++ b/contrib/file/file_opts.h
@@ -28,6 +28,7 @@ OPT('f', "files-from", 1, " FILE read the filenames to be examined from FIL
OPT('F', "separator", 1, " STRING use string as separator instead of `:'\n")
OPT('i', "mime", 0, " output MIME type strings (--mime-type and\n"
" --mime-encoding)\n")
+OPT_LONGONLY("apple", 0, " output the Apple CREATOR/TYPE\n")
OPT_LONGONLY("mime-type", 0, " output the MIME type\n")
OPT_LONGONLY("mime-encoding", 0, " output the MIME encoding\n")
OPT('k', "keep-going", 0, " don't stop at the first match\n")
diff --git a/contrib/file/fsmagic.c b/contrib/file/fsmagic.c
index f3b23725aafe7..a68cd109f79ec 100644
--- a/contrib/file/fsmagic.c
+++ b/contrib/file/fsmagic.c
@@ -30,13 +30,17 @@
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: fsmagic.c,v 1.59 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
-#include <sys/stat.h>
/* Since major is a function on SVR4, we cannot use `ifndef major'. */
#ifdef MAJOR_IN_MKDEV
# include <sys/mkdev.h>
@@ -56,10 +60,6 @@
#endif
#undef HAVE_MAJOR
-#ifndef lint
-FILE_RCSID("@(#)$File: fsmagic.c,v 1.52 2008/07/25 23:59:01 rrt Exp $")
-#endif /* lint */
-
private int
bad_link(struct magic_set *ms, int err, char *buf)
{
@@ -84,6 +84,21 @@ bad_link(struct magic_set *ms, int err, char *buf)
return 1;
}
+private int
+handle_mime(struct magic_set *ms, int mime, const char *str)
+{
+ if ((mime & MAGIC_MIME_TYPE)) {
+ if (file_printf(ms, "application/%s", str) == -1)
+ return -1;
+ if ((mime & MAGIC_MIME_ENCODING) && file_printf(ms,
+ "; charset=") == -1)
+ return -1;
+ }
+ if ((mime & MAGIC_MIME_ENCODING) && file_printf(ms, "binary") == -1)
+ return -1;
+ return 0;
+}
+
protected int
file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
{
@@ -95,6 +110,8 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
struct stat tstatbuf;
#endif
+ if (ms->flags & MAGIC_APPLE)
+ return 0;
if (fn == NULL)
return 0;
@@ -140,11 +157,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
switch (sb->st_mode & S_IFMT) {
case S_IFDIR:
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-directory")
- == -1)
- return -1;
- if (!mime && file_printf(ms, "directory") == -1)
+ if (mime) {
+ if (handle_mime(ms, mime, "x-directory") == -1)
+ return -1;
+ } else if (file_printf(ms, "directory") == -1)
return -1;
return 1;
#ifdef S_IFCHR
@@ -156,20 +172,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
*/
if ((ms->flags & MAGIC_DEVICES) != 0)
break;
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-character-device")
- == -1)
- return -1;
- if (!mime) {
+ if (mime) {
+ if (handle_mime(ms, mime, "x-character-device") == -1)
+ return -1;
+ } else {
#ifdef HAVE_STAT_ST_RDEV
# ifdef dv_unit
if (file_printf(ms, "character special (%d/%d/%d)",
- major(sb->st_rdev), dv_unit(sb->st_rdev),
+ major(sb->st_rdev), dv_unit(sb->st_rdev),
dv_subunit(sb->st_rdev)) == -1)
return -1;
# else
if (file_printf(ms, "character special (%ld/%ld)",
- (long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1)
+ (long)major(sb->st_rdev), (long)minor(sb->st_rdev))
+ == -1)
return -1;
# endif
#else
@@ -188,11 +204,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
*/
if ((ms->flags & MAGIC_DEVICES) != 0)
break;
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-block-device")
- == -1)
- return -1;
- if (!mime) {
+ if (mime) {
+ if (handle_mime(ms, mime, "x-block-device") == -1)
+ return -1;
+ } else {
#ifdef HAVE_STAT_ST_RDEV
# ifdef dv_unit
if (file_printf(ms, "block special (%d/%d/%d)",
@@ -216,21 +231,19 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
case S_IFIFO:
if((ms->flags & MAGIC_DEVICES) != 0)
break;
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-fifo")
- == -1)
- return -1;
- if (!mime && file_printf(ms, "fifo (named pipe)") == -1)
+ if (mime) {
+ if (handle_mime(ms, mime, "x-fifo") == -1)
+ return -1;
+ } else if (file_printf(ms, "fifo (named pipe)") == -1)
return -1;
return 1;
#endif
#ifdef S_IFDOOR
case S_IFDOOR:
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-door")
- == -1)
- return -1;
- if (!mime && file_printf(ms, "door") == -1)
+ if (mime) {
+ if (handle_mime(ms, mime, "x-door") == -1)
+ return -1;
+ } else if (file_printf(ms, "door") == -1)
return -1;
return 1;
#endif
@@ -242,11 +255,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
fn);
return -1;
}
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-symlink")
- == -1)
- return -1;
- if (!mime && file_printf(ms,
+ if (mime) {
+ if (handle_mime(ms, mime, "x-symlink") == -1)
+ return -1;
+ } else if (file_printf(ms,
"unreadable symlink `%s' (%s)", fn,
strerror(errno)) == -1)
return -1;
@@ -271,18 +283,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
"path too long: `%s'", buf);
return -1;
}
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-path-too-long")
- == -1)
- return -1;
- if (!mime && file_printf(ms,
+ if (mime) {
+ if (handle_mime(ms, mime,
+ "x-path-too-long") == -1)
+ return -1;
+ } else if (file_printf(ms,
"path too long: `%s'", fn) == -1)
return -1;
return 1;
}
- (void)strcpy(buf2, fn); /* take dir part */
+ /* take dir part */
+ (void)strlcpy(buf2, fn, sizeof buf2);
buf2[tmp - fn + 1] = '\0';
- (void)strcat(buf2, buf); /* plus (rel) link */
+ /* plus (rel) link */
+ (void)strlcat(buf2, buf, sizeof buf2);
tmp = buf2;
}
if (stat(tmp, &tstatbuf) < 0)
@@ -297,11 +311,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
ms->flags |= MAGIC_SYMLINK;
return p != NULL ? 1 : -1;
} else { /* just print what it points to */
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-symlink")
- == -1)
- return -1;
- if (!mime && file_printf(ms, "symbolic link to `%s'",
+ if (mime) {
+ if (handle_mime(ms, mime, "x-symlink") == -1)
+ return -1;
+ } else if (file_printf(ms, "symbolic link to `%s'",
buf) == -1)
return -1;
}
@@ -310,11 +323,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
#ifdef S_IFSOCK
#ifndef __COHERENT__
case S_IFSOCK:
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-socket")
- == -1)
- return -1;
- if (!mime && file_printf(ms, "socket") == -1)
+ if (mime) {
+ if (handle_mime(ms, mime, "x-socket") == -1)
+ return -1;
+ } else if (file_printf(ms, "socket") == -1)
return -1;
return 1;
#endif
@@ -340,9 +352,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
* when we read the file.)
*/
if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) {
- if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
- file_printf(ms, mime ? "application/x-empty" :
- "empty") == -1)
+ if (mime) {
+ if (handle_mime(ms, mime, "x-empty") == -1)
+ return -1;
+ } else if (file_printf(ms, "empty") == -1)
return -1;
return 1;
}
diff --git a/contrib/file/funcs.c b/contrib/file/funcs.c
index af9860517044d..af10688e3b991 100644
--- a/contrib/file/funcs.c
+++ b/contrib/file/funcs.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) Christos Zoulas 2003.
* All Rights Reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -11,7 +11,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,6 +25,11 @@
* SUCH DAMAGE.
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: funcs.c,v 1.51 2008/11/07 18:57:28 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
#include <stdarg.h>
#include <stdlib.h>
@@ -40,10 +45,6 @@
#include <limits.h>
#endif
-#ifndef lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.44 2008/07/16 18:00:57 christos Exp $")
-#endif /* lint */
-
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)~0)
#endif
@@ -97,17 +98,17 @@ file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
uint32_t lineno)
{
/* Only the first error is ok */
- if (ms->haderr)
+ if (ms->event_flags & EVENT_HAD_ERR)
return;
if (lineno != 0) {
free(ms->o.buf);
ms->o.buf = NULL;
file_printf(ms, "line %u: ", lineno);
}
- file_vprintf(ms, f, va);
+ file_vprintf(ms, f, va);
if (error > 0)
file_printf(ms, " (%s)", strerror(error));
- ms->haderr++;
+ ms->event_flags |= EVENT_HAD_ERR;
ms->error = error;
}
@@ -157,9 +158,16 @@ protected int
file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
size_t nb)
{
- int m;
+ int m = 0, rv = 0, looks_text = 0;
int mime = ms->flags & MAGIC_MIME;
const unsigned char *ubuf = CAST(const unsigned char *, buf);
+ unichar *u8buf = NULL;
+ size_t ulen;
+ const char *code = NULL;
+ const char *code_mime = "binary";
+ const char *type = NULL;
+
+
if (nb == 0) {
if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
@@ -175,6 +183,11 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
return 1;
}
+ if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
+ looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen,
+ &code, &code_mime, &type);
+ }
+
#ifdef __EMX__
if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
switch (file_os2_apptype(ms, inname, buf, nb)) {
@@ -189,41 +202,96 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
#endif
/* try compression stuff */
- if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 ||
- (m = file_zmagic(ms, fd, inname, ubuf, nb)) == 0) {
- /* Check if we have a tar file */
- if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 ||
- (m = file_is_tar(ms, ubuf, nb)) == 0) {
- /* try tests in /etc/magic (or surrogate magic file) */
- if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 ||
- (m = file_softmagic(ms, ubuf, nb, BINTEST)) == 0) {
- /* try known keywords, check whether it is ASCII */
- if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 ||
- (m = file_ascmagic(ms, ubuf, nb)) == 0) {
- /* abandon hope, all ye who remain here */
- if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
- file_printf(ms, mime ? "application/octet-stream" :
- "data") == -1)
- return -1;
- m = 1;
- }
+ if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0)
+ if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) {
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr, "zmagic %d\n", m);
+ goto done;
}
- }
- }
+
+ /* Check if we have a tar file */
+ if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0)
+ if ((m = file_is_tar(ms, ubuf, nb)) != 0) {
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr, "tar %d\n", m);
+ goto done;
+ }
+
+ /* Check if we have a CDF file */
+ if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0)
+ if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) {
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr, "cdf %d\n", m);
+ goto done;
+ }
+
+ /* try soft magic tests */
+ if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
+ if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) {
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr, "softmagic %d\n", m);
#ifdef BUILTIN_ELF
- if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
- nb > 5 && fd != -1) {
- /*
- * We matched something in the file, so this *might*
- * be an ELF file, and the file is at least 5 bytes
- * long, so if it's an ELF file it has at least one
- * byte past the ELF magic number - try extracting
- * information from the ELF headers that cannot easily
- * be extracted with rules in the magic file.
- */
- (void)file_tryelf(ms, fd, ubuf, nb);
- }
+ if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
+ nb > 5 && fd != -1) {
+ /*
+ * We matched something in the file, so this
+ * *might* be an ELF file, and the file is at
+ * least 5 bytes long, so if it's an ELF file
+ * it has at least one byte past the ELF magic
+ * number - try extracting information from the
+ * ELF headers that cannot easily be
+ * extracted with rules in the magic file.
+ */
+ if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0)
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr,
+ "elf %d\n", m);
+ }
#endif
+ goto done;
+ }
+
+ /* try text properties (and possibly text tokens) */
+ if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
+
+ if ((m = file_ascmagic(ms, ubuf, nb)) != 0) {
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr, "ascmagic %d\n", m);
+ goto done;
+ }
+
+ /* try to discover text encoding */
+ if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
+ if (looks_text == 0)
+ if ((m = file_ascmagic_with_encoding( ms, ubuf,
+ nb, u8buf, ulen, code, type)) != 0) {
+ if ((ms->flags & MAGIC_DEBUG) != 0)
+ (void)fprintf(stderr,
+ "ascmagic/enc %d\n", m);
+ goto done;
+ }
+ }
+ }
+
+ /* give up */
+ m = 1;
+ if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
+ file_printf(ms, mime ? "application/octet-stream" : "data") == -1) {
+ rv = -1;
+ }
+ done:
+ if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
+ if (ms->flags & MAGIC_MIME_TYPE)
+ if (file_printf(ms, "; charset=") == -1)
+ rv = -1;
+ if (file_printf(ms, "%s", code_mime) == -1)
+ rv = -1;
+ }
+ if (u8buf)
+ free(u8buf);
+ if (rv)
+ return rv;
+
return m;
}
#endif
@@ -236,7 +304,7 @@ file_reset(struct magic_set *ms)
return -1;
}
ms->o.buf = NULL;
- ms->haderr = 0;
+ ms->event_flags &= ~EVENT_HAD_ERR;
ms->error = -1;
return 0;
}
@@ -255,12 +323,15 @@ file_getbuffer(struct magic_set *ms)
char *pbuf, *op, *np;
size_t psize, len;
- if (ms->haderr)
+ if (ms->event_flags & EVENT_HAD_ERR)
return NULL;
if (ms->flags & MAGIC_RAW)
return ms->o.buf;
+ if (ms->o.buf == NULL)
+ return NULL;
+
/* * 4 is for octal representation, + 1 is for NUL */
len = strlen(ms->o.buf);
if (len > (SIZE_MAX - 1) / 4) {
@@ -315,7 +386,7 @@ file_getbuffer(struct magic_set *ms)
for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
if (isprint((unsigned char)*op)) {
- *np++ = *op;
+ *np++ = *op;
} else {
OCTALIFY(np, op);
}
diff --git a/contrib/file/getopt_long.c b/contrib/file/getopt_long.c
index 5b9c20b499288..2ad5b4de0a8d6 100644
--- a/contrib/file/getopt_long.c
+++ b/contrib/file/getopt_long.c
@@ -29,14 +29,16 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: getopt_long.c,v 1.5 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include <assert.h>
#ifdef HAVE_ERR_H
#include <err.h>
#else
-#include <stdio.h>
#define warnx printf
#endif
#include <errno.h>
diff --git a/contrib/file/is_tar.c b/contrib/file/is_tar.c
index a9311117ed240..f962edbd8ee34 100644
--- a/contrib/file/is_tar.c
+++ b/contrib/file/is_tar.c
@@ -38,16 +38,16 @@
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: is_tar.c,v 1.36 2009/02/03 20:27:51 christos Exp $")
+#endif
+
#include "magic.h"
#include <string.h>
#include <ctype.h>
-#include <sys/types.h>
#include "tar.h"
-#ifndef lint
-FILE_RCSID("@(#)$File: is_tar.c,v 1.31 2008/02/04 20:51:17 christos Exp $")
-#endif
-
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
private int is_tar(const unsigned char *, size_t);
@@ -66,16 +66,17 @@ file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
* Do the tar test first, because if the first file in the tar
* archive starts with a dot, we can confuse it with an nroff file.
*/
- int tar = is_tar(buf, nbytes);
+ int tar;
int mime = ms->flags & MAGIC_MIME;
- if (tar < 1 || tar > 3)
+ if ((ms->flags & MAGIC_APPLE) != 0)
return 0;
- if (mime == MAGIC_MIME_ENCODING)
+ tar = is_tar(buf, nbytes);
+ if (tar < 1 || tar > 3)
return 0;
- if (file_printf(ms, mime ? "application/x-tar" :
+ if (file_printf(ms, "%s", mime ? "application/x-tar" :
tartype[tar - 1]) == -1)
return -1;
return 1;
diff --git a/contrib/file/libmagic.man b/contrib/file/libmagic.man
index d7082c9904fc3..e39a61053ced9 100644
--- a/contrib/file/libmagic.man
+++ b/contrib/file/libmagic.man
@@ -1,4 +1,4 @@
-.\" $File: libmagic.man,v 1.18 2008/02/28 22:24:46 rrt Exp $
+.\" $File: libmagic.man,v 1.19 2008/10/06 20:16:04 christos Exp $
.\"
.\" Copyright (c) Christos Zoulas 2003.
.\" All Rights Reserved.
@@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd November 15, 2006
+.Dd October 6, 2008
.Dt MAGIC 3
.Os
.Sh NAME
@@ -200,11 +200,8 @@ before any magic queries can performed.
.Pp
The default database file is named by the MAGIC environment variable. If
that variable is not set, the default database file name is __MAGIC__.
-.Pp
.Fn magic_load
adds
-.Dq .mime
-and/or
.Dq .mgc
to the database filename as appropriate.
.Sh RETURN VALUES
@@ -237,11 +234,7 @@ when
.Dv MAGIC_PRESERVE_ATIME
is set.
.Sh FILES
-.Bl -tag -width __MAGIC__.mime.mgc -compact
-.It Pa __MAGIC__.mime
-The non-compiled default magic mime database.
-.It Pa __MAGIC__.mime.mgc
-The compiled default magic mime database.
+.Bl -tag -width __MAGIC__.mgc -compact
.It Pa __MAGIC__
The non-compiled default magic database.
.It Pa __MAGIC__.mgc
diff --git a/contrib/file/magic.c b/contrib/file/magic.c
index 55dfee1b35b24..26b7b864d832f 100644
--- a/contrib/file/magic.c
+++ b/contrib/file/magic.c
@@ -26,15 +26,16 @@
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: magic.c,v 1.59 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
-#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
-#include <sys/types.h>
-#include <sys/param.h> /* for MAXPATHLEN */
-#include <sys/stat.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
@@ -64,10 +65,6 @@
#include "patchlevel.h"
-#ifndef lint
-FILE_RCSID("@(#)$File: magic.c,v 1.54 2008/07/25 23:30:32 rrt Exp $")
-#endif /* lint */
-
#ifndef PIPE_BUF
/* Get the PIPE_BUF from pathconf */
#ifdef _PC_PIPE_BUF
@@ -116,7 +113,7 @@ magic_open(int flags)
if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
goto free;
- ms->haderr = 0;
+ ms->event_flags = 0;
ms->error = -1;
ms->mlist = NULL;
ms->file = "unknown";
@@ -229,7 +226,7 @@ close_and_restore(const struct magic_set *ms, const char *name, int fd,
#elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
struct utimbuf utbuf;
- (void)memset(utbuf, 0, sizeof(utbuf));
+ (void)memset(&utbuf, 0, sizeof(utbuf));
utbuf.actime = sb->st_atime;
utbuf.modtime = sb->st_mtime;
(void) utime(name, &utbuf); /* don't care if loses */
@@ -302,8 +299,9 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
if ((fd = open(inname, flags)) < 0) {
#ifdef __CYGWIN__
/* FIXME: Do this with EXEEXT from autotools */
- char *tmp = alloca(strlen(inname) + 5);
- (void)strcat(strcpy(tmp, inname), ".exe");
+ size_t len = strlen(inname) + 5;
+ char *tmp = alloca(len);
+ (void)strlcat(strlcpy(tmp, inname, len), ".exe", len);
if ((fd = open(tmp, flags)) < 0) {
#endif
if (unreadable_info(ms, sb.st_mode,
@@ -385,13 +383,13 @@ magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
public const char *
magic_error(struct magic_set *ms)
{
- return ms->haderr ? ms->o.buf : NULL;
+ return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
}
public int
magic_errno(struct magic_set *ms)
{
- return ms->haderr ? ms->error : 0;
+ return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
}
public int
diff --git a/contrib/file/magic.h b/contrib/file/magic.h
index ecdd53c1f96bf..a664e9aa39ed5 100644
--- a/contrib/file/magic.h
+++ b/contrib/file/magic.h
@@ -1,7 +1,7 @@
/*
* Copyright (c) Christos Zoulas 2003.
* All Rights Reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -11,7 +11,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -34,21 +34,27 @@
#define MAGIC_SYMLINK 0x000002 /* Follow symlinks */
#define MAGIC_COMPRESS 0x000004 /* Check inside compressed files */
#define MAGIC_DEVICES 0x000008 /* Look at the contents of devices */
-#define MAGIC_MIME_TYPE 0x000010 /* Return only the MIME type */
+#define MAGIC_MIME_TYPE 0x000010 /* Return the MIME type */
#define MAGIC_CONTINUE 0x000020 /* Return all matches */
#define MAGIC_CHECK 0x000040 /* Print warnings to stderr */
#define MAGIC_PRESERVE_ATIME 0x000080 /* Restore access time on exit */
-#define MAGIC_RAW 0x000100 /* Don't translate unprint chars */
+#define MAGIC_RAW 0x000100 /* Don't translate unprintable chars */
#define MAGIC_ERROR 0x000200 /* Handle ENOENT etc as real errors */
-#define MAGIC_MIME_ENCODING 0x000400 /* Return only the MIME encoding */
+#define MAGIC_MIME_ENCODING 0x000400 /* Return the MIME encoding */
#define MAGIC_MIME (MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING)
+#define MAGIC_APPLE 0x000800 /* Return the Apple creator and type */
#define MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */
#define MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */
#define MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */
#define MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */
#define MAGIC_NO_CHECK_ELF 0x010000 /* Don't check for elf details */
-#define MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */
-#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */
+#define MAGIC_NO_CHECK_TEXT 0x020000 /* Don't check for text files */
+#define MAGIC_NO_CHECK_CDF 0x040000 /* Don't check for cdf files */
+#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check tokens */
+#define MAGIC_NO_CHECK_ENCODING 0x200000 /* Don't check text encodings */
+
+/* Defined for backwards compatibility (renamed) */
+#define MAGIC_NO_CHECK_ASCII MAGIC_NO_CHECK_TEXT
/* Defined for backwards compatibility; do nothing */
#define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */
diff --git a/contrib/file/magic.man b/contrib/file/magic.man
index 314a01476af35..fb334bc9cfc92 100644
--- a/contrib/file/magic.man
+++ b/contrib/file/magic.man
@@ -1,4 +1,4 @@
-.\" $File: magic.man,v 1.57 2008/08/30 09:50:20 christos Exp $
+.\" $File: magic.man,v 1.59 2008/11/06 23:22:53 christos Exp $
.Dd August 30, 2008
.Dt MAGIC __FSECTION__
.Os
@@ -84,6 +84,8 @@ local time rather than UTC.
.It Dv qldate
An eight-byte value interpreted as a UNIX-style date, but interpreted as
local time rather than UTC.
+.It Dv beid3
+A 32-bit ID3 length in big-endian byte order.
.It Dv beshort
A two-byte value in big-endian byte order.
.It Dv belong
@@ -110,6 +112,8 @@ interpreted as a UNIX-style date, but interpreted as local time rather
than UTC.
.It Dv bestring16
A two-byte unicode (UCS16) string in big-endian byte order.
+.It Dv leid3
+A 32-bit ID3 length in little-endian byte order.
.It Dv leshort
A two-byte value in little-endian byte order.
.It Dv lelong
@@ -145,6 +149,8 @@ interpreted as a UNIX date.
A four-byte value in middle-endian (PDP-11) byte order,
interpreted as a UNIX-style date, but interpreted as local time rather
than UTC.
+.It Dv indirect
+Starting at the given offset, consult the magic database again.
.It Dv regex
A regular expression match in extended POSIX regular expression syntax
(like egrep). Regular expressions can take exponential time to
@@ -290,6 +296,11 @@ added before it: multiple matches are normally separated by a single
space.
.El
.Pp
+An Apple 4+4 character creator and type can be specified as:
+.Bd -literal -offset indent
+!:apple CREATYPE
+.Ed
+.Pp
A MIME type is given on a separate line, which must be the next
non-blank or comment line after the magic line that identifies the
file type, and has the following format:
@@ -361,12 +372,12 @@ the file.
The value at that offset is read, and is used again as an offset
in the file.
Indirect offsets are of the form:
-.Em (( x [.[bslBSL]][+\-][ y ]) .
+.Em (( x [.[bislBISL]][+\-][ y ]) .
The value of
.Em x
is used as an offset in the file.
-A byte, short or long is read at that offset depending on the
-.Em [bslBSLm]
+A byte, id3 length, short or long is read at that offset depending on the
+.Em [bislBISLm]
type specifier.
The capitalized types interpret the number as a big endian
value, whereas the small letter versions interpret the number as a little
diff --git a/contrib/file/patchlevel.h b/contrib/file/patchlevel.h
index db6858b5ffab8..b03a8d51017e1 100644
--- a/contrib/file/patchlevel.h
+++ b/contrib/file/patchlevel.h
@@ -1,11 +1,14 @@
-#define FILE_VERSION_MAJOR 4
-#define patchlevel 26
+#define FILE_VERSION_MAJOR 5
+#define patchlevel 0
/*
* Patchlevel file for Ian Darwin's MAGIC command.
- * $File: patchlevel.h,v 1.70 2008/08/30 10:01:01 christos Exp $
+ * $File: patchlevel.h,v 1.71 2009/01/21 19:09:42 christos Exp $
*
* $Log: patchlevel.h,v $
+ * Revision 1.71 2009/01/21 19:09:42 christos
+ * file 5.0
+ *
* Revision 1.70 2008/08/30 10:01:01 christos
* file 4.26
*
diff --git a/contrib/file/print.c b/contrib/file/print.c
index c41e71eac534a..d44a5b3b1be82 100644
--- a/contrib/file/print.c
+++ b/contrib/file/print.c
@@ -30,8 +30,11 @@
*/
#include "file.h"
-#include <stdio.h>
-#include <errno.h>
+
+#ifndef lint
+FILE_RCSID("@(#)$File: print.c,v 1.66 2009/02/03 20:27:51 christos Exp $")
+#endif /* lint */
+
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
@@ -40,10 +43,6 @@
#endif
#include <time.h>
-#ifndef lint
-FILE_RCSID("@(#)$File: print.c,v 1.63 2008/02/17 19:28:54 rrt Exp $")
-#endif /* lint */
-
#define SZOF(a) (sizeof(a) / sizeof(a[0]))
#ifndef COMPILE_ONLY
@@ -64,7 +63,8 @@ file_mdump(struct magic *m)
if (m->in_op & FILE_OPINVERSE)
(void) fputc('~', stderr);
(void) fprintf(stderr, "%c%u),",
- ((m->in_op & FILE_OPS_MASK) < SZOF(optyp)) ?
+ ((size_t)(m->in_op & FILE_OPS_MASK) <
+ SZOF(optyp)) ?
optyp[m->in_op & FILE_OPS_MASK] : '?',
m->in_offset);
}
@@ -93,7 +93,7 @@ file_mdump(struct magic *m)
(void) fprintf(stderr, "/%u", m->str_range);
}
else {
- if ((m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
+ if ((size_t)(m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
(void) fputc(optyp[m->mask_op & FILE_OPS_MASK], stderr);
else
(void) fputc('?', stderr);
diff --git a/contrib/file/readcdf.c b/contrib/file/readcdf.c
new file mode 100644
index 0000000000000..27031dbd9dd3d
--- /dev/null
+++ b/contrib/file/readcdf.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 2008 Christos Zoulas
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: readcdf.c,v 1.11 2009/02/03 20:27:51 christos Exp $")
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+#include "cdf.h"
+#include "magic.h"
+
+#define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
+
+/*
+ * Report the properties found in a CDF summary information section.
+ *
+ * When no MIME flag is set (NOTMIME), every printable property is
+ * appended to the output as ", <name>: <value>".  When a MIME flag is
+ * set, nothing is printed per property; instead the "name of application"
+ * string selects the subtype, and a single "application/<subtype>" is
+ * printed after the loop.  The subtype stays "vnd.ms-office" unless the
+ * application name contains "Word", "Excel" or "Powerpoint".
+ *
+ *	ms	magic set that receives the output and any error
+ *	info	array of unpacked properties
+ *	count	number of entries in info
+ *
+ * Returns 1 on success, -1 if printing fails or a property has a type
+ * that is not handled here.
+ */
+private int
+cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
+    size_t count)
+{
+	size_t i;
+	cdf_timestamp_t tp;
+	struct timespec ts;
+	char buf[64];
+	const char *str = "vnd.ms-office";
+	const char *s;
+	int len;
+
+	for (i = 0; i < count; i++) {
+		/* buf holds the printable name of this property. */
+		cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
+		switch (info[i].pi_type) {
+		case CDF_SIGNED16:
+			if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
+			    info[i].pi_s16) == -1)
+				return -1;
+			break;
+		case CDF_SIGNED32:
+			if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
+			    info[i].pi_s32) == -1)
+				return -1;
+			break;
+		case CDF_UNSIGNED32:
+			if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
+			    info[i].pi_u32) == -1)
+				return -1;
+			break;
+		case CDF_LENGTH32_STRING:
+			len = info[i].pi_str.s_len;
+			/* Strings of length 0 or 1 are not reported. */
+			if (len > 1) {
+				s = info[i].pi_str.s_buf;
+				if (NOTMIME(ms)) {
+					if (file_printf(ms, ", %s: %.*s", buf,
+					    len, s) == -1)
+						return -1;
+				} else if (info[i].pi_id ==
+				    CDF_PROPERTY_NAME_OF_APPLICATION) {
+					/*
+					 * NOTE(review): strstr() needs a
+					 * NUL-terminated string; presumably
+					 * s_buf is terminated within the
+					 * stream -- confirm against cdf.c.
+					 */
+					if (strstr(s, "Word"))
+						str = "msword";
+					else if (strstr(s, "Excel"))
+						str = "vnd.ms-excel";
+					else if (strstr(s, "Powerpoint"))
+						str = "vnd.ms-powerpoint";
+				}
+			}
+			break;
+		case CDF_FILETIME:
+			tp = info[i].pi_tp;
+			if (tp != 0) {
+				/*
+				 * Values below this threshold are printed as
+				 * an elapsed time, larger ones as a calendar
+				 * date.
+				 */
+				if (tp < 1000000000000000LL) {
+					char tbuf[64];
+					cdf_print_elapsed_time(tbuf,
+					    sizeof(tbuf), tp);
+					if (NOTMIME(ms) && file_printf(ms,
+					    ", %s: %s", buf, tbuf) == -1)
+						return -1;
+				} else {
+					char *c, *ec;
+					cdf_timestamp_to_timespec(&ts, tp);
+					c = ctime(&ts.tv_sec);
+					/*
+					 * ctime() returns NULL when the time
+					 * cannot be represented (a corrupt
+					 * timestamp, for instance).  Skip the
+					 * property instead of handing NULL
+					 * to strchr().
+					 */
+					if (c == NULL)
+						break;
+					/* Drop ctime()'s trailing newline. */
+					if ((ec = strchr(c, '\n')) != NULL)
+						*ec = '\0';
+
+					if (NOTMIME(ms) && file_printf(ms,
+					    ", %s: %s", buf, c) == -1)
+						return -1;
+				}
+			}
+			break;
+		case CDF_CLIPBOARD:
+			/* Clipboard data is deliberately not reported. */
+			break;
+		default:
+			file_error(ms, 0, "Internal parsing error");
+			return -1;
+		}
+	}
+	if (!NOTMIME(ms)) {
+		if (file_printf(ms, "application/%s", str) == -1)
+			return -1;
+	}
+	return 1;
+}
+
+/*
+ * Unpack the summary information stream and describe it.
+ *
+ * When no MIME flag is set, prints "CDF V2 Document", the byte order and
+ * the originating OS with its version, then hands the unpacked properties
+ * to cdf_file_property_info().
+ *
+ *	ms	magic set that receives the output
+ *	sst	stream holding the summary information
+ *
+ * Returns 0 if the byte-order mark is not 0xfffe, -1 on unpack or print
+ * failure, otherwise whatever cdf_file_property_info() returns.
+ *
+ * The property array filled in by a successful unpack is released on
+ * every path that leaves this function after that point.
+ */
+private int
+cdf_file_summary_info(struct magic_set *ms, const cdf_stream_t *sst)
+{
+	cdf_summary_info_header_t si;
+	cdf_property_info_t *info;
+	size_t count;
+	int m;
+
+	if (cdf_unpack_summary_info(sst, &si, &info, &count) == -1) {
+		/*
+		 * NOTE(review): this relies on the unpacker having filled
+		 * in si even though it failed -- confirm against cdf.c.
+		 * info is not touched here because nothing in view shows
+		 * that it is valid after a failed unpack.
+		 */
+		if (si.si_byte_order != 0xfffe)
+			return 0;
+		else
+			return -1;
+	}
+
+	/* From here on info belongs to us and must be freed. */
+	m = 0;
+	if (si.si_byte_order != 0xfffe)
+		goto out;
+
+	m = -1;
+	if (NOTMIME(ms)) {
+		if (file_printf(ms, "CDF V2 Document") == -1)
+			goto out;
+
+		if (file_printf(ms, ", %s Endian",
+		    si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
+			goto out;
+		switch (si.si_os) {
+		case 2:
+			/* Windows: low byte first, then high byte. */
+			if (file_printf(ms, ", Os: Windows, Version %d.%d",
+			    si.si_os_version & 0xff, si.si_os_version >> 8)
+			    == -1)
+				goto out;
+			break;
+		case 1:
+			/* MacOS: high byte first, then low byte. */
+			if (file_printf(ms, ", Os: MacOS, Version %d.%d",
+			    si.si_os_version >> 8, si.si_os_version & 0xff)
+			    == -1)
+				goto out;
+			break;
+		default:
+			if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
+			    si.si_os_version & 0xff, si.si_os_version >> 8)
+			    == -1)
+				goto out;
+			break;
+		}
+	}
+
+	m = cdf_file_property_info(ms, info, count);
+out:
+	free(info);
+	return m;
+}
+
+/*
+ * Try to identify the file open on fd as a CDF document.
+ *
+ * Reads, in order, the header, the SAT, the SSAT, the directory, the
+ * short stream and the summary information, then lets
+ * cdf_file_summary_info() produce the description.  Each table read so
+ * far is released through the out labels at the bottom.
+ *
+ *	ms	magic set that receives the output and any error
+ *	fd	descriptor the CDF structures are read from
+ *	buf	unused
+ *	nbytes	unused
+ *
+ * Returns 0 when MAGIC_APPLE is set, when the header cannot be read, or
+ * when the file has no summary information; -1 when one of the tables
+ * cannot be read; otherwise the result of cdf_file_summary_info().
+ */
+protected int
+file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
+    size_t nbytes)
+{
+	cdf_header_t h;
+	cdf_sat_t sat, ssat;
+	cdf_stream_t sst, scn;
+	cdf_dir_t dir;
+	int i;
+	/* Reference the unused parameters to keep compilers quiet. */
+	(void)&nbytes;
+	(void)&buf;
+
+	if (ms->flags & MAGIC_APPLE)
+		return 0;
+	/* A failed header read is reported as "no match", not an error. */
+	if (cdf_read_header(fd, &h) == -1)
+		return 0;
+#ifdef CDF_DEBUG
+	cdf_dump_header(&h);
+#endif
+
+	if (cdf_read_sat(fd, &h, &sat) == -1) {
+		file_error(ms, errno, "Can't read SAT");
+		return -1;
+	}
+#ifdef CDF_DEBUG
+	cdf_dump_sat("SAT", &h, &sat);
+#endif
+
+	if ((i = cdf_read_ssat(fd, &h, &sat, &ssat)) == -1) {
+		/* Name the table that failed; the SAT was read above. */
+		file_error(ms, errno, "Can't read SSAT");
+		goto out1;
+	}
+#ifdef CDF_DEBUG
+	cdf_dump_sat("SSAT", &h, &ssat);
+#endif
+
+	if ((i = cdf_read_dir(fd, &h, &sat, &dir)) == -1) {
+		file_error(ms, errno, "Can't read directory");
+		goto out2;
+	}
+
+	if ((i = cdf_read_short_stream(fd, &h, &sat, &dir, &sst)) == -1) {
+		file_error(ms, errno, "Cannot read short stream");
+		goto out3;
+	}
+
+#ifdef CDF_DEBUG
+	cdf_dump_dir(fd, &h, &sat, &ssat, &sst, &dir);
+#endif
+	if ((i = cdf_read_summary_info(fd, &h, &sat, &ssat, &sst, &dir, &scn))
+	    == -1) {
+		/* Some files don't have summary info! */
+#ifdef notyet
+		file_error(ms, errno, "Can't read summary_info");
+#else
+		i = 0;
+#endif
+		goto out4;
+	}
+#ifdef CDF_DEBUG
+	cdf_dump_summary_info(&h, &scn);
+#endif
+	if ((i = cdf_file_summary_info(ms, &scn)) == -1)
+		file_error(ms, errno, "Can't expand summary_info");
+	free(scn.sst_tab);
+	/* Release the tables in the reverse order they were read. */
+out4:
+	free(sst.sst_tab);
+out3:
+	free(dir.dir_tab);
+out2:
+	free(ssat.sat_tab);
+out1:
+	free(sat.sat_tab);
+	return i;
+}
diff --git a/contrib/file/readelf.c b/contrib/file/readelf.c
index 9dcaf09cdabc0..6f0b328cac805 100644
--- a/contrib/file/readelf.c
+++ b/contrib/file/readelf.c
@@ -26,6 +26,10 @@
*/
#include "file.h"
+#ifndef lint
+FILE_RCSID("@(#)$File: readelf.c,v 1.81 2008/11/04 16:38:28 christos Exp $")
+#endif
+
#ifdef BUILTIN_ELF
#include <string.h>
#include <ctype.h>
@@ -37,10 +41,6 @@
#include "readelf.h"
#include "magic.h"
-#ifndef lint
-FILE_RCSID("@(#)$File: readelf.c,v 1.76 2008/07/16 18:00:57 christos Exp $")
-#endif
-
#ifdef ELFCORE
private int dophn_core(struct magic_set *, int, int, int, off_t, int, size_t,
off_t, int *);
@@ -875,7 +875,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
noff = 0;
for (;;) {
- if (noff >= (size_t)xsh_size)
+ if (noff >= (off_t)xsh_size)
break;
noff = donote(ms, nbuf, (size_t)noff,
(size_t)xsh_size, clazz, swap, 4,
@@ -907,8 +907,9 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
for (;;) {
Elf32_Cap cap32;
Elf64_Cap cap64;
- char cbuf[MAX(sizeof cap32, sizeof cap64)];
- if ((coff += xcap_sizeof) >= (size_t)xsh_size)
+ char cbuf[/*CONSTCOND*/
+ MAX(sizeof cap32, sizeof cap64)];
+ if ((coff += xcap_sizeof) >= (off_t)xsh_size)
break;
if (read(fd, cbuf, (size_t)xcap_sizeof) !=
(ssize_t)xcap_sizeof) {
@@ -929,7 +930,8 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
if (file_printf(ms,
", with unknown capability "
"0x%llx = 0x%llx",
- xcap_tag, xcap_val) == -1)
+ (unsigned long long)xcap_tag,
+ (unsigned long long)xcap_val) == -1)
return -1;
break;
}
@@ -976,11 +978,12 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
if (cap_hw1)
if (file_printf(ms,
" unknown hardware capability 0x%llx",
- cap_hw1) == -1)
+ (unsigned long long)cap_hw1) == -1)
return -1;
} else {
if (file_printf(ms,
- " hardware capability 0x%llx", cap_hw1) == -1)
+ " hardware capability 0x%llx",
+ (unsigned long long)cap_hw1) == -1)
return -1;
}
}
@@ -996,7 +999,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
if (cap_sf1)
if (file_printf(ms,
", with unknown software capability 0x%llx",
- cap_sf1) == -1)
+ (unsigned long long)cap_sf1) == -1)
return -1;
}
return 0;
@@ -1138,7 +1141,7 @@ file_tryelf(struct magic_set *ms, int fd, const unsigned char *buf,
Elf64_Ehdr elf64hdr;
uint16_t type;
- if (ms->flags & MAGIC_MIME)
+ if (ms->flags & (MAGIC_MIME|MAGIC_APPLE))
return 0;
/*
* ELF executables have multiple section headers in arbitrary
diff --git a/contrib/file/softmagic.c b/contrib/file/softmagic.c
index 39a7fc856520c..daf9ccd6d506e 100644
--- a/contrib/file/softmagic.c
+++ b/contrib/file/softmagic.c
@@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -30,6 +30,11 @@
*/
#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: softmagic.c,v 1.133 2008/11/07 22:50:37 christos Exp $")
+#endif /* lint */
+
#include "magic.h"
#include <string.h>
#include <ctype.h>
@@ -37,33 +42,25 @@
#include <time.h>
-#ifndef lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.120 2008/07/28 17:25:21 christos Exp $")
-#endif /* lint */
-
private int match(struct magic_set *, struct magic *, uint32_t,
const unsigned char *, size_t, int);
private int mget(struct magic_set *, const unsigned char *,
struct magic *, size_t, unsigned int);
private int magiccheck(struct magic_set *, struct magic *);
private int32_t mprint(struct magic_set *, struct magic *);
+private int32_t moffset(struct magic_set *, struct magic *);
private void mdebug(uint32_t, const char *, size_t);
private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
const unsigned char *, uint32_t, size_t, size_t);
private int mconvert(struct magic_set *, struct magic *);
private int print_sep(struct magic_set *, int);
+private int handle_annotation(struct magic_set *, struct magic *);
private void cvt_8(union VALUETYPE *, const struct magic *);
private void cvt_16(union VALUETYPE *, const struct magic *);
private void cvt_32(union VALUETYPE *, const struct magic *);
private void cvt_64(union VALUETYPE *, const struct magic *);
/*
- * Macro to give description string according to whether we want plain
- * text or MIME type
- */
-#define MAGIC_DESC ((ms->flags & MAGIC_MIME) ? m->mimetype : m->desc)
-
-/*
* softmagic - lookup one file in parsed, in-memory copy of database
* Passed the name and FILE * of one file to be typed.
*/
@@ -114,15 +111,16 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
uint32_t magindex = 0;
unsigned int cont_level = 0;
int need_separator = 0;
- int returnval = 0; /* if a match is found it is set to 1*/
+ int returnval = 0, e; /* if a match is found it is set to 1*/
int firstline = 1; /* a flag to print X\n X\n- X */
int printed_something = 0;
+ int print = (ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0;
if (file_check_mem(ms, cont_level) == -1)
return -1;
for (magindex = 0; magindex < nmagic; magindex++) {
- int flush;
+ int flush = 0;
struct magic *m = &magic[magindex];
if ((m->flag & BINTEST) != mode) {
@@ -137,11 +135,16 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
ms->line = m->lineno;
/* if main entry matches, print it... */
- flush = !mget(ms, s, m, nbytes, cont_level);
- if (flush) {
- if (m->reln == '!')
- flush = 0;
- } else {
+ switch (mget(ms, s, m, nbytes, cont_level)) {
+ case -1:
+ return -1;
+ case 0:
+ flush = m->reln != '!';
+ break;
+ default:
+ if (m->type == FILE_INDIRECT)
+ returnval = 1;
+
switch (magiccheck(ms, m)) {
case -1:
return -1;
@@ -149,11 +152,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
flush++;
break;
default:
+ flush = 0;
break;
}
+ break;
}
if (flush) {
- /*
+ /*
* main entry didn't match,
* flush its continuations
*/
@@ -167,16 +172,21 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
* If we are going to print something, we'll need to print
* a blank before we print something else.
*/
- if (*MAGIC_DESC) {
+ if (*m->desc) {
need_separator = 1;
printed_something = 1;
+ if ((e = handle_annotation(ms, m)) != 0)
+ return e;
if (print_sep(ms, firstline) == -1)
return -1;
}
- if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
+
+ if (print && mprint(ms, m) == -1)
return -1;
+ ms->c.li[cont_level].off = moffset(ms, m);
+
/* and any continuations that match */
if (file_check_mem(ms, ++cont_level) == -1)
return -1;
@@ -208,10 +218,21 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
continue;
}
#endif
- flush = !mget(ms, s, m, nbytes, cont_level);
- if (flush && m->reln != '!')
- continue;
-
+ switch (mget(ms, s, m, nbytes, cont_level)) {
+ case -1:
+ return -1;
+ case 0:
+ if (m->reln != '!')
+ continue;
+ flush = 1;
+ break;
+ default:
+ if (m->type == FILE_INDIRECT)
+ returnval = 1;
+ flush = 0;
+ break;
+ }
+
switch (flush ? 1 : magiccheck(ms, m)) {
case -1:
return -1;
@@ -234,8 +255,10 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
* If we are going to print something,
* make sure that we have a separator first.
*/
- if (*MAGIC_DESC) {
+ if (*m->desc) {
printed_something = 1;
+ if ((e = handle_annotation(ms, m)) != 0)
+ return e;
if (print_sep(ms, firstline) == -1)
return -1;
}
@@ -248,14 +271,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
/* space if previous printed */
if (need_separator
&& ((m->flag & NOSPACE) == 0)
- && *MAGIC_DESC) {
- if (file_printf(ms, " ") == -1)
+ && *m->desc) {
+ if (print &&
+ file_printf(ms, " ") == -1)
return -1;
need_separator = 0;
}
- if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
+ if (print && mprint(ms, m) == -1)
return -1;
- if (*MAGIC_DESC)
+
+ ms->c.li[cont_level].off = moffset(ms, m);
+
+ if (*m->desc)
need_separator = 1;
/*
@@ -270,11 +297,12 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
}
if (printed_something) {
firstline = 0;
- returnval = 1;
+ if (print)
+ returnval = 1;
}
if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) {
- return 1; /* don't keep searching */
- }
+ return returnval; /* don't keep searching */
+ }
}
return returnval; /* This is hit if -k is set or there is no match */
}
@@ -285,7 +313,7 @@ check_fmt(struct magic_set *ms, struct magic *m)
regex_t rx;
int rc;
- if (strchr(MAGIC_DESC, '%') == NULL)
+ if (strchr(m->desc, '%') == NULL)
return 0;
rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
@@ -295,7 +323,7 @@ check_fmt(struct magic_set *ms, struct magic *m)
file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
return -1;
} else {
- rc = regexec(&rx, MAGIC_DESC, 0, 0, 0);
+ rc = regexec(&rx, m->desc, 0, 0, 0);
regfree(&rx);
return !rc;
}
@@ -328,7 +356,7 @@ mprint(struct magic_set *ms, struct magic *m)
float vf;
double vd;
int64_t t = 0;
- char *buf;
+ char buf[128];
union VALUETYPE *p = &ms->ms_value;
switch (m->type) {
@@ -338,13 +366,13 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
- if (asprintf(&buf, "%c", (unsigned char)v) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
+ (void)snprintf(buf, sizeof(buf), "%c",
+ (unsigned char)v);
+ if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
- if (file_printf(ms, MAGIC_DESC, (unsigned char) v) == -1)
+ if (file_printf(ms, m->desc, (unsigned char) v) == -1)
return -1;
break;
}
@@ -359,13 +387,14 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
- if (asprintf(&buf, "%hu", (unsigned short)v) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
+ (void)snprintf(buf, sizeof(buf), "%hu",
+ (unsigned short)v);
+ if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
- if (file_printf(ms, MAGIC_DESC, (unsigned short) v) == -1)
+ if (
+ file_printf(ms, m->desc, (unsigned short) v) == -1)
return -1;
break;
}
@@ -381,13 +410,12 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
- if (asprintf(&buf, "%u", (uint32_t)v) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
+ (void)snprintf(buf, sizeof(buf), "%u", (uint32_t)v);
+ if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
- if (file_printf(ms, MAGIC_DESC, (uint32_t) v) == -1)
+ if (file_printf(ms, m->desc, (uint32_t) v) == -1)
return -1;
break;
}
@@ -398,7 +426,7 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BEQUAD:
case FILE_LEQUAD:
v = file_signextend(ms, m, p->q);
- if (file_printf(ms, MAGIC_DESC, (uint64_t) v) == -1)
+ if (file_printf(ms, m->desc, (uint64_t) v) == -1)
return -1;
t = ms->offset + sizeof(int64_t);
break;
@@ -408,14 +436,14 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BESTRING16:
case FILE_LESTRING16:
if (m->reln == '=' || m->reln == '!') {
- if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
+ if (file_printf(ms, m->desc, m->value.s) == -1)
return -1;
t = ms->offset + m->vallen;
}
else {
if (*m->value.s == '\0')
p->s[strcspn(p->s, "\n")] = '\0';
- if (file_printf(ms, MAGIC_DESC, p->s) == -1)
+ if (file_printf(ms, m->desc, p->s) == -1)
return -1;
t = ms->offset + strlen(p->s);
if (m->type == FILE_PSTRING)
@@ -427,7 +455,7 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BEDATE:
case FILE_LEDATE:
case FILE_MEDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 1)) == -1)
+ if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1)
return -1;
t = ms->offset + sizeof(time_t);
break;
@@ -436,7 +464,7 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BELDATE:
case FILE_LELDATE:
case FILE_MELDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 0)) == -1)
+ if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1)
return -1;
t = ms->offset + sizeof(time_t);
break;
@@ -444,8 +472,8 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_QDATE:
case FILE_BEQDATE:
case FILE_LEQDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 1))
- == -1)
+ if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q,
+ 1)) == -1)
return -1;
t = ms->offset + sizeof(uint64_t);
break;
@@ -453,8 +481,8 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_QLDATE:
case FILE_BEQLDATE:
case FILE_LEQLDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 0))
- == -1)
+ if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q,
+ 0)) == -1)
return -1;
t = ms->offset + sizeof(uint64_t);
break;
@@ -467,13 +495,12 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
- if (asprintf(&buf, "%g", vf) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
+ (void)snprintf(buf, sizeof(buf), "%g", vf);
+ if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
- if (file_printf(ms, MAGIC_DESC, vf) == -1)
+ if (file_printf(ms, m->desc, vf) == -1)
return -1;
break;
}
@@ -488,13 +515,12 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
- if (asprintf(&buf, "%g", vd) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
+ (void)snprintf(buf, sizeof(buf), "%g", vd);
+ if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
- if (file_printf(ms, MAGIC_DESC, vd) == -1)
+ if (file_printf(ms, m->desc, vd) == -1)
return -1;
break;
}
@@ -510,7 +536,7 @@ mprint(struct magic_set *ms, struct magic *m)
file_oomem(ms, ms->search.rm_len);
return -1;
}
- rval = file_printf(ms, MAGIC_DESC, cp);
+ rval = file_printf(ms, m->desc, cp);
free(cp);
if (rval == -1)
@@ -524,7 +550,7 @@ mprint(struct magic_set *ms, struct magic *m)
}
case FILE_SEARCH:
- if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
+ if (file_printf(ms, m->desc, m->value.s) == -1)
return -1;
if ((m->str_flags & REGEX_OFFSET_START))
t = ms->search.offset;
@@ -533,18 +559,118 @@ mprint(struct magic_set *ms, struct magic *m)
break;
case FILE_DEFAULT:
- if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
+ if (file_printf(ms, m->desc, m->value.s) == -1)
return -1;
t = ms->offset;
break;
+ case FILE_INDIRECT:
+ t = ms->offset;
+ break;
+
default:
file_magerror(ms, "invalid m->type (%d) in mprint()", m->type);
return -1;
}
- return(t);
+ return (int32_t)t;
}
+/*
+ * Return the offset that match() records in ms->c.li[].off for entry m.
+ *
+ * Fixed-size types yield ms->offset plus the size of the type.  Strings
+ * yield ms->offset plus the pattern length (for '=' and '!') or the
+ * length of the string that was read.  Regex and search entries are
+ * based on ms->search.offset.  Default and indirect entries yield
+ * ms->offset unchanged, and any other type yields 0.
+ *
+ * Side effect: for a string entry with an empty pattern and a relation
+ * other than '=' or '!', the value in ms->ms_value.s is cut at its first
+ * newline.
+ */
+private int32_t
+moffset(struct magic_set *ms, struct magic *m)
+{
+	uint32_t end;
+
+	switch (m->type) {
+	case FILE_BYTE:
+		return ms->offset + sizeof(char);
+
+	case FILE_SHORT:
+	case FILE_BESHORT:
+	case FILE_LESHORT:
+		return ms->offset + sizeof(short);
+
+	case FILE_LONG:
+	case FILE_BELONG:
+	case FILE_LELONG:
+	case FILE_MELONG:
+		return ms->offset + sizeof(int32_t);
+
+	case FILE_QUAD:
+	case FILE_BEQUAD:
+	case FILE_LEQUAD:
+		return ms->offset + sizeof(int64_t);
+
+	case FILE_STRING:
+	case FILE_PSTRING:
+	case FILE_BESTRING16:
+	case FILE_LESTRING16:
+		/* An exact comparison consumed the whole pattern. */
+		if (m->reln == '=' || m->reln == '!')
+			return ms->offset + m->vallen;
+
+		/* An empty pattern means "the rest of the line". */
+		if (*m->value.s == '\0')
+			ms->ms_value.s[strcspn(ms->ms_value.s, "\n")] = '\0';
+		end = ms->offset + strlen(ms->ms_value.s);
+		/* One more byte for a pstring. */
+		if (m->type == FILE_PSTRING)
+			end++;
+		return end;
+
+	/* Both flavours of the short dates advance by a time_t. */
+	case FILE_DATE:
+	case FILE_BEDATE:
+	case FILE_LEDATE:
+	case FILE_MEDATE:
+	case FILE_LDATE:
+	case FILE_BELDATE:
+	case FILE_LELDATE:
+	case FILE_MELDATE:
+		return ms->offset + sizeof(time_t);
+
+	/* Both flavours of the quad dates advance by a uint64_t. */
+	case FILE_QDATE:
+	case FILE_BEQDATE:
+	case FILE_LEQDATE:
+	case FILE_QLDATE:
+	case FILE_BEQLDATE:
+	case FILE_LEQLDATE:
+		return ms->offset + sizeof(uint64_t);
+
+	case FILE_FLOAT:
+	case FILE_BEFLOAT:
+	case FILE_LEFLOAT:
+		return ms->offset + sizeof(float);
+
+	case FILE_DOUBLE:
+	case FILE_BEDOUBLE:
+	case FILE_LEDOUBLE:
+		return ms->offset + sizeof(double);
+
+	case FILE_REGEX:
+		if ((m->str_flags & REGEX_OFFSET_START) != 0)
+			return ms->search.offset;
+		return ms->search.offset + ms->search.rm_len;
+
+	case FILE_SEARCH:
+		if ((m->str_flags & REGEX_OFFSET_START) != 0)
+			return ms->search.offset;
+		return ms->search.offset + m->vallen;
+
+	case FILE_DEFAULT:
+	case FILE_INDIRECT:
+		return ms->offset;
+
+	default:
+		return 0;
+	}
+}
#define DO_CVT(fld, cast) \
if (m->num_mask) \
@@ -806,6 +932,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
const char *c;
const char *last; /* end of search region */
const char *buf; /* start of search region */
+ const char *end;
size_t lines;
if (s == NULL) {
@@ -814,10 +941,10 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
return 0;
}
buf = (const char *)s + offset;
- last = (const char *)s + nbytes;
+ end = last = (const char *)s + nbytes;
/* mget() guarantees buf <= last */
for (lines = linecnt, b = buf;
- lines && ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r')));
+ lines && ((b = memchr(c = b, '\n', end - b)) || (b = memchr(c, '\r', end - c)));
lines--, b++) {
last = b;
if (b[0] == '\r' && b[1] == '\n')
@@ -825,7 +952,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
}
if (lines)
last = (const char *)s + nbytes;
-
+
ms->search.s = buf;
ms->search.s_len = last - buf;
ms->search.offset = offset;
@@ -838,13 +965,13 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
const unsigned char *esrc = s + nbytes;
char *dst = p->s;
char *edst = &p->s[sizeof(p->s) - 1];
-
+
if (type == FILE_BESTRING16)
src++;
-
+
/* check for pointer overflow */
if (src < s) {
- file_magerror(ms, "invalid offset %zu in mcopy()",
+ file_magerror(ms, "invalid offset %u in mcopy()",
offset);
return -1;
}
@@ -904,7 +1031,9 @@ mget(struct magic_set *ms, const unsigned char *s,
if ((ms->flags & MAGIC_DEBUG) != 0) {
mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
+#ifndef COMPILE_ONLY
file_mdump(m);
+#endif
}
if (m->flag & INDIR) {
@@ -929,9 +1058,11 @@ mget(struct magic_set *ms, const unsigned char *s,
off = q->l;
break;
case FILE_BELONG:
+ case FILE_BEID3:
off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)|
(q->hl[2]<<8)|(q->hl[3]));
break;
+ case FILE_LEID3:
case FILE_LELONG:
off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)|
(q->hl[1]<<8)|(q->hl[0]));
@@ -1119,6 +1250,7 @@ mget(struct magic_set *ms, const unsigned char *s,
offset = ~offset;
break;
case FILE_BELONG:
+ case FILE_BEID3:
if (nbytes < (offset + 4))
return 0;
if (off) {
@@ -1189,6 +1321,7 @@ mget(struct magic_set *ms, const unsigned char *s,
offset = ~offset;
break;
case FILE_LELONG:
+ case FILE_LEID3:
if (nbytes < (offset + 4))
return 0;
if (off) {
@@ -1365,8 +1498,21 @@ mget(struct magic_set *ms, const unsigned char *s,
break;
}
- if (m->flag & INDIROFFADD)
+ switch (m->in_type) {
+ case FILE_LEID3:
+ case FILE_BEID3:
+ offset = ((((offset >> 0) & 0x7f) << 0) |
+ (((offset >> 8) & 0x7f) << 7) |
+ (((offset >> 16) & 0x7f) << 14) |
+ (((offset >> 24) & 0x7f) << 21)) + 10;
+ break;
+ default:
+ break;
+ }
+
+ if (m->flag & INDIROFFADD) {
offset += ms->c.li[cont_level-1].off;
+ }
if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1)
return -1;
ms->offset = offset;
@@ -1374,7 +1520,9 @@ mget(struct magic_set *ms, const unsigned char *s,
if ((ms->flags & MAGIC_DEBUG) != 0) {
mdebug(offset, (char *)(void *)p,
sizeof(union VALUETYPE));
+#ifndef COMPILE_ONLY
file_mdump(m);
+#endif
}
}
@@ -1384,14 +1532,14 @@ mget(struct magic_set *ms, const unsigned char *s,
if (nbytes < (offset + 1)) /* should alway be true */
return 0;
break;
-
+
case FILE_SHORT:
case FILE_BESHORT:
case FILE_LESHORT:
if (nbytes < (offset + 2))
return 0;
break;
-
+
case FILE_LONG:
case FILE_BELONG:
case FILE_LELONG:
@@ -1410,7 +1558,7 @@ mget(struct magic_set *ms, const unsigned char *s,
if (nbytes < (offset + 4))
return 0;
break;
-
+
case FILE_DOUBLE:
case FILE_BEDOUBLE:
case FILE_LEDOUBLE:
@@ -1430,6 +1578,15 @@ mget(struct magic_set *ms, const unsigned char *s,
return 0;
break;
+ case FILE_INDIRECT:
+ if ((ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0 &&
+ file_printf(ms, m->desc) == -1)
+ return -1;
+ if (nbytes < offset)
+ return 0;
+ return file_softmagic(ms, s + offset, nbytes - offset,
+ BINTEST);
+
case FILE_DEFAULT: /* nothing to check */
default:
break;
@@ -1460,7 +1617,7 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
if (0L == flags) { /* normal string: do it fast */
while (len-- > 0)
if ((v = *b++ - *a++) != '\0')
- break;
+ break;
}
else { /* combine the others */
while (len-- > 0) {
@@ -1474,8 +1631,8 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
if ((v = toupper(*b++) - *a++) != '\0')
break;
}
- else if ((flags & STRING_COMPACT_BLANK) &&
- isspace(*a)) {
+ else if ((flags & STRING_COMPACT_BLANK) &&
+ isspace(*a)) {
a++;
if (isspace(*b++)) {
while (isspace(*b))
@@ -1570,26 +1727,27 @@ magiccheck(struct magic_set *ms, struct magic *m)
case 'x':
matched = 1;
break;
-
+
case '!':
matched = fv != fl;
break;
-
+
case '=':
matched = fv == fl;
break;
-
+
case '>':
matched = fv > fl;
break;
-
+
case '<':
matched = fv < fl;
break;
-
+
default:
matched = 0;
- file_magerror(ms, "cannot happen with float: invalid relation `%c'", m->reln);
+ file_magerror(ms, "cannot happen with float: invalid relation `%c'",
+ m->reln);
return -1;
}
return matched;
@@ -1603,23 +1761,23 @@ magiccheck(struct magic_set *ms, struct magic *m)
case 'x':
matched = 1;
break;
-
+
case '!':
matched = dv != dl;
break;
-
+
case '=':
matched = dv == dl;
break;
-
+
case '>':
matched = dv > dl;
break;
-
+
case '<':
matched = dv < dl;
break;
-
+
default:
matched = 0;
file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln);
@@ -1727,6 +1885,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
return -1;
break;
}
+ case FILE_INDIRECT:
+ return 1;
default:
file_magerror(ms, "invalid type %d in magiccheck()", m->type);
return -1;
@@ -1817,12 +1977,30 @@ magiccheck(struct magic_set *ms, struct magic *m)
}
private int
+handle_annotation(struct magic_set *ms, struct magic *m)
+{ /* Print m's annotation if ms->flags requests one: returns 1 when something was printed, 0 when no annotation applies, -1 when file_printf() failed. */
+ if (ms->flags & MAGIC_APPLE) { /* checked first, so it takes precedence over the MIME branch; m->apple is not tested for emptiness */
+ if (file_printf(ms, "%.8s", m->apple) == -1) /* "%.8s" prints at most 8 characters of m->apple */
+ return -1;
+ return 1;
+ }
+ if ((ms->flags & MAGIC_MIME_TYPE) && m->mimetype[0]) { /* only when the entry has a non-empty mimetype */
+ if (file_printf(ms, "%s", m->mimetype) == -1)
+ return -1;
+ return 1;
+ }
+ return 0; /* neither branch printed anything */
+}
+
+private int
print_sep(struct magic_set *ms, int firstline)
{
+ if (ms->flags & MAGIC_MIME)
+ return 0;
if (firstline)
return 0;
/*
- * we found another match
+ * we found another match
* put a newline and '-' to do some simple formatting
*/
return file_printf(ms, "\n- ");
diff --git a/contrib/file/vasprintf.c b/contrib/file/vasprintf.c
index 0289c0d88b4f1..3e3af2062a85e 100644
--- a/contrib/file/vasprintf.c
+++ b/contrib/file/vasprintf.c
@@ -105,12 +105,13 @@ A buffer overflow can only occur if your sprintf() do strange things or when
you use strange formats.
*/
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$File: vasprintf.c,v 1.7 2009/02/03 20:27:52 christos Exp $")
+#endif /* lint */
#include <assert.h>
-#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>