diff options
Diffstat (limited to 'usr.bin/mkuzip')
-rw-r--r-- | usr.bin/mkuzip/Makefile | 6 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_cfg.h | 2 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_cloop.h | 2 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_conveyor.c | 7 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_format.h | 7 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_lzma.c | 57 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_lzma.h | 5 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_zlib.c | 51 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_zlib.h | 5 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_zstd.c | 95 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuz_zstd.h | 38 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuzip.8 | 166 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuzip.c | 94 | ||||
-rw-r--r-- | usr.bin/mkuzip/mkuzip.h | 3 |
14 files changed, 407 insertions, 131 deletions
diff --git a/usr.bin/mkuzip/Makefile b/usr.bin/mkuzip/Makefile index ed31b78f2d31..c60fd7f5ca54 100644 --- a/usr.bin/mkuzip/Makefile +++ b/usr.bin/mkuzip/Makefile @@ -3,10 +3,12 @@ PROG= mkuzip MAN= mkuzip.8 SRCS= mkuzip.c mkuz_blockcache.c mkuz_lzma.c mkuz_zlib.c mkuz_conveyor.c \ - mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c + mkuz_blk.c mkuz_fqueue.c mkuz_time.c mkuz_insize.c mkuz_zstd.c + +CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib #CFLAGS+= -DMKUZ_DEBUG -LIBADD= z md lzma pthread +LIBADD= lzma md pthread z zstd .include <bsd.prog.mk> diff --git a/usr.bin/mkuzip/mkuz_cfg.h b/usr.bin/mkuzip/mkuz_cfg.h index fc88ef29198f..a27d98a6ff60 100644 --- a/usr.bin/mkuzip/mkuz_cfg.h +++ b/usr.bin/mkuzip/mkuz_cfg.h @@ -39,4 +39,6 @@ struct mkuz_cfg { const char *iname; off_t isize; const struct mkuz_format *handler; + size_t cbound_blksz; + int comp_level; }; diff --git a/usr.bin/mkuzip/mkuz_cloop.h b/usr.bin/mkuzip/mkuz_cloop.h index 4ed7c5026391..fabf80a53b12 100644 --- a/usr.bin/mkuzip/mkuz_cloop.h +++ b/usr.bin/mkuzip/mkuz_cloop.h @@ -39,9 +39,11 @@ #define CLOOP_MAJVER_2 '2' #define CLOOP_MAJVER_3 '3' +#define CLOOP_MAJVER_4 '4' #define CLOOP_COMP_LIBZ 'V' #define CLOOP_COMP_LZMA 'L' +#define CLOOP_COMP_ZSTD 'Z' struct cloop_header { char magic[CLOOP_MAGIC_LEN]; /* cloop magic */ diff --git a/usr.bin/mkuzip/mkuz_conveyor.c b/usr.bin/mkuzip/mkuz_conveyor.c index 856d445cce50..2cfae1c0775c 100644 --- a/usr.bin/mkuzip/mkuz_conveyor.c +++ b/usr.bin/mkuzip/mkuz_conveyor.c @@ -42,8 +42,8 @@ __FBSDID("$FreeBSD$"); #include "mkuz_conveyor.h" #include "mkuz_cfg.h" #include "mkuzip.h" -#include "mkuz_format.h" #include "mkuz_blk.h" +#include "mkuz_format.h" #include "mkuz_fqueue.h" #include "mkuz_blk_chain.h" @@ -67,7 +67,7 @@ cworker(void *p) cfp = cwp->cfp; cvp = cwp->cvp; free(cwp); - c_ctx = cfp->handler->f_init(cfp->blksz); + c_ctx = cfp->handler->f_init(&cfp->comp_level); for (;;) { iblk = mkuz_fqueue_deq(cvp->wrk_queue); if (iblk == MKUZ_BLK_EOF) { @@ -80,7 +80,8 @@ cworker(void *p) /* All zeroes block */ oblk = mkuz_blk_ctor(0); } else { - oblk = cfp->handler->f_compress(c_ctx, iblk); + oblk = mkuz_blk_ctor(cfp->cbound_blksz); + cfp->handler->f_compress(c_ctx, iblk, oblk); if (cfp->en_dedup != 0) { compute_digest(oblk); } diff --git a/usr.bin/mkuzip/mkuz_format.h b/usr.bin/mkuzip/mkuz_format.h index 817c0121ed85..ddee771eab87 100644 --- a/usr.bin/mkuzip/mkuz_format.h +++ b/usr.bin/mkuzip/mkuz_format.h @@ -26,12 +26,15 @@ * $FreeBSD$ */ -DEFINE_RAW_METHOD(f_init, void *, uint32_t); -DEFINE_RAW_METHOD(f_compress, struct mkuz_blk *, void *, const struct mkuz_blk *); +DEFINE_RAW_METHOD(f_compress_bound, size_t, size_t); +DEFINE_RAW_METHOD(f_init, void *, int *); +DEFINE_RAW_METHOD(f_compress, void, void *, const struct mkuz_blk *, struct mkuz_blk *); struct mkuz_format { + const char *option; const char *magic; const char *default_sufx; + f_compress_bound_t f_compress_bound; f_init_t f_init; f_compress_t f_compress; }; diff --git a/usr.bin/mkuzip/mkuz_lzma.c b/usr.bin/mkuzip/mkuz_lzma.c index 8810d2ef0c56..bab2820f7c38 100644 --- a/usr.bin/mkuzip/mkuz_lzma.c +++ b/usr.bin/mkuzip/mkuz_lzma.c @@ -35,61 +35,55 @@ __FBSDID("$FreeBSD$"); #include <lzma.h> #include "mkuzip.h" -#include "mkuz_lzma.h" #include "mkuz_blk.h" - -#define USED_BLOCKSIZE DEV_BSIZE +#include "mkuz_lzma.h" struct mkuz_lzma { lzma_filter filters[2]; lzma_options_lzma opt_lzma; lzma_stream strm; - uint32_t blksz; }; -static const lzma_stream lzma_stream_init = LZMA_STREAM_INIT; +size_t +mkuz_lzma_cbound(size_t blksz) +{ + return (lzma_stream_buffer_bound(blksz)); +} void * -mkuz_lzma_init(uint32_t blksz) +mkuz_lzma_init(int *comp_level) { struct mkuz_lzma *ulp; - if (blksz % USED_BLOCKSIZE != 0) { - errx(1, "cluster size should be multiple of %d", - USED_BLOCKSIZE); - /* Not reached */ - } - if (blksz > MAXPHYS) { - errx(1, "cluster size is too large"); + if (*comp_level == USE_DEFAULT_LEVEL) + *comp_level = LZMA_PRESET_DEFAULT; + if (*comp_level < 0 || *comp_level > 9) + errx(1, "provided compression level %d is invalid", + *comp_level); /* Not reached */ - } + ulp = mkuz_safe_zmalloc(sizeof(struct mkuz_lzma)); /* Init lzma encoder */ - ulp->strm = lzma_stream_init; - if (lzma_lzma_preset(&ulp->opt_lzma, LZMA_PRESET_DEFAULT)) + ulp->strm = (lzma_stream)LZMA_STREAM_INIT; + if (lzma_lzma_preset(&ulp->opt_lzma, *comp_level)) errx(1, "Error loading LZMA preset"); ulp->filters[0].id = LZMA_FILTER_LZMA2; ulp->filters[0].options = &ulp->opt_lzma; ulp->filters[1].id = LZMA_VLI_UNKNOWN; - ulp->blksz = blksz; - return (void *)ulp; } -struct mkuz_blk * -mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk) +void +mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk) { lzma_ret ret; - struct mkuz_blk *rval; struct mkuz_lzma *ulp; ulp = (struct mkuz_lzma *)p; - rval = mkuz_blk_ctor(ulp->blksz * 2); - ret = lzma_stream_encoder(&ulp->strm, ulp->filters, LZMA_CHECK_CRC32); if (ret != LZMA_OK) { if (ret == LZMA_MEMLIMIT_ERROR) @@ -99,23 +93,20 @@ mkuz_lzma_compress(void *p, const struct mkuz_blk *iblk) } ulp->strm.next_in = iblk->data; - ulp->strm.avail_in = ulp->blksz; - ulp->strm.next_out = rval->data; - ulp->strm.avail_out = rval->alen; + ulp->strm.avail_in = iblk->info.len; + ulp->strm.next_out = oblk->data; + ulp->strm.avail_out = oblk->alen; ret = lzma_code(&ulp->strm, LZMA_FINISH); - if (ret != LZMA_STREAM_END) { - /* Error */ + if (ret != LZMA_STREAM_END) errx(1, "lzma_code FINISH failed, code=%d, pos(in=%zd, " - "out=%zd)", ret, (ulp->blksz - ulp->strm.avail_in), - (ulp->blksz * 2 - ulp->strm.avail_out)); - } + "out=%zd)", ret, (iblk->info.len - ulp->strm.avail_in), + (oblk->alen - ulp->strm.avail_out)); #if 0 lzma_end(&ulp->strm); #endif - rval->info.len = rval->alen - ulp->strm.avail_out; - return (rval); + oblk->info.len = oblk->alen - ulp->strm.avail_out; } diff --git a/usr.bin/mkuzip/mkuz_lzma.h b/usr.bin/mkuzip/mkuz_lzma.h index bba45425343c..920acc67164d 100644 --- a/usr.bin/mkuzip/mkuz_lzma.h +++ b/usr.bin/mkuzip/mkuz_lzma.h @@ -38,5 +38,6 @@ "exit $?\n" #define DEFAULT_SUFX_LZMA ".ulzma" -void *mkuz_lzma_init(uint32_t); -struct mkuz_blk *mkuz_lzma_compress(void *, const struct mkuz_blk *); +size_t mkuz_lzma_cbound(size_t); +void *mkuz_lzma_init(int *); +void mkuz_lzma_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuz_zlib.c b/usr.bin/mkuzip/mkuz_zlib.c index 4b191f945cda..fa2519ffb60d 100644 --- a/usr.bin/mkuzip/mkuz_zlib.c +++ b/usr.bin/mkuzip/mkuz_zlib.c @@ -34,54 +34,51 @@ __FBSDID("$FreeBSD$"); #include <zlib.h> #include "mkuzip.h" -#include "mkuz_zlib.h" #include "mkuz_blk.h" +#include "mkuz_zlib.h" struct mkuz_zlib { - uLongf oblen; - uint32_t blksz; + int comp_level; }; +size_t +mkuz_zlib_cbound(size_t blksz) +{ + return (compressBound(blksz)); +} + void * -mkuz_zlib_init(uint32_t blksz) +mkuz_zlib_init(int *comp_level) { struct mkuz_zlib *zp; - if (blksz % DEV_BSIZE != 0) { - errx(1, "cluster size should be multiple of %d", - DEV_BSIZE); + if (*comp_level == USE_DEFAULT_LEVEL) + *comp_level = Z_BEST_COMPRESSION; + if (*comp_level < Z_BEST_SPEED || *comp_level > Z_BEST_COMPRESSION) + errx(1, "provided compression level %d is invalid", + *comp_level); /* Not reached */ - } - if (compressBound(blksz) > MAXPHYS) { - errx(1, "cluster size is too large"); - /* Not reached */ - } + zp = mkuz_safe_zmalloc(sizeof(struct mkuz_zlib)); - zp->oblen = compressBound(blksz); - zp->blksz = blksz; + zp->comp_level = *comp_level; - return (void *)zp; + return (zp); } -struct mkuz_blk * -mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk) +void +mkuz_zlib_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk) { uLongf destlen_z; - struct mkuz_blk *rval; struct mkuz_zlib *zp; zp = (struct mkuz_zlib *)p; - rval = mkuz_blk_ctor(zp->oblen); - - destlen_z = rval->alen; - if (compress2(rval->data, &destlen_z, iblk->data, zp->blksz, - Z_BEST_COMPRESSION) != Z_OK) { - errx(1, "can't compress data: compress2() " - "failed"); + destlen_z = oblk->alen; + if (compress2(oblk->data, &destlen_z, iblk->data, iblk->info.len, + zp->comp_level) != Z_OK) { + errx(1, "can't compress data: compress2() failed"); /* Not reached */ } - rval->info.len = (uint32_t)destlen_z; - return (rval); + oblk->info.len = (uint32_t)destlen_z; } diff --git a/usr.bin/mkuzip/mkuz_zlib.h b/usr.bin/mkuzip/mkuz_zlib.h index 55e57a610b4b..ad653b935f22 100644 --- a/usr.bin/mkuzip/mkuz_zlib.h +++ b/usr.bin/mkuzip/mkuz_zlib.h @@ -32,5 +32,6 @@ "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" -void *mkuz_zlib_init(uint32_t); -struct mkuz_blk *mkuz_zlib_compress(void *, const struct mkuz_blk *); +size_t mkuz_zlib_cbound(size_t); +void *mkuz_zlib_init(int *); +void mkuz_zlib_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuz_zstd.c b/usr.bin/mkuzip/mkuz_zstd.c new file mode 100644 index 000000000000..d59cb47917d2 --- /dev/null +++ b/usr.bin/mkuzip/mkuz_zstd.c @@ -0,0 +1,95 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <err.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> + +#include <zstd.h> + +#include "mkuzip.h" +#include "mkuz_blk.h" +#include "mkuz_zstd.h" + +size_t +mkuz_zstd_cbound(size_t blksz) +{ + return (ZSTD_compressBound(blksz)); +} + +void * +mkuz_zstd_init(int *comp_level) +{ + ZSTD_CCtx *cctx; + size_t rc; + + /* Default chosen for near-parity with mkuzip zlib default. */ + if (*comp_level == USE_DEFAULT_LEVEL) + *comp_level = 9; + if (*comp_level < ZSTD_minCLevel() || *comp_level == 0 || + *comp_level > ZSTD_maxCLevel()) + errx(1, "provided compression level %d is invalid", + *comp_level); + + cctx = ZSTD_createCCtx(); + if (cctx == NULL) + errx(1, "could not allocate Zstd context"); + + rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, + *comp_level); + if (ZSTD_isError(rc)) + errx(1, "Could not set zstd compression level %d: %s", + *comp_level, ZSTD_getErrorName(rc)); + + rc = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); + if (ZSTD_isError(rc)) + errx(1, "Could not enable zstd checksum: %s", + ZSTD_getErrorName(rc)); + + return (cctx); +} + +void +mkuz_zstd_compress(void *p, const struct mkuz_blk *iblk, struct mkuz_blk *oblk) +{ + ZSTD_CCtx *cctx; + size_t rc; + + cctx = p; + + rc = ZSTD_compress2(cctx, oblk->data, oblk->alen, iblk->data, + iblk->info.len); + if (ZSTD_isError(rc)) + errx(1, "could not compress data: ZSTD_compress2: %s", + ZSTD_getErrorName(rc)); + + oblk->info.len = rc; +} diff --git a/usr.bin/mkuzip/mkuz_zstd.h b/usr.bin/mkuzip/mkuz_zstd.h new file mode 100644 index 000000000000..874e2d82812c --- /dev/null +++ b/usr.bin/mkuzip/mkuz_zstd.h @@ -0,0 +1,38 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#define DEFAULT_SUFX_ZSTD ".uzst" + +#define CLOOP_MAGIC_ZSTD "#!/bin/sh\n#Z4.0 Format\n" \ + "(kldstat -qm g_uzip||kldload geom_uzip)>&-&&" \ + "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n" + +size_t mkuz_zstd_cbound(size_t); +void *mkuz_zstd_init(int *); +void mkuz_zstd_compress(void *, const struct mkuz_blk *, struct mkuz_blk *); diff --git a/usr.bin/mkuzip/mkuzip.8 b/usr.bin/mkuzip/mkuzip.8 index 8a54aee7ba57..9bf4a0c3f893 100644 --- a/usr.bin/mkuzip/mkuzip.8 +++ b/usr.bin/mkuzip/mkuzip.8 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 19, 2019 +.Dd August 9, 2019 .Dt MKUZIP 8 .Os .Sh NAME @@ -35,7 +35,9 @@ class .Sh SYNOPSIS .Nm -.Op Fl dLSsvZ +.Op Fl dSsvZ +.Op Fl A Ar compression_algorithm +.Op Fl C Ar compression_level .Op Fl j Ar compression_jobs .Op Fl o Ar outfile .Op Fl s Ar cluster_size @@ -57,17 +59,82 @@ works in two phases: .It An .Ar infile -image is split into clusters; each cluster is compressed using -.Xr zlib 3 -or -.Xr lzma 3 . +image is split into clusters; each cluster is compressed. .It -The resulting set of compressed clusters along with headers that allow -locating each individual cluster is written to the output file. +The resulting set of compressed clusters is written to the output file. +In addition, a +.Dq table of contents +header is written which allows for efficient seeking. .El .Pp The options are: .Bl -tag -width indent +.It Fl A Op Ar lzma | Ar zlib | Ar zstd +Select a specific compression algorithm. +If this option is not provided, the default is +.Ar zlib . +.Pp +The +.Ar lzma +algorithm provides noticeable better compression levels than zlib on the same +data set. +It has vastly slower compression speed and moderately slower decompression +speed. +.Pp +The +.Ar zstd +algorithm provides better compression levels than zlib on the same data set. +It also has faster compression and decompression speed than zlib. +In the very high compression +.Dq level +settings, it does not offer quite as high a compression ratio as +.Ar lzma . +However, its decompression speed does not suffer at high compression +.Dq levels . +.It Fl C Ar compression_level +Select the integer compression level used to parameterize the chosen +compression algorithm. +.Pp +For any given algorithm, a lesser number selects a faster compression mode. +A greater number selects a slower compression mode. +Typically, for the same algorithm, a greater +.Ar compression_level +provides better final compression ratio. +.Pp +For +.Ar lzma , +the range of valid compression levels is +.Va 0-9 . +The +.Nm +default for lzma is +.Va 6 . +.Pp +For +.Ar zlib , +the range of valid compression levels is +.Va 1-9 . +The +.Nm +default for zlib is +.Va 9 . +.Pp +For +.Ar zstd , +the range of valid compression levels is currently +.Va 1-19 . +The +.Nm +default for zstd is +.Va 9 . +.It Fl d +Enable de-duplication. +When the option is enabled +.Nm +detects identical blocks in the input and replaces each subsequent occurrence +of such block with pointer to the very first one in the output. +Setting this option results is moderate decrease of compressed image size, +typically around 3-5% of a final size of the compressed image. .It Fl j Ar compression_jobs Specify the number of compression jobs that .Nm @@ -77,24 +144,9 @@ to the value of .Va hw.ncpu .Xr sysctl 8 variable. -.It Fl d -Enable de-duplication. -When the option is enabled the -.Nm -detects identical blocks in the input and replaces each subsequent occurence -of such block with pointer to the very first one in the output. -Setting this option results is moderate decrease of compressed image size, -typically around 3-5% of a final size of the compressed image. -.It Fl L -Use -.Xr lzma 3 -compression algorithm instead of the default -.Xr zlib 3 . -The -.Xr lzma 3 -provides noticeable better compression levels on the same data set -at the expense of much slower compression speed (10-20x) and somewhat slower -decompression (2-3x). +.It Op Fl L +Legacy flag that indicates the same thing as +.Dq Fl A Ar lzma . .It Fl o Ar outfile Name of the output file .Ar outfile . @@ -119,33 +171,44 @@ should be a multiple of 512 bytes. .It Fl v Display verbose messages. .It Fl Z -Disable zero-blocks detection and elimination. -When this option is set, the +Disable zero-block detection and elimination. +When this option is set, .Nm -would compress empty blocks (i.e. clusters that consist of only zero bytes) -just as it would any other block. -When the option is not set, the +compresses blocks of zero bytes just as it would any other block. +When the option is not set, .Nm -detects such blocks and skips them from the output. +detects and compresses zero blocks in a space-efficient way. Setting .Fl Z -results is slight increase of compressed image size, typically less than 0.1% -of a final size of the compressed image. +increases compressed image sizes slightly, typically less than 0.1%. .El -.Sh NOTES -The compression ratio largely depends on the cluster size used. -.\" The following two sentences are unclear: how can gzip(1) be -.\" used in a comparable fashion, and wouldn't a gzip-compressed -.\" image suffer from larger cluster sizes as well? -For large cluster sizes (16K and higher), typical compression ratios +.Sh IMPLEMENTATION NOTES +The compression ratio largely depends on the compression algorithm, level, and +cluster size used. +For large cluster sizes (16kB and higher), typical overall image compression +ratios with +.Xr zlib 3 are only 1-2% less than those achieved with -.Xr gzip 1 . -However, it should be kept in mind that larger cluster -sizes lead to higher overhead in the +.Xr gzip 1 +over the entire image. +However, it should be kept in mind that larger cluster sizes lead to higher +overhead in the .Xr geom_uzip 4 class, as the class has to decompress the whole cluster even if only a few bytes from that cluster have to be read. .Pp +Additionally, the threshold at 16-32 kB where a larger cluster size does not +benefit overall compression ratio is an artifact of the +.Xr zlib 3 +algorithm in particular. +.Ar Lzma +and +.Ar Zstd will continue to provide better compression ratios as cluster sizes +are increased, at high enough compression levels. +The same tradeoff continues to apply: reads in +.Xr geom_uzip 4 +become more expensive the greater the cluster size. +.Pp The .Nm utility @@ -169,12 +232,27 @@ specific feature and while it does not require any changes to on-disk compressed image format, however it did require some matching changes to the .Xr geom_uzip 4 to handle resulting images correctly. +.Pp +To make use of +.Ar zstd +.Nm +images, the kernel must be configured with +.Cd ZSTDIO . +It is enabled by default in many +.Cd GENERIC +kernels provided as binary distributions by +.Fx . +The status on any particular system can be verified by checking +.Xr sysctl 8 +.Dv kern.features.geom_uzip_zstd +for +.Dq 1 . .Sh EXIT STATUS .Ex -std .Sh SEE ALSO .Xr gzip 1 , .Xr xz 1 , -.Xr lzma 3 , +.Xr zstd 1 , .Xr zlib 3 , .Xr geom 4 , .Xr geom_uzip 4 , diff --git a/usr.bin/mkuzip/mkuzip.c b/usr.bin/mkuzip/mkuzip.c index be0a9b23acb4..a2763e06440c 100644 --- a/usr.bin/mkuzip/mkuzip.c +++ b/usr.bin/mkuzip/mkuzip.c @@ -51,8 +51,9 @@ __FBSDID("$FreeBSD$"); #include "mkuzip.h" #include "mkuz_cloop.h" #include "mkuz_blockcache.h" -#include "mkuz_zlib.h" #include "mkuz_lzma.h" +#include "mkuz_zlib.h" +#include "mkuz_zstd.h" #include "mkuz_blk.h" #include "mkuz_cfg.h" #include "mkuz_conveyor.h" @@ -63,18 +64,38 @@ __FBSDID("$FreeBSD$"); #define DEFAULT_CLSTSIZE 16384 -static struct mkuz_format uzip_fmt = { - .magic = CLOOP_MAGIC_ZLIB, - .default_sufx = DEFAULT_SUFX_ZLIB, - .f_init = &mkuz_zlib_init, - .f_compress = &mkuz_zlib_compress +enum UZ_ALGORITHM { + UZ_ZLIB = 0, + UZ_LZMA, + UZ_ZSTD, + UZ_INVALID }; -static struct mkuz_format ulzma_fmt = { - .magic = CLOOP_MAGIC_LZMA, - .default_sufx = DEFAULT_SUFX_LZMA, - .f_init = &mkuz_lzma_init, - .f_compress = &mkuz_lzma_compress +static const struct mkuz_format uzip_fmts[] = { + [UZ_ZLIB] = { + .option = "zlib", + .magic = CLOOP_MAGIC_ZLIB, + .default_sufx = DEFAULT_SUFX_ZLIB, + .f_compress_bound = mkuz_zlib_cbound, + .f_init = mkuz_zlib_init, + .f_compress = mkuz_zlib_compress, + }, + [UZ_LZMA] = { + .option = "lzma", + .magic = CLOOP_MAGIC_LZMA, + .default_sufx = DEFAULT_SUFX_LZMA, + .f_compress_bound = mkuz_lzma_cbound, + .f_init = mkuz_lzma_init, + .f_compress = mkuz_lzma_compress, + }, + [UZ_ZSTD] = { + .option = "zstd", + .magic = CLOOP_MAGIC_ZSTD, + .default_sufx = DEFAULT_SUFX_ZSTD, + .f_compress_bound = mkuz_zstd_cbound, + .f_init = mkuz_zstd_init, + .f_compress = mkuz_zstd_compress, + }, }; static struct mkuz_blk *readblock(int, u_int32_t); @@ -111,6 +132,8 @@ int main(int argc, char **argv) struct mkuz_blk_info *chit; size_t ncpusz, ncpu, magiclen; double st, et; + enum UZ_ALGORITHM comp_alg; + int comp_level; st = getdtime(); @@ -129,12 +152,27 @@ int main(int argc, char **argv) cfs.en_dedup = 0; summary.en = 0; summary.f = stderr; - cfs.handler = &uzip_fmt; + comp_alg = UZ_ZLIB; + comp_level = USE_DEFAULT_LEVEL; cfs.nworkers = ncpu; struct mkuz_blk *iblk, *oblk; - while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) { + while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { switch(opt) { + case 'A': + for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { + if (strcmp(uzip_fmts[tmp].option, optarg) == 0) + break; + } + if (tmp == UZ_INVALID) + errx(1, "invalid algorithm specified: %s", + optarg); + /* Not reached */ + comp_alg = tmp; + break; + case 'C': + comp_level = atoi(optarg); + break; case 'o': oname = optarg; break; @@ -162,7 +200,7 @@ int main(int argc, char **argv) break; case 'L': - cfs.handler = &ulzma_fmt; + comp_alg = UZ_LZMA; break; case 'S': @@ -193,16 +231,32 @@ int main(int argc, char **argv) /* Not reached */ } + cfs.handler = &uzip_fmts[comp_alg]; + magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); assert(magiclen < sizeof(hdr.magic)); if (cfs.en_dedup != 0) { - hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; + /* + * Dedupe requires a version 3 format. Don't downgrade newer + * formats. + */ + if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) + hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; hdr.magic[CLOOP_OFS_COMPR] = tolower(hdr.magic[CLOOP_OFS_COMPR]); } - c_ctx = cfs.handler->f_init(cfs.blksz); + if (cfs.blksz % DEV_BSIZE != 0) + errx(1, "cluster size should be multiple of %d", DEV_BSIZE); + + cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); + if (cfs.cbound_blksz > MAXPHYS) + errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", + cfs.cbound_blksz, (size_t)MAXPHYS); + + c_ctx = cfs.handler->f_init(&comp_level); + cfs.comp_level = comp_level; cfs.iname = argv[0]; if (oname == NULL) { @@ -239,6 +293,14 @@ int main(int argc, char **argv) } toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); + /* + * Initialize last+1 entry with non-heap trash. If final padding is + * added later, it may or may not be overwritten with an offset + * representing the length of the final compressed block. If not, + * initialize to a defined value. + */ + toc[hdr.nblocks] = 0; + cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); if (cfs.fdw < 0) { diff --git a/usr.bin/mkuzip/mkuzip.h b/usr.bin/mkuzip/mkuzip.h index f41507c86964..b4bec58525ee 100644 --- a/usr.bin/mkuzip/mkuzip.h +++ b/usr.bin/mkuzip/mkuzip.h @@ -28,6 +28,9 @@ #define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args) +/* Use an algorithm-specific default level if no explicit level is selected. */ +#define USE_DEFAULT_LEVEL INT_MIN + void *mkuz_safe_malloc(size_t); void *mkuz_safe_zmalloc(size_t); int mkuz_memvcmp(const void *, unsigned char, size_t); |