diff options
51 files changed, 0 insertions, 14012 deletions
diff --git a/.buckconfig b/.buckconfig deleted file mode 100644 index 483f6053b265..000000000000 --- a/.buckconfig +++ /dev/null @@ -1,9 +0,0 @@ -[cxx] - cppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4 - cflags = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith - cxxppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4 - cxxflags = -std=c++11 -Wno-deprecated-declarations - gtest_dep = //contrib/pzstd:gtest - -[httpserver] - port = 0 diff --git a/.buckversion b/.buckversion deleted file mode 100644 index 892fad966fce..000000000000 --- a/.buckversion +++ /dev/null @@ -1 +0,0 @@ -c8dec2e8da52d483f6dd7c6cd2ad694e8e6fed2b diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 0f7ad8bfc173..000000000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,5 +0,0 @@ -# Code of Conduct - -Facebook has adopted a Code of Conduct that we expect project participants to adhere to. -Please read the [full text](https://code.fb.com/codeofconduct/) -so that you can understand what actions will and will not be tolerated. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 57be94bf54dd..000000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,489 +0,0 @@ -# Contributing to Zstandard -We want to make contributing to this project as easy and transparent as -possible. - -## Our Development Process -New versions are being developed in the "dev" branch, -or in their own feature branch. -When they are deemed ready for a release, they are merged into "release". - -As a consequence, all contributions must stage first through "dev" -or their own feature branch. - -## Pull Requests -We actively welcome your pull requests. - -1. Fork the repo and create your branch from `dev`. -2. If you've added code that should be tested, add tests. -3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. -5. 
Make sure your code lints. -6. If you haven't already, complete the Contributor License Agreement ("CLA"). - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: <https://code.facebook.com/cla> - -## Workflow -Zstd uses a branch-based workflow for making changes to the codebase. Typically, zstd -will use a new branch per sizable topic. For smaller changes, it is okay to lump multiple -related changes into a branch. - -Our contribution process works in three main stages: -1. Local development - * Update: - * Checkout your fork of zstd if you have not already - ``` - git checkout https://github.com/<username>/zstd - cd zstd - ``` - * Update your local dev branch - ``` - git pull https://github.com/facebook/zstd dev - git push origin dev - ``` - * Topic and development: - * Make a new branch on your fork about the topic you're developing for - ``` - # branch names should be concise but sufficiently informative - git checkout -b <branch-name> - git push origin <branch-name> - ``` - * Make commits and push - ``` - # make some changes = - git add -u && git commit -m <message> - git push origin <branch-name> - ``` - * Note: run local tests to ensure that your changes didn't break existing functionality - * Quick check - ``` - make check - ``` - * Longer check - ``` - make test - ``` -2. Code Review and CI tests - * Ensure CI tests pass: - * Before sharing anything to the community, create a pull request in your own fork against the dev branch - and make sure that all GitHub Actions CI tests pass. See the Continuous Integration section below for more information. - * Ensure that static analysis passes on your development machine. See the Static Analysis section - below to see how to do this. 
- * Create a pull request: - * When you are ready to share you changes to the community, create a pull request from your branch - to facebook:dev. You can do this very easily by clicking 'Create Pull Request' on your fork's home - page. - * From there, select the branch where you made changes as your source branch and facebook:dev - as the destination. - * Examine the diff presented between the two branches to make sure there is nothing unexpected. - * Write a good pull request description: - * While there is no strict template that our contributors follow, we would like them to - sufficiently summarize and motivate the changes they are proposing. We recommend all pull requests, - at least indirectly, address the following points. - * Is this pull request important and why? - * Is it addressing an issue? If so, what issue? (provide links for convenience please) - * Is this a new feature? If so, why is it useful and/or necessary? - * Are there background references and documents that reviewers should be aware of to properly assess this change? - * Note: make sure to point out any design and architectural decisions that you made and the rationale behind them. - * Note: if you have been working with a specific user and would like them to review your work, make sure you mention them using (@<username>) - * Submit the pull request and iterate with feedback. -3. Merge and Release - * Getting approval: - * You will have to iterate on your changes with feedback from other collaborators to reach a point - where your pull request can be safely merged. - * To avoid too many comments on style and convention, make sure that you have a - look at our style section below before creating a pull request. - * Eventually, someone from the zstd team will approve your pull request and not long after merge it into - the dev branch. - * Housekeeping: - * Most PRs are linked with one or more Github issues. If this is the case for your PR, make sure - the corresponding issue is mentioned. 
If your change 'fixes' or completely addresses the - issue at hand, then please indicate this by requesting that an issue be closed by commenting. - * Just because your changes have been merged does not mean the topic or larger issue is complete. Remember - that the change must make it to an official zstd release for it to be meaningful. We recommend - that contributors track the activity on their pull request and corresponding issue(s) page(s) until - their change makes it to the next release of zstd. Users will often discover bugs in your code or - suggest ways to refine and improve your initial changes even after the pull request is merged. - -## Static Analysis -Static analysis is a process for examining the correctness or validity of a program without actually -executing it. It usually helps us find many simple bugs. Zstd uses clang's `scan-build` tool for -static analysis. You can install it by following the instructions for your OS on https://clang-analyzer.llvm.org/scan-build. - -Once installed, you can ensure that our static analysis tests pass on your local development machine -by running: -``` -make staticAnalyze -``` - -In general, you can use `scan-build` to static analyze any build script. For example, to static analyze -just `contrib/largeNbDicts` and nothing else, you can run: - -``` -scan-build make -C contrib/largeNbDicts largeNbDicts -``` - -### Pitfalls of static analysis -`scan-build` is part of our regular CI suite. Other static analyzers are not. - -It can be useful to look at additional static analyzers once in a while (and we do), but it's not a good idea to multiply the nb of analyzers run continuously at each commit and PR. The reasons are : - -- Static analyzers are full of false positive. The signal to noise ratio is actually pretty low. -- A good CI policy is "zero-warning tolerance". That means that all issues must be solved, including false positives. This quickly becomes a tedious workload. 
-- Multiple static analyzers will feature multiple kind of false positives, sometimes applying to the same code but in different ways leading to : - + tortuous code, trying to please multiple constraints, hurting readability and therefore maintenance. Sometimes, such complexity introduce other more subtle bugs, that are just out of scope of the analyzers. - + sometimes, these constraints are mutually exclusive : if one try to solve one, the other static analyzer will complain, they can't be both happy at the same time. -- As if that was not enough, the list of false positives change with each version. It's hard enough to follow one static analyzer, but multiple ones with their own update agenda, this quickly becomes a massive velocity reducer. - -This is different from running a static analyzer once in a while, looking at the output, and __cherry picking__ a few warnings that seem helpful, either because they detected a genuine risk of bug, or because it helps expressing the code in a way which is more readable or more difficult to misuse. These kinds of reports can be useful, and are accepted. - -## Continuous Integration -CI tests run every time a pull request (PR) is created or updated. The exact tests -that get run will depend on the destination branch you specify. Some tests take -longer to run than others. Currently, our CI is set up to run a short -series of tests when creating a PR to the dev branch and a longer series of tests -when creating a PR to the release branch. You can look in the configuration files -of the respective CI platform for more information on what gets run when. - -Most people will just want to create a PR with the destination set to their local dev -branch of zstd. You can then find the status of the tests on the PR's page. You can also -re-run tests and cancel running tests from the PR page or from the respective CI's dashboard. 
- -Almost all of zstd's CI runs on GitHub Actions (configured at `.github/workflows`), which will automatically run on PRs to your -own fork. A small number of tests run on other services (e.g. Travis CI, Circle CI, Appveyor). -These require work to set up on your local fork, and (at least for Travis CI) cost money. -Therefore, if the PR on your local fork passes GitHub Actions, feel free to submit a PR -against the main repo. - -### Third-party CI -A small number of tests cannot run on GitHub Actions, or have yet to be migrated. -For these, we use a variety of third-party services (listed below). It is not necessary to set -these up on your fork in order to contribute to zstd; however, we do link to instructions for those -who want earlier signal. - -| Service | Purpose | Setup Links | Config Path | -|-----------|------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------| -| Travis CI | Used for testing on non-x86 architectures such as PowerPC | https://docs.travis-ci.com/user/tutorial/#to-get-started-with-travis-ci-using-github <br> https://github.com/marketplace/travis-ci | `.travis.yml` | -| AppVeyor | Used for some Windows testing (e.g. 
cygwin, mingw) | https://www.appveyor.com/blog/2018/10/02/github-apps-integration/ <br> https://github.com/marketplace/appveyor | `appveyor.yml` | -| Cirrus CI | Used for testing on FreeBSD | https://github.com/marketplace/cirrus-ci/ | `.cirrus.yml` | -| Circle CI | Historically was used to provide faster signal,<br/> but we may be able to migrate these to Github Actions | https://circleci.com/docs/2.0/getting-started/#setting-up-circleci <br> https://youtu.be/Js3hMUsSZ2c <br> https://circleci.com/docs/2.0/enable-checks/ | `.circleci/config.yml` | - -Note: the instructions linked above mostly cover how to set up a repository with CI from scratch. -The general idea should be the same for setting up CI on your fork of zstd, but you may have to -follow slightly different steps. In particular, please ignore any instructions related to setting up -config files (since zstd already has configs for each of these services). - -## Performance -Performance is extremely important for zstd and we only merge pull requests whose performance -landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented. -This high bar for performance means that every PR which has the potential to -impact performance takes a very long time for us to properly review. That being said, we -always welcome contributions to improve performance (or worsen performance for the trade-off of -something else). Please keep the following in mind before submitting a performance related PR: - -1. Zstd isn't as old as gzip but it has been around for time now and its evolution is -very well documented via past Github issues and pull requests. It may be the case that your -particular performance optimization has already been considered in the past. Please take some -time to search through old issues and pull requests using keywords specific to your -would-be PR. 
Of course, just because a topic has already been discussed (and perhaps rejected -on some grounds) in the past, doesn't mean it isn't worth bringing up again. But even in that case, -it will be helpful for you to have context from that topic's history before contributing. -2. The distinction between noise and actual performance gains can unfortunately be very subtle -especially when microbenchmarking extremely small wins or losses. The only remedy to getting -something subtle merged is extensive benchmarking. You will be doing us a great favor if you -take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc) -benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark -your changes on every single processor and os out there (and neither will we) but do that best -you can:) We've added some things to think about when benchmarking below in the Benchmarking -Performance section which might be helpful for you. -3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly -legitimate thing to do as long as it does not harm the overall performance health of Zstd. -This is a hard balance to strike but please keep in mind other aspects of Zstd when -submitting changes that are clang-specific, windows-specific, etc. - -## Benchmarking Performance -Performance microbenchmarking is a tricky subject but also essential for Zstd. We value empirical -testing over theoretical speculation. This guide it not perfect but for most scenarios, it -is a good place to start. - -### Stability -Unfortunately, the most important aspect in being able to benchmark reliably is to have a stable -benchmarking machine. A virtual machine, a machine with shared resources, or your laptop -will typically not be stable enough to obtain reliable benchmark results. If you can get your -hands on a desktop, this is usually a better scenario. 
- -Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to -do a little more work to ensure that you are in fact measuring the changes you've made and not -noise. Here are some things you can do to make your benchmarks more stable: - -1. The most simple thing you can do to drastically improve the stability of your benchmark is -to run it multiple times and then aggregate the results of those runs. As a general rule of -thumb, the smaller the change you are trying to measure, the more samples of benchmark runs -you will have to aggregate over to get reliable results. Here are some additional things to keep in -mind when running multiple trials: - * How you aggregate your samples are important. You might be tempted to use the mean of your - results. While this is certainly going to be a more stable number than a raw single sample - benchmark number, you might have more luck by taking the median. The mean is not robust to - outliers whereas the median is. Better still, you could simply take the fastest speed your - benchmark achieved on each run since that is likely the fastest your process will be - capable of running your code. In our experience, this (aggregating by just taking the sample - with the fastest running time) has been the most stable approach. - * The more samples you have, the more stable your benchmarks should be. You can verify - your improved stability by looking at the size of your confidence intervals as you - increase your sample count. These should get smaller and smaller. Eventually hopefully - smaller than the performance win you are expecting. - * Most processors will take some time to get `hot` when running anything. The observations - you collect during that time period will very different from the true performance number. 
Having - a very large number of sample will help alleviate this problem slightly but you can also - address is directly by simply not including the first `n` iterations of your benchmark in - your aggregations. You can determine `n` by simply looking at the results from each iteration - and then hand picking a good threshold after which the variance in results seems to stabilize. -2. You cannot really get reliable benchmarks if your host machine is simultaneously running -another cpu/memory-intensive application in the background. If you are running benchmarks on your -personal laptop for instance, you should close all applications (including your code editor and -browser) before running your benchmarks. You might also have invisible background applications -running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager -on Windows. You will get more stable benchmark results of you end those processes as well. - * If you have multiple cores, you can even run your benchmark on a reserved core to prevent - pollution from other OS and user processes. There are a number of ways to do this depending - on your OS: - * On linux boxes, you have use https://github.com/lpechacek/cpuset. - * On Windows, you can "Set Processor Affinity" using https://www.thewindowsclub.com/processor-affinity-windows - * On Mac, you can try to use their dedicated affinity API https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/#//apple_ref/doc/uid/TP40006635-CH1-DontLinkElementID_2 -3. To benchmark, you will likely end up writing a separate c/c++ program that will link libzstd. -Dynamically linking your library will introduce some added variation (not a large amount but -definitely some). Statically linking libzstd will be more stable. Static libraries should -be enabled by default when building zstd. -4. Use a profiler with a good high resolution timer. See the section below on profiling for -details on this. -5. 
Disable frequency scaling, turbo boost and address space randomization (this will vary by OS) -6. Try to avoid storage. On some systems you can use tmpfs. Putting the program, inputs and outputs on -tmpfs avoids touching a real storage system, which can have a pretty big variability. - -Also check our LLVM's guide on benchmarking here: https://llvm.org/docs/Benchmarking.html - -### Zstd benchmark -The fastest signal you can get regarding your performance changes is via the in-build zstd cli -bench option. You can run Zstd as you typically would for your scenario using some set of options -and then additionally also specify the `-b#` option. Doing this will run our benchmarking pipeline -for that options you have just provided. If you want to look at the internals of how this -benchmarking script works, you can check out programs/benchzstd.c - -For example: say you have made a change that you believe improves the speed of zstd level 1. The -very first thing you should use to assess whether you actually achieved any sort of improvement -is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to -specify a running time for your benchmark in seconds (default is 3 seconds). -Usually, the longer the running time, the more stable your results will be. - -``` -$ git checkout <commit-before-your-change> -$ make && cp zstd zstd-old -$ git checkout <commit-after-your-change> -$ make && cp zstd zstd-new -$ zstd-old -i5 -b1 <your-test-data> - 1<your-test-data> : 8990 -> 3992 (2.252), 302.6 MB/s , 626.4 MB/s -$ zstd-new -i5 -b1 <your-test-data> - 1<your-test-data> : 8990 -> 3992 (2.252), 302.8 MB/s , 628.4 MB/s -``` - -Unless your performance win is large enough to be visible despite the intrinsic noise -on your computer, benchzstd alone will likely not be enough to validate the impact of your -changes. 
For example, the results of the example above indicate that effectively nothing -changed but there could be a small <3% improvement that the noise on the host machine -obscured. So unless you see a large performance win (10-15% consistently) using just -this method of evaluation will not be sufficient. - -### Profiling -There are a number of great profilers out there. We're going to briefly mention how you can -profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler` -on Windows. - -Say you have an idea for a change that you think will provide some good performance gains -for level 1 compression on Zstd. Typically this means, you have identified a section of -code that you think can be made to run faster. - -The first thing you will want to do is make sure that the piece of code is actually taking up -a notable amount of time to run. It is usually not worth optimizing something which accounts for less than -0.0001% of the total running time. Luckily, there are tools to help with this. -Profilers will let you see how much time your code spends inside a particular function. -If your target code snippet is only part of a function, it might be worth trying to -isolate that snippet by moving it to its own function (this is usually not necessary but -might be). - -Most profilers (including the profilers discussed below) will generate a call graph of -functions for you. Your goal will be to find your function of interest in this call graph -and then inspect the time spent inside of it. You might also want to look at the annotated -assembly which most profilers will provide you with. - -#### Instruments -We will once again consider the scenario where you think you've identified a piece of code -whose performance can be improved upon. Follow these steps to profile your code using -Instruments. - -1. Open Instruments -2. Select `Time Profiler` from the list of standard templates -3. 
Close all other applications except for your instruments window and your terminal -4. Run your benchmarking script from your terminal window - * You will want a benchmark that runs for at least a few seconds (5 seconds will - usually be long enough). This way the profiler will have something to work with - and you will have ample time to attach your profiler to this process:) - * I will just use benchzstd as my benchmarmking script for this example: -``` -$ zstd -b1 -i5 <my-data> # this will run for 5 seconds -``` -5. Once you run your benchmarking script, switch back over to instruments and attach your -process to the time profiler. You can do this by: - * Clicking on the `All Processes` drop down in the top left of the toolbar. - * Selecting your process from the dropdown. In my case, it is just going to be labeled - `zstd` - * Hitting the bright red record circle button on the top left of the toolbar -6. You profiler will now start collecting metrics from your benchmarking script. Once -you think you have collected enough samples (usually this is the case after 3 seconds of -recording), stop your profiler. -7. Make sure that in toolbar of the bottom window, `profile` is selected. -8. You should be able to see your call graph. - * If you don't see the call graph or an incomplete call graph, make sure you have compiled - zstd and your benchmarking script using debug flags. On mac and linux, this just means - you will have to supply the `-g` flag alone with your build script. You might also - have to provide the `-fno-omit-frame-pointer` flag -9. Dig down the graph to find your function call and then inspect it by double clicking -the list item. You will be able to see the annotated source code and the assembly side by -side. 
- -#### Perf - -This wiki has a pretty detailed tutorial on getting started working with perf so we'll -leave you to check that out of you're getting started: - -https://perf.wiki.kernel.org/index.php/Tutorial - -Some general notes on perf: -* Use `perf stat -r # <bench-program>` to quickly get some relevant timing and -counter statistics. Perf uses a high resolution timer and this is likely one -of the first things your team will run when assessing your PR. -* Perf has a long list of hardware counters that can be viewed with `perf --list`. -When measuring optimizations, something worth trying is to make sure the hardware -counters you expect to be impacted by your change are in fact being so. For example, -if you expect the L1 cache misses to decrease with your change, you can look at the -counter `L1-dcache-load-misses` -* Perf hardware counters will not work on a virtual machine. - -#### Visual Studio - -TODO - -## Issues -We use GitHub issues to track public bugs. Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## Coding Style -It's a pretty long topic, which is difficult to summarize in a single paragraph. -As a rule of thumbs, try to imitate the coding style of -similar lines of codes around your contribution. -The following is a non-exhaustive list of rules employed in zstd code base: - -### C90 -This code base is following strict C90 standard, -with 2 extensions : 64-bit `long long` types, and variadic macros. -This rule is applied strictly to code within `lib/` and `programs/`. -Sub-project in `contrib/` are allowed to use other conventions. 
- -### C++ direct compatibility : symbol mangling -All public symbol declarations must be wrapped in `extern “C” { … }`, -so that this project can be compiled as C++98 code, -and linked into C++ applications. - -### Minimal Frugal -This design requirement is fundamental to preserve the portability of the code base. -#### Dependencies -- Reduce dependencies to the minimum possible level. - Any dependency should be considered “bad” by default, - and only tolerated because it provides a service in a better way than can be achieved locally. - The only external dependencies this repository tolerates are - standard C libraries, and in rare cases, system level headers. -- Within `lib/`, this policy is even more drastic. - The only external dependencies allowed are `<assert.h>`, `<stdlib.h>`, `<string.h>`, - and even then, not directly. - In particular, no function shall ever allocate on heap directly, - and must use instead `ZSTD_malloc()` and equivalent. - Other accepted non-symbol headers are `<stddef.h>` and `<limits.h>`. -- Within the project, there is a strict hierarchy of dependencies that must be respected. - `programs/` is allowed to depend on `lib/`, but only its public API. - Within `lib/`, `lib/common` doesn't depend on any other directory. - `lib/compress` and `lib/decompress` shall not depend on each other. - `lib/dictBuilder` can depend on `lib/common` and `lib/compress`, but not `lib/decompress`. -#### Resources -- Functions in `lib/` must use very little stack space, - several dozens of bytes max. - Everything larger must use the heap allocator, - or require a scratch buffer to be emplaced manually. - -### Naming -* All public symbols are prefixed with `ZSTD_` - + private symbols, with a scope limited to their own unit, are free of this restriction. - However, since `libzstd` source code can be amalgamated, - each symbol name must attempt to be (and remain) unique. - Avoid too generic names that could become ground for future collisions. 
- This generally implies usage of some form of prefix. -* For symbols (functions and variables), naming convention is `PREFIX_camelCase`. - + In some advanced cases, one can also find : - - `PREFIX_prefix2_camelCase` - - `PREFIX_camelCase_extendedQualifier` -* Multi-words names generally consist of an action followed by object: - - for example : `ZSTD_createCCtx()` -* Prefer positive actions - - `goBackward` rather than `notGoForward` -* Type names (`struct`, etc.) follow similar convention, - except that they are allowed and even invited to start by an Uppercase letter. - Example : `ZSTD_CCtx`, `ZSTD_CDict` -* Macro names are all Capital letters. - The same composition rules (`PREFIX_NAME_QUALIFIER`) apply. -* File names are all lowercase letters. - The convention is `snake_case`. - File names **must** be unique across the entire code base, - even when they stand in clearly separated directories. - -### Qualifiers -* This code base is `const` friendly, if not `const` fanatical. - Any variable that can be `const` (aka. read-only) **must** be `const`. - Any pointer which content will not be modified must be `const`. - This property is then controlled at compiler level. - `const` variables are an important signal to readers that this variable isn't modified. - Conversely, non-const variables are a signal to readers to watch out for modifications later on in the function. -* If a function must be inlined, mention it explicitly, - using project's own portable macros, such as `FORCE_INLINE_ATTR`, - defined in `lib/common/compiler.h`. - -### Debugging -* **Assertions** are welcome, and should be used very liberally, - to control any condition the code expects for its correct execution. - These assertion checks will be run in debug builds, and disabled in production. -* For traces, this project provides its own debug macros, - in particular `DEBUGLOG(level, ...)`, defined in `lib/common/debug.h`. 
- -### Code documentation -* Avoid code documentation that merely repeats what the code is already stating. - Whenever applicable, prefer employing the code as the primary way to convey explanations. - Example 1 : `int nbTokens = n;` instead of `int i = n; /* i is a nb of tokens *./`. - Example 2 : `assert(size > 0);` instead of `/* here, size should be positive */`. -* At declaration level, the documentation explains how to use the function or variable - and when applicable why it's needed, of the scenarios where it can be useful. -* At implementation level, the documentation explains the general outline of the algorithm employed, - and when applicable why this specific choice was preferred. - -### General layout -* 4 spaces for indentation rather than tabs -* Code documentation shall directly precede function declaration or implementation -* Function implementations and its code documentation should be preceded and followed by an empty line - - -## License -By contributing to Zstandard, you agree that your contributions will be licensed -under both the [LICENSE](LICENSE) file and the [COPYING](COPYING) file in the root directory of this source tree. diff --git a/Makefile b/Makefile deleted file mode 100644 index e8e6bcffe4ea..000000000000 --- a/Makefile +++ /dev/null @@ -1,471 +0,0 @@ -# ################################################################ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# You may select, at your option, one of the above-listed licenses. 
-# ################################################################ - -# verbose mode (print commands) on V=1 or VERBOSE=1 -Q = $(if $(filter 1,$(V) $(VERBOSE)),,@) - -PRGDIR = programs -ZSTDDIR = lib -BUILDIR = build -ZWRAPDIR = zlibWrapper -TESTDIR = tests -FUZZDIR = $(TESTDIR)/fuzz - -# Define nul output -VOID = /dev/null - -# When cross-compiling from linux to windows, you might -# need to specify this as "Windows." Fedora build fails -# without it. -# -# Note: mingw-w64 build from linux to windows does not -# fail on other tested distros (ubuntu, debian) even -# without manually specifying the TARGET_SYSTEM. -TARGET_SYSTEM ?= $(OS) -CP ?= cp - -ifneq (,$(filter Windows%,$(TARGET_SYSTEM))) - EXT =.exe -else - EXT = -endif - -## default: Build lib-release and zstd-release -.PHONY: default -default: lib-release zstd-release - -.PHONY: all -all: allmost examples manual contrib - -.PHONY: allmost -allmost: allzstd zlibwrapper - -# skip zwrapper, can't build that on alternate architectures without the proper zlib installed -.PHONY: allzstd -allzstd: lib - $(Q)$(MAKE) -C $(PRGDIR) all - $(Q)$(MAKE) -C $(TESTDIR) all - -.PHONY: all32 -all32: - $(MAKE) -C $(PRGDIR) zstd32 - $(MAKE) -C $(TESTDIR) all32 - -.PHONY: lib lib-release lib-mt lib-nomt -lib lib-release lib-mt lib-nomt: - $(Q)$(MAKE) -C $(ZSTDDIR) $@ - -.PHONY: zstd zstd-release -zstd zstd-release: - $(Q)$(MAKE) -C $(PRGDIR) $@ - $(Q)ln -sf $(PRGDIR)/zstd$(EXT) zstd$(EXT) - -.PHONY: zstdmt -zstdmt: - $(Q)$(MAKE) -C $(PRGDIR) $@ - $(Q)$(CP) $(PRGDIR)/zstd$(EXT) ./zstdmt$(EXT) - -.PHONY: zlibwrapper -zlibwrapper: lib - $(MAKE) -C $(ZWRAPDIR) all - -## test: run long-duration tests -.PHONY: test -DEBUGLEVEL ?= 1 -test: MOREFLAGS += -g -Werror -test: - DEBUGLEVEL=$(DEBUGLEVEL) MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants - $(MAKE) -C $(TESTDIR) $@ - ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder $@ - -## check: run basic tests for `zstd` cli -.PHONY: check -check: - $(Q)$(MAKE) -C 
$(TESTDIR) $@ - -.PHONY: automated_benchmarking -automated_benchmarking: - $(MAKE) -C $(TESTDIR) $@ - -.PHONY: benchmarking -benchmarking: automated_benchmarking - -## examples: build all examples in `examples/` directory -.PHONY: examples -examples: lib - $(MAKE) -C examples all - -## manual: generate API documentation in html format -.PHONY: manual -manual: - $(MAKE) -C contrib/gen_html $@ - -## man: generate man page -.PHONY: man -man: - $(MAKE) -C programs $@ - -## contrib: build all supported projects in `/contrib` directory -.PHONY: contrib -contrib: lib - $(MAKE) -C contrib/pzstd all - $(MAKE) -C contrib/seekable_format/examples all - $(MAKE) -C contrib/seekable_format/tests test - $(MAKE) -C contrib/largeNbDicts all - $(MAKE) -C contrib/externalSequenceProducer all - cd build/single_file_libs/ ; ./build_decoder_test.sh - cd build/single_file_libs/ ; ./build_library_test.sh - -.PHONY: cleanTabs -cleanTabs: - cd contrib; ./cleanTabs - -.PHONY: clean -clean: - $(Q)$(MAKE) -C $(ZSTDDIR) $@ > $(VOID) - $(Q)$(MAKE) -C $(PRGDIR) $@ > $(VOID) - $(Q)$(MAKE) -C $(TESTDIR) $@ > $(VOID) - $(Q)$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID) - $(Q)$(MAKE) -C examples/ $@ > $(VOID) - $(Q)$(MAKE) -C contrib/gen_html $@ > $(VOID) - $(Q)$(MAKE) -C contrib/pzstd $@ > $(VOID) - $(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID) - $(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID) - $(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID) - $(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID) - $(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp* - $(Q)$(RM) -r lz4 cmakebuild mesonbuild install - @echo Cleaning completed - -#------------------------------------------------------------------------------ -# make install is validated only for Linux, macOS, Hurd and some BSD targets -#------------------------------------------------------------------------------ -ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT% CYGWIN_NT% Haiku AIX,$(shell sh -c 
'MSYSTEM="MSYS" uname') )) - -HOST_OS = POSIX - -MKDIR ?= mkdir -p - -HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) -EGREP_OPTIONS ?= -ifeq ($(HAVE_COLORNEVER), 1) -EGREP_OPTIONS += --color=never -endif -EGREP = egrep $(EGREP_OPTIONS) - -# Print a two column output of targets and their description. To add a target description, put a -# comment in the Makefile with the format "## <TARGET>: <DESCRIPTION>". For example: -# -## list: Print all targets and their descriptions (if provided) -.PHONY: list -list: - $(Q)TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \ - | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \ - | $(EGREP) -v -e '^[^[:alnum:]]' | sort); \ - { \ - printf "Target Name\tDescription\n"; \ - printf "%0.s-" {1..16}; printf "\t"; printf "%0.s-" {1..40}; printf "\n"; \ - for target in $$TARGETS; do \ - line=$$($(EGREP) "^##[[:space:]]+$$target:" $(lastword $(MAKEFILE_LIST))); \ - description=$$(echo $$line | awk '{i=index($$0,":"); print substr($$0,i+1)}' | xargs); \ - printf "$$target\t$$description\n"; \ - done \ - } | column -t -s $$'\t' - -.PHONY: install armtest usan asan uasan msan asan32 -install: - $(Q)$(MAKE) -C $(ZSTDDIR) $@ - $(Q)$(MAKE) -C $(PRGDIR) $@ - -.PHONY: uninstall -uninstall: - $(Q)$(MAKE) -C $(ZSTDDIR) $@ - $(Q)$(MAKE) -C $(PRGDIR) $@ - -.PHONY: travis-install -travis-install: - $(MAKE) install PREFIX=~/install_test_dir - -.PHONY: clangbuild-darwin-fat -clangbuild-darwin-fat: clean - clang -v - CXX=clang++ CC=clang CFLAGS+="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch arm64" $(MAKE) zstd-release - mv programs/zstd programs/zstd_arm64 - CXX=clang++ CC=clang CFLAGS+="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation -arch x86_64" $(MAKE) zstd-release - mv programs/zstd programs/zstd_x64 - lipo -create programs/zstd_x64 programs/zstd_arm64 -output programs/zstd - -.PHONY: gcc5build 
gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build -gcc5build: clean - gcc-5 -v - CC=gcc-5 $(MAKE) all MOREFLAGS="-Werror $(MOREFLAGS)" - -gcc6build: clean - gcc-6 -v - CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror $(MOREFLAGS)" - -gcc7build: clean - gcc-7 -v - CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror $(MOREFLAGS)" - -clangbuild: clean - clang -v - CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" $(MAKE) all - -m32build: clean - gcc -v - $(MAKE) all32 - -armbuild: clean - CC=arm-linux-gnueabi-gcc CFLAGS="-Werror" $(MAKE) allzstd - -aarch64build: clean - CC=aarch64-linux-gnu-gcc CFLAGS="-Werror -O0" $(MAKE) allzstd - -ppcbuild: clean - CC=powerpc-linux-gnu-gcc CFLAGS="-m32 -Wno-attributes -Werror" $(MAKE) -j allzstd - -ppc64build: clean - CC=powerpc-linux-gnu-gcc CFLAGS="-m64 -Werror" $(MAKE) -j allzstd - -.PHONY: armfuzz aarch64fuzz ppcfuzz ppc64fuzz -armfuzz: clean - CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static MOREFLAGS="-static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest - -aarch64fuzz: clean - ld -v - CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static MOREFLAGS="-static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest - -ppcfuzz: clean - CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static MOREFLAGS="-static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest - -ppc64fuzz: clean - CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) fuzztest - -.PHONY: cxxtest gcc5test gcc6test armtest aarch64test ppctest ppc64test -cxxtest: CXXFLAGS += -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror -cxxtest: clean - $(MAKE) -C $(PRGDIR) all CC="$(CXX) -Wno-deprecated" CFLAGS="$(CXXFLAGS)" # adding -Wno-deprecated to avoid clang++ warning on dealing with C files directly - 
-gcc5test: clean - gcc-5 -v - $(MAKE) all CC=gcc-5 MOREFLAGS="-Werror $(MOREFLAGS)" - -gcc6test: clean - gcc-6 -v - $(MAKE) all CC=gcc-6 MOREFLAGS="-Werror $(MOREFLAGS)" - -armtest: clean - $(MAKE) -C $(TESTDIR) datagen # use native, faster - $(MAKE) -C $(TESTDIR) test CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" - -aarch64test: - $(MAKE) -C $(TESTDIR) datagen # use native, faster - $(MAKE) -C $(TESTDIR) test CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" - -ppctest: clean - $(MAKE) -C $(TESTDIR) datagen # use native, faster - $(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" - -ppc64test: clean - $(MAKE) -C $(TESTDIR) datagen # use native, faster - $(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" - -.PHONY: arm-ppc-compilation -arm-ppc-compilation: - $(MAKE) -C $(PRGDIR) clean zstd CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)" - $(MAKE) -C $(PRGDIR) clean zstd CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static $(MOREFLAGS)" - $(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static $(MOREFLAGS)" - $(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static $(MOREFLAGS)" - -regressiontest: - $(MAKE) -C $(FUZZDIR) regressiontest - -uasanregressiontest: - $(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined -Werror" 
CXXFLAGS="-O3 -fsanitize=address,undefined -Werror" - -msanregressiontest: - $(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory -Werror" CXXFLAGS="-O3 -fsanitize=memory -Werror" - -update_regressionResults : REGRESS_RESULTS_DIR := /tmp/regress_results_dir/ -update_regressionResults: - $(MAKE) -j -C programs zstd - $(MAKE) -j -C tests/regression test - $(RM) -r $(REGRESS_RESULTS_DIR) - $(MKDIR) $(REGRESS_RESULTS_DIR) - ./tests/regression/test \ - --cache tests/regression/cache \ - --output $(REGRESS_RESULTS_DIR)/results.csv \ - --zstd programs/zstd - echo "Showing results differences" - ! diff tests/regression/results.csv $(REGRESS_RESULTS_DIR)/results.csv - echo "Updating results.csv" - $(CP) $(REGRESS_RESULTS_DIR)/results.csv tests/regression/results.csv - - -# run UBsan with -fsanitize-recover=pointer-overflow -# this only works with recent compilers such as gcc 8+ -usan: clean - $(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=undefined -Werror $(MOREFLAGS)" - -asan: clean - $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror $(MOREFLAGS)" - -asan-%: clean - LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $* - -msan: clean - $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason - -msan-%: - $(MAKE) clean - LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -j -C $(TESTDIR) HAVE_LZMA=0 $* - -asan32: clean - $(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)" - -uasan: clean - $(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)" - -uasan-%: clean - LDFLAGS=-fuse-ld=gold MOREFLAGS="-g 
-fno-sanitize-recover=all -fsanitize=address,undefined -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $* - -tsan-%: clean - LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror $(MOREFLAGS)" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" - -.PHONY: apt-install -apt-install: - # TODO: uncomment once issue 3011 is resolved and remove hack from Github Actions .yml - # sudo apt-get update - sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install $(APT_PACKAGES) - -.PHONY: apt-add-repo -apt-add-repo: - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get update -y -qq - -.PHONY: ppcinstall arminstall valgrindinstall libc6install gcc6install gcc7install gcc8install gpp6install clang38install lz4install -ppcinstall: - APT_PACKAGES="qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu" $(MAKE) apt-install - -arminstall: - APT_PACKAGES="qemu-system-arm qemu-user-static gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross" $(MAKE) apt-install - -valgrindinstall: - APT_PACKAGES="valgrind" $(MAKE) apt-install - -libc6install: - APT_PACKAGES="libc6-dev-i386 gcc-multilib" $(MAKE) apt-install - -gcc6install: apt-add-repo - APT_PACKAGES="libc6-dev-i386 gcc-multilib gcc-6 gcc-6-multilib" $(MAKE) apt-install - -gcc7install: apt-add-repo - APT_PACKAGES="libc6-dev-i386 gcc-multilib gcc-7 gcc-7-multilib" $(MAKE) apt-install - -gcc8install: apt-add-repo - APT_PACKAGES="libc6-dev-i386 gcc-multilib gcc-8 gcc-8-multilib" $(MAKE) apt-install - -gpp6install: apt-add-repo - APT_PACKAGES="libc6-dev-i386 g++-multilib gcc-6 g++-6 g++-6-multilib" $(MAKE) apt-install - -clang38install: - APT_PACKAGES="clang-3.8" $(MAKE) apt-install - -# Ubuntu 14.04 ships a too-old lz4 -lz4install: - [ -e lz4 ] || git clone https://github.com/lz4/lz4 && sudo $(MAKE) -C lz4 install - -endif - - -ifneq (,$(filter MSYS%,$(shell sh -c 'MSYSTEM="MSYS" uname') )) -HOST_OS = MSYS -endif 
- -#------------------------------------------------------------------------ -# target specific tests -#------------------------------------------------------------------------ -ifneq (,$(filter MSYS POSIX,$(HOST_OS))) - -CMAKE ?= cmake -CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON - -ifneq (,$(filter MSYS%,$(shell sh -c 'MSYSTEM="MSYS" uname'))) -CMAKE_PARAMS = -G"MSYS Makefiles" -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -endif - -.PHONY: cmakebuild -cmakebuild: - $(CMAKE) --version - $(RM) -r cmakebuild install - $(MKDIR) cmakebuild install - cd cmakebuild; $(CMAKE) -Wdev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-Werror -O0" -DCMAKE_INSTALL_PREFIX=install $(CMAKE_PARAMS) ../build/cmake - $(CMAKE) --build cmakebuild --target install -- -j V=1 - cd cmakebuild; ctest -V -L Medium - -MESON ?= meson -NINJA ?= ninja - -.PHONY: mesonbuild -mesonbuild: - $(MESON) setup \ - --buildtype=debugoptimized \ - -Db_lundef=false \ - -Dauto_features=enabled \ - -Dbin_programs=true \ - -Dbin_tests=true \ - -Dbin_contrib=true \ - -Ddefault_library=both \ - build/meson mesonbuild - $(NINJA) -C mesonbuild/ - $(MESON) test -C mesonbuild/ --print-errorlogs - $(MESON) install -C mesonbuild --destdir staging/ - -.PHONY: c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze -c89build: clean - $(CC) -v - CFLAGS="-std=c89 -Werror -Wno-attributes -Wpedantic -Wno-long-long -Wno-variadic-macros -O0" $(MAKE) lib zstd - -gnu90build: clean - $(CC) -v - CFLAGS="-std=gnu90 -Werror -O0" $(MAKE) allmost - -c99build: clean - $(CC) -v - CFLAGS="-std=c99 -Werror -O0" $(MAKE) allmost - -gnu99build: clean - $(CC) -v - CFLAGS="-std=gnu99 -Werror -O0" $(MAKE) allmost - -c11build: clean - $(CC) -v - CFLAGS="-std=c11 -Werror -O0" $(MAKE) allmost - -bmix64build: clean - $(CC) -v - CFLAGS="-O3 -mbmi -Werror" 
$(MAKE) -C $(TESTDIR) test - -bmix32build: clean - $(CC) -v - CFLAGS="-O3 -mbmi -mx32 -Werror" $(MAKE) -C $(TESTDIR) test - -bmi32build: clean - $(CC) -v - CFLAGS="-O3 -mbmi -m32 -Werror" $(MAKE) -C $(TESTDIR) test - -# static analyzer test uses clang's scan-build -# does not analyze zlibWrapper, due to detected issues in zlib source code -staticAnalyze: SCANBUILD ?= scan-build -staticAnalyze: - $(CC) -v - CC=$(CC) CPPFLAGS=-g $(SCANBUILD) --status-bugs -v $(MAKE) zstd -endif diff --git a/Package.swift b/Package.swift deleted file mode 100644 index 97f2c6a5251e..000000000000 --- a/Package.swift +++ /dev/null @@ -1,36 +0,0 @@ -// swift-tools-version:5.0 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "zstd", - platforms: [ - .macOS(.v10_10), .iOS(.v9), .tvOS(.v9) - ], - products: [ - // Products define the executables and libraries a package produces, and make them visible to other packages. - .library( - name: "libzstd", - targets: [ "libzstd" ]) - ], - dependencies: [ - // Dependencies declare other packages that this package depends on. - // .package(url: /* package url */, from: "1.0.0"), - ], - targets: [ - // Targets are the basic building blocks of a package. A target can define a module or a test suite. - // Targets can depend on other targets in this package, and on products in packages this package depends on. - .target( - name: "libzstd", - path: "lib", - sources: [ "common", "compress", "decompress", "dictBuilder" ], - publicHeadersPath: ".", - cSettings: [ - .headerSearchPath(".") - ]) - ], - swiftLanguageVersions: [.v5], - cLanguageStandard: .gnu11, - cxxLanguageStandard: .gnucxx14 -) diff --git a/TESTING.md b/TESTING.md deleted file mode 100644 index df842cc94119..000000000000 --- a/TESTING.md +++ /dev/null @@ -1,43 +0,0 @@ -Testing -======= - -Zstandard CI testing is split up into three sections: -short, medium, and long tests. 
- -Short Tests ------------ -Short tests run on CircleCI for new commits on every branch and pull request. -They consist of the following tests: -- Compilation on all supported targets (x86, x86_64, ARM, AArch64, PowerPC, and PowerPC64) -- Compilation on various versions of gcc, clang, and g++ -- `tests/playTests.sh` on x86_64, without the tests on long data (CLI tests) -- Small tests (`tests/legacy.c`, `tests/longmatch.c`) on x64_64 - -Medium Tests ------------- -Medium tests run on every commit and pull request to `dev` branch, on TravisCI. -They consist of the following tests: -- The following tests run with UBsan and Asan on x86_64 and x86, as well as with - Msan on x86_64 - - `tests/playTests.sh --test-large-data` - - Fuzzer tests: `tests/fuzzer.c`, `tests/zstreamtest.c`, and `tests/decodecorpus.c` -- `tests/zstreamtest.c` under Tsan (streaming mode, including multithreaded mode) -- Valgrind Test (`make -C tests test-valgrind`) (testing CLI and fuzzer under `valgrind`) -- Fuzzer tests (see above) on ARM, AArch64, PowerPC, and PowerPC64 - -Long Tests ----------- -Long tests run on all commits to `release` branch, -and once a day on the current version of `dev` branch, -on TravisCI. 
-They consist of the following tests: -- Entire test suite (including fuzzers and some other specialized tests) on: - - x86_64 and x86 with UBsan and Asan - - x86_64 with Msan - - ARM, AArch64, PowerPC, and PowerPC64 -- Streaming mode fuzzer with Tsan (for the `zstdmt` testing) -- ZlibWrapper tests, including under valgrind -- Versions test (ensuring `zstd` can decode files from all previous versions) -- `pzstd` with asan and tsan, as well as in 32-bits mode -- Testing `zstd` with legacy mode off -- Entire test suite and make install on macOS diff --git a/doc/README.md b/doc/README.md deleted file mode 100644 index 8f3babcdbb26..000000000000 --- a/doc/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Zstandard Documentation -======================= - -This directory contains material defining the Zstandard format, -as well as detailed instructions to use `zstd` library. - -__`zstd_manual.html`__ : Documentation of `zstd.h` API, in html format. -Unfortunately, Github doesn't display `html` files in parsed format, just as source code. -For a readable display of html API documentation of latest release, -use this link: [https://raw.githack.com/facebook/zstd/release/doc/zstd_manual.html](https://raw.githack.com/facebook/zstd/release/doc/zstd_manual.html) . - -__`zstd_compression_format.md`__ : This document defines the Zstandard compression format. -Compliant decoders must adhere to this document, -and compliant encoders must generate data that follows it. - -Should you look for resources to develop your own port of Zstandard algorithm, -you may find the following resources useful : - -__`educational_decoder`__ : This directory contains an implementation of a Zstandard decoder, -compliant with the Zstandard compression format. -It can be used, for example, to better understand the format, -or as the basis for a separate implementation of Zstandard decoder. 
- -[__`decode_corpus`__](https://github.com/facebook/zstd/tree/dev/tests#decodecorpus---tool-to-generate-zstandard-frames-for-decoder-testing) : -This tool, stored in `/tests` directory, is able to generate random valid frames, -which is useful if you wish to test your decoder and verify it fully supports the specification. diff --git a/doc/decompressor_errata.md b/doc/decompressor_errata.md deleted file mode 100644 index b570f73145d9..000000000000 --- a/doc/decompressor_errata.md +++ /dev/null @@ -1,148 +0,0 @@ -Decompressor Errata -=================== - -This document captures known decompressor bugs, where the decompressor rejects a valid zstd frame. -Each entry will contain: -1. The last affected decompressor versions. -2. The decompressor components affected. -2. Whether the compressed frame could ever be produced by the reference compressor. -3. An example frame (hexadecimal string when it can be short enough, link to golden file otherwise) -4. A description of the bug. - -The document is in reverse chronological order, with the bugs that affect the most recent zstd decompressor versions listed first. - - -No sequence using the 2-bytes format ------------------------------------------------- - -**Last affected version**: v1.5.5 - -**Affected decompressor component(s)**: Library & CLI - -**Produced by the reference compressor**: No - -**Example Frame**: see zstd/tests/golden-decompression/zeroSeq_2B.zst - -The zstd decoder incorrectly expects FSE tables when there are 0 sequences present in the block -if the value 0 is encoded using the 2-bytes format. -Instead, it should immediately end the sequence section, and move on to next block. - -This situation was never generated by the reference compressor, -because representing 0 sequences with the 2-bytes format is inefficient -(the 1-byte format is always used in this case). 
- - -Compressed block with a size of exactly 128 KB ------------------------------------------------- - -**Last affected version**: v1.5.2 - -**Affected decompressor component(s)**: Library & CLI - -**Produced by the reference compressor**: No - -**Example Frame**: see zstd/tests/golden-decompression/block-128k.zst - -The zstd decoder incorrectly rejected blocks of type `Compressed_Block` when their size was exactly 128 KB. -Note that `128 KB - 1` was accepted, and `128 KB + 1` is forbidden by the spec. - -This type of block was never generated by the reference compressor. - -These blocks used to be disallowed by the spec up until spec version 0.3.2 when the restriction was lifted by [PR#1689](https://github.com/facebook/zstd/pull/1689). - -> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block). - - -Compressed block with 0 literals and 0 sequences ------------------------------------------------- - -**Last affected version**: v1.5.2 - -**Affected decompressor component(s)**: Library & CLI - -**Produced by the reference compressor**: No - -**Example Frame**: `28b5 2ffd 2000 1500 0000 00` - -The zstd decoder incorrectly rejected blocks of type `Compressed_Block` that encodes literals as `Raw_Literals_Block` with no literals, and has no sequences. - -This type of block was never generated by the reference compressor. - -Additionally, these blocks were disallowed by the spec up until spec version 0.3.2 when the restriction was lifted by [PR#1689](https://github.com/facebook/zstd/pull/1689). - -> A Compressed_Block has the extra restriction that Block_Size is always strictly less than the decompressed size. If this condition cannot be respected, the block must be sent uncompressed instead (Raw_Block). 
- - -First block is RLE block ------------------------- - -**Last affected version**: v1.4.3 - -**Affected decompressor component(s)**: CLI only - -**Produced by the reference compressor**: No - -**Example Frame**: `28b5 2ffd a001 0002 0002 0010 000b 0000 00` - -The zstd CLI decompressor rejected cases where the first block was an RLE block whose `Block_Size` is 131072, and the frame contains more than one block. -This only affected the zstd CLI, and not the library. - -The example is an RLE block with 131072 bytes, followed by a second RLE block with 1 byte. - -The compressor currently works around this limitation by explicitly avoiding producing RLE blocks as the first -block. - -https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L3527-L3535 - - -Tiny FSE Table & Block ----------------------- - -**Last affected version**: v1.3.4 - -**Affected decompressor component(s)**: Library & CLI - -**Produced by the reference compressor**: Possibly until version v1.3.4, but probably never - -**Example Frame**: `28b5 2ffd 2027 c500 0080 f3f1 f0ec ebc6 c5c7 f09d 4300 0000 e0e0 0658 0100 603e 52` - -The zstd library rejected blocks of type `Compressed_Block` whose offset of the last table with type `FSE_Compressed_Mode` was less than 4 bytes from the end of the block. - -In more depth, let `Last_Table_Offset` be the offset in the compressed block (excluding the header) that -the last table with type `FSE_Compressed_Mode` started. If `Block_Content - Last_Table_Offset < 4` then -the buggy zstd decompressor would reject the block. This occurs when the last serialized table is 2 bytes -and the bitstream size is 1 byte. - -For example: -* There is 1 sequence in the block -* `Literals_Lengths_Mode` is `FSE_Compressed_Mode` & the serialized table size is 2 bytes -* `Offsets_Mode` is `Predefined_Mode` -* `Match_Lengths_Mode` is `Predefined_Mode` -* The bitstream is 1 byte. E.g. there is only one sequence and it fits in 1 byte. 
- -The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`. - -See the compressor workaround code: - -https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682 - -Magicless format ----------------------- - -**Last affected version**: v1.5.5 - -**Affected decompressor component(s)**: Library - -**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232) - -**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59` - -v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames. -These include but are not limited to: -* Valid frames that happen to begin with a legacy magic number (little-endian) -* Valid frames that happen to begin with a skippable magic number (little-endian) - -If you are affected by this issue and cannot update to v1.5.6 or later, there is a -workaround to recover affected data. Simply prepend the ZSTD magic number -`0xFD2FB528` (little-endian) to your data and decompress using the standard-format -decoder. diff --git a/doc/decompressor_permissive.md b/doc/decompressor_permissive.md deleted file mode 100644 index 164d6c86d61c..000000000000 --- a/doc/decompressor_permissive.md +++ /dev/null @@ -1,80 +0,0 @@ -Decompressor Permissiveness to Invalid Data -=========================================== - -This document describes the behavior of the reference decompressor in cases -where it accepts formally invalid data instead of reporting an error. - -While the reference decompressor *must* decode any compliant frame following -the specification, its ability to detect erroneous data is on a best effort -basis: the decoder may accept input data that would be formally invalid, -when it causes no risk to the decoder, and which detection would cost too much -complexity or speed regression. 
- -In practice, the vast majority of invalid data are detected, if only because -many corruption events are dangerous for the decoder process (such as -requesting an out-of-bound memory access) and many more are easy to check. - -This document lists a few known cases where invalid data was formerly accepted -by the decoder, and what has changed since. - - -Truncated Huffman states ------------------------- - -**Last affected version**: v1.5.6 - -**Produced by the reference compressor**: No - -**Example Frame**: `28b5 2ffd 0000 5500 0072 8001 0420 7e1f 02aa 00` - -When using FSE-compressed Huffman weights, the compressed weight bitstream -could contain fewer bits than necessary to decode the initial states. - -The reference decompressor up to v1.5.6 will decode truncated or missing -initial states as zero, which can result in a valid Huffman tree if only -the second state is truncated. - -In newer versions, truncated initial states are reported as a corruption -error by the decoder. - - -Offset == 0 ------------ - -**Last affected version**: v1.5.5 - -**Produced by the reference compressor**: No - -**Example Frame**: `28b5 2ffd 0000 4500 0008 0002 002f 430b ae` - -If a sequence is decoded with `literals_length = 0` and `offset_value = 3` -while `Repeated_Offset_1 = 1`, the computed offset will be `0`, which is -invalid. - -The reference decompressor up to v1.5.5 processes this case as if the computed -offset was `1`, including inserting `1` into the repeated offset list. -This prevents the output buffer from remaining uninitialized, thus denying a -potential attack vector from an untrusted source. -However, in the rare case where this scenario would be the outcome of a -transmission or storage error, the decoder relies on the checksum to detect -the error. - -In newer versions, this case is always detected and reported as a corruption error. 
- - -Non-zeroes reserved bits ------------------------- - -**Last affected version**: v1.5.5 - -**Produced by the reference compressor**: No - -The Sequences section of each block has a header, and one of its elements is a -byte, which describes the compression mode of each symbol. -This byte contains 2 reserved bits which must be set to zero. - -The reference decompressor up to v1.5.5 just ignores these 2 bits. -This behavior has no consequence for the rest of the frame decoding process. - -In newer versions, the 2 reserved bits are actively checked for value zero, -and the decoder reports a corruption error if they are not. diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile deleted file mode 100644 index b3154d9afc0f..000000000000 --- a/doc/educational_decoder/Makefile +++ /dev/null @@ -1,62 +0,0 @@ -# ################################################################ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# You may select, at your option, one of the above-listed licenses. 
-# ################################################################ - -ZSTD ?= zstd # note: requires zstd installation on local system - -UNAME?= $(shell sh -c 'MSYSTEM="MSYS" uname') -ifeq ($(UNAME), SunOS) -DIFF ?= gdiff -else -DIFF ?= diff -endif - -HARNESS_FILES=*.c - -MULTITHREAD_LDFLAGS = -pthread -DEBUGFLAGS= -g -DZSTD_DEBUG=1 -CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ - -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) -CFLAGS ?= -O2 -CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ - -Wstrict-aliasing=1 -Wswitch-enum \ - -Wredundant-decls -Wstrict-prototypes -Wundef \ - -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ - -std=c99 -CFLAGS += $(DEBUGFLAGS) -CFLAGS += $(MOREFLAGS) -FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS) - -harness: $(HARNESS_FILES) - $(CC) $(FLAGS) $^ -o $@ - -clean: - @$(RM) harness *.o - @$(RM) -rf harness.dSYM # MacOS specific - -test: harness - # - # Testing single-file decompression with educational decoder - # - @$(ZSTD) -f README.md -o tmp.zst - @./harness tmp.zst tmp - @$(DIFF) -s tmp README.md - @$(RM) tmp* - # - # Testing dictionary decompression with education decoder - # - # note : files are presented multiple for training, to reach minimum threshold - @$(ZSTD) --train harness.c zstd_decompress.c zstd_decompress.h README.md \ - harness.c zstd_decompress.c zstd_decompress.h README.md \ - harness.c zstd_decompress.c zstd_decompress.h README.md \ - -o dictionary - @$(ZSTD) -f README.md -D dictionary -o tmp.zst - @./harness tmp.zst tmp dictionary - @$(DIFF) -s tmp README.md - @$(RM) tmp* dictionary diff --git a/doc/educational_decoder/README.md b/doc/educational_decoder/README.md deleted file mode 100644 index c89451ca0784..000000000000 --- a/doc/educational_decoder/README.md +++ /dev/null @@ -1,36 +0,0 @@ -Educational Decoder -=================== - -`zstd_decompress.c` is a self-contained implementation in C99 of a decoder, -according to the 
[Zstandard format specification]. -While it does not implement as many features as the reference decoder, -such as the streaming API or content checksums, it is written to be easy to -follow and understand, to help understand how the Zstandard format works. -It's laid out to match the [format specification], -so it can be used to understand how complex segments could be implemented. -It also contains implementations of Huffman and FSE table decoding. - -[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md -[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md - -While the library's primary objective is code clarity, -it also happens to compile into a small object file. -The object file can be made even smaller by removing error messages, -using the macro directive `ZDEC_NO_MESSAGE` at compilation time. -This can be reduced even further by foregoing dictionary support, -by defining `ZDEC_NO_DICTIONARY`. - -`harness.c` provides a simple test harness around the decoder: - - harness <input-file> <output-file> [dictionary] - -As an additional resource to be used with this decoder, -see the `decodecorpus` tool in the [tests] directory. -It generates valid Zstandard frames that can be used to verify -a Zstandard decoder implementation. -Note that to use the tool to verify this decoder implementation, -the --content-size flag should be set, -as this decoder does not handle streaming decoding, -and so it must know the decompressed size in advance. - -[tests]: https://github.com/facebook/zstd/blob/dev/tests/ diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c deleted file mode 100644 index 12c5a801bebd..000000000000 --- a/doc/educational_decoder/harness.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include <stdio.h> -#include <stdlib.h> - -#include "zstd_decompress.h" - -typedef unsigned char u8; - -// If the data doesn't have decompressed size with it, fallback on assuming the -// compression ratio is at most 16 -#define MAX_COMPRESSION_RATIO (16) - -// Protect against allocating too much memory for output -#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024) - -// Error message then exit -#define ERR_OUT(...) { fprintf(stderr, __VA_ARGS__); exit(1); } - - -typedef struct { - u8* address; - size_t size; -} buffer_s; - -static void freeBuffer(buffer_s b) { free(b.address); } - -static buffer_s read_file(const char *path) -{ - FILE* const f = fopen(path, "rb"); - if (!f) ERR_OUT("failed to open file %s \n", path); - - fseek(f, 0L, SEEK_END); - size_t const size = (size_t)ftell(f); - rewind(f); - - void* const ptr = malloc(size); - if (!ptr) ERR_OUT("failed to allocate memory to hold %s \n", path); - - size_t const read = fread(ptr, 1, size, f); - if (read != size) ERR_OUT("error while reading file %s \n", path); - - fclose(f); - buffer_s const b = { ptr, size }; - return b; -} - -static void write_file(const char* path, const u8* ptr, size_t size) -{ - FILE* const f = fopen(path, "wb"); - if (!f) ERR_OUT("failed to open file %s \n", path); - - size_t written = 0; - while (written < size) { - written += fwrite(ptr+written, 1, size, f); - if (ferror(f)) ERR_OUT("error while writing file %s\n", path); - } - - fclose(f); -} - -int main(int argc, char **argv) -{ - if (argc < 3) - ERR_OUT("usage: %s <file.zst> <out_path> [dictionary] \n", argv[0]); - - buffer_s const input = read_file(argv[1]); - - buffer_s dict = { NULL, 0 }; - if (argc >= 4) { - dict = 
read_file(argv[3]); - } - - size_t out_capacity = ZSTD_get_decompressed_size(input.address, input.size); - if (out_capacity == (size_t)-1) { - out_capacity = MAX_COMPRESSION_RATIO * input.size; - fprintf(stderr, "WARNING: Compressed data does not contain " - "decompressed size, going to assume the compression " - "ratio is at most %d (decompressed size of at most " - "%u) \n", - MAX_COMPRESSION_RATIO, (unsigned)out_capacity); - } - if (out_capacity > MAX_OUTPUT_SIZE) - ERR_OUT("Required output size too large for this implementation \n"); - - u8* const output = malloc(out_capacity); - if (!output) ERR_OUT("failed to allocate memory \n"); - - dictionary_t* const parsed_dict = create_dictionary(); - if (dict.size) { -#if defined (ZDEC_NO_DICTIONARY) - printf("dict.size = %zu \n", dict.size); - ERR_OUT("no dictionary support \n"); -#else - parse_dictionary(parsed_dict, dict.address, dict.size); -#endif - } - size_t const decompressed_size = - ZSTD_decompress_with_dict(output, out_capacity, - input.address, input.size, - parsed_dict); - - free_dictionary(parsed_dict); - - write_file(argv[2], output, decompressed_size); - - freeBuffer(input); - freeBuffer(dict); - free(output); - return 0; -} diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c deleted file mode 100644 index 839e085b4819..000000000000 --- a/doc/educational_decoder/zstd_decompress.c +++ /dev/null @@ -1,2323 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ - -/// Zstandard educational decoder implementation -/// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md - -#include <stdint.h> // uint8_t, etc. -#include <stdlib.h> // malloc, free, exit -#include <stdio.h> // fprintf -#include <string.h> // memset, memcpy -#include "zstd_decompress.h" - - -/******* IMPORTANT CONSTANTS *********************************************/ - -// Zstandard frame -// "Magic_Number -// 4 Bytes, little-endian format. Value : 0xFD2FB528" -#define ZSTD_MAGIC_NUMBER 0xFD2FB528U - -// The size of `Block_Content` is limited by `Block_Maximum_Size`, -#define ZSTD_BLOCK_SIZE_MAX ((size_t)128 * 1024) - -// literal blocks can't be larger than their block -#define MAX_LITERALS_SIZE ZSTD_BLOCK_SIZE_MAX - - -/******* UTILITY MACROS AND TYPES *********************************************/ -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -#if defined(ZDEC_NO_MESSAGE) -#define MESSAGE(...) -#else -#define MESSAGE(...) 
fprintf(stderr, "" __VA_ARGS__) -#endif - -/// This decoder calls exit(1) when it encounters an error, however a production -/// library should propagate error codes -#define ERROR(s) \ - do { \ - MESSAGE("Error: %s\n", s); \ - exit(1); \ - } while (0) -#define INP_SIZE() \ - ERROR("Input buffer smaller than it should be or input is " \ - "corrupted") -#define OUT_SIZE() ERROR("Output buffer too small for output") -#define CORRUPTION() ERROR("Corruption detected while decompressing") -#define BAD_ALLOC() ERROR("Memory allocation error") -#define IMPOSSIBLE() ERROR("An impossibility has occurred") - -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; - -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; -/******* END UTILITY MACROS AND TYPES *****************************************/ - -/******* IMPLEMENTATION PRIMITIVE PROTOTYPES **********************************/ -/// The implementations for these functions can be found at the bottom of this -/// file. They implement low-level functionality needed for the higher level -/// decompression functions. 
- -/*** IO STREAM OPERATIONS *************/ - -/// ostream_t/istream_t are used to wrap the pointers/length data passed into -/// ZSTD_decompress, so that all IO operations are safely bounds checked -/// They are written/read forward, and reads are treated as little-endian -/// They should be used opaquely to ensure safety -typedef struct { - u8 *ptr; - size_t len; -} ostream_t; - -typedef struct { - const u8 *ptr; - size_t len; - - // Input often reads a few bits at a time, so maintain an internal offset - int bit_offset; -} istream_t; - -/// The following two functions are the only ones that allow the istream to be -/// non-byte aligned - -/// Reads `num` bits from a bitstream, and updates the internal offset -static inline u64 IO_read_bits(istream_t *const in, const int num_bits); -/// Backs-up the stream by `num` bits so they can be read again -static inline void IO_rewind_bits(istream_t *const in, const int num_bits); -/// If the remaining bits in a byte will be unused, advance to the end of the -/// byte -static inline void IO_align_stream(istream_t *const in); - -/// Write the given byte into the output stream -static inline void IO_write_byte(ostream_t *const out, u8 symb); - -/// Returns the number of bytes left to be read in this stream. The stream must -/// be byte aligned. -static inline size_t IO_istream_len(const istream_t *const in); - -/// Advances the stream by `len` bytes, and returns a pointer to the chunk that -/// was skipped. The stream must be byte aligned. -static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len); -/// Advances the stream by `len` bytes, and returns a pointer to the chunk that -/// was skipped so it can be written to. -static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len); - -/// Advance the inner state by `len` bytes. The stream must be byte aligned. 
-static inline void IO_advance_input(istream_t *const in, size_t len); - -/// Returns an `ostream_t` constructed from the given pointer and length. -static inline ostream_t IO_make_ostream(u8 *out, size_t len); -/// Returns an `istream_t` constructed from the given pointer and length. -static inline istream_t IO_make_istream(const u8 *in, size_t len); - -/// Returns an `istream_t` with the same base as `in`, and length `len`. -/// Then, advance `in` to account for the consumed bytes. -/// `in` must be byte aligned. -static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len); -/*** END IO STREAM OPERATIONS *********/ - -/*** BITSTREAM OPERATIONS *************/ -/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits, -/// and return them interpreted as a little-endian unsigned integer. -static inline u64 read_bits_LE(const u8 *src, const int num_bits, - const size_t offset); - -/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so -/// it updates `offset` to `offset - bits`, and then reads `bits` bits from -/// `src + offset`. If the offset becomes negative, the extra bits at the -/// bottom are filled in with `0` bits instead of reading from before `src`. 
-static inline u64 STREAM_read_bits(const u8 *src, const int bits, - i64 *const offset); -/*** END BITSTREAM OPERATIONS *********/ - -/*** BIT COUNTING OPERATIONS **********/ -/// Returns the index of the highest set bit in `num`, or `-1` if `num == 0` -static inline int highest_set_bit(const u64 num); -/*** END BIT COUNTING OPERATIONS ******/ - -/*** HUFFMAN PRIMITIVES ***************/ -// Table decode method uses exponential memory, so we need to limit depth -#define HUF_MAX_BITS (16) - -// Limit the maximum number of symbols to 256 so we can store a symbol in a byte -#define HUF_MAX_SYMBS (256) - -/// Structure containing all tables necessary for efficient Huffman decoding -typedef struct { - u8 *symbols; - u8 *num_bits; - int max_bits; -} HUF_dtable; - -/// Decode a single symbol and read in enough bits to refresh the state -static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset); -/// Read in a full state's worth of bits to initialize it -static inline void HUF_init_state(const HUF_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset); - -/// Decompresses a single Huffman stream, returns the number of bytes decoded. -/// `src_len` must be the exact length of the Huffman-coded block. -static size_t HUF_decompress_1stream(const HUF_dtable *const dtable, - ostream_t *const out, istream_t *const in); -/// Same as previous but decodes 4 streams, formatted as in the Zstandard -/// specification. -/// `src_len` must be the exact length of the Huffman-coded block. 
-static size_t HUF_decompress_4stream(const HUF_dtable *const dtable, - ostream_t *const out, istream_t *const in); - -/// Initialize a Huffman decoding table using the table of bit counts provided -static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits, - const int num_symbs); -/// Initialize a Huffman decoding table using the table of weights provided -/// Weights follow the definition provided in the Zstandard specification -static void HUF_init_dtable_usingweights(HUF_dtable *const table, - const u8 *const weights, - const int num_symbs); - -/// Free the malloc'ed parts of a decoding table -static void HUF_free_dtable(HUF_dtable *const dtable); -/*** END HUFFMAN PRIMITIVES ***********/ - -/*** FSE PRIMITIVES *******************/ -/// For more description of FSE see -/// https://github.com/Cyan4973/FiniteStateEntropy/ - -// FSE table decoding uses exponential memory, so limit the maximum accuracy -#define FSE_MAX_ACCURACY_LOG (15) -// Limit the maximum number of symbols so they can be stored in a single byte -#define FSE_MAX_SYMBS (256) - -/// The tables needed to decode FSE encoded streams -typedef struct { - u8 *symbols; - u8 *num_bits; - u16 *new_state_base; - int accuracy_log; -} FSE_dtable; - -/// Return the symbol for the current state -static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable, - const u16 state); -/// Read the number of bits necessary to update state, update, and shift offset -/// back to reflect the bits read -static inline void FSE_update_state(const FSE_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset); - -/// Combine peek and update: decode a symbol and update the state -static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset); - -/// Read bits from the stream to initialize the state and shift offset back -static inline void FSE_init_state(const FSE_dtable *const dtable, - u16 *const state, const u8 *const 
src, - i64 *const offset); - -/// Decompress two interleaved bitstreams (e.g. compressed Huffman weights) -/// using an FSE decoding table. `src_len` must be the exact length of the -/// block. -static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable, - ostream_t *const out, - istream_t *const in); - -/// Initialize a decoding table using normalized frequencies. -static void FSE_init_dtable(FSE_dtable *const dtable, - const i16 *const norm_freqs, const int num_symbs, - const int accuracy_log); - -/// Decode an FSE header as defined in the Zstandard format specification and -/// use the decoded frequencies to initialize a decoding table. -static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in, - const int max_accuracy_log); - -/// Initialize an FSE table that will always return the same symbol and consume -/// 0 bits per symbol, to be used for RLE mode in sequence commands -static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb); - -/// Free the malloc'ed parts of a decoding table -static void FSE_free_dtable(FSE_dtable *const dtable); -/*** END FSE PRIMITIVES ***************/ - -/******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/ - -/******* ZSTD HELPER STRUCTS AND PROTOTYPES ***********************************/ - -/// A small structure that can be reused in various places that need to access -/// frame header information -typedef struct { - // The size of window that we need to be able to contiguously store for - // references - size_t window_size; - // The total output size of this compressed frame - size_t frame_content_size; - - // The dictionary id if this frame uses one - u32 dictionary_id; - - // Whether or not the content of this frame has a checksum - int content_checksum_flag; - // Whether or not the output for this frame is in a single segment - int single_segment_flag; -} frame_header_t; - -/// The context needed to decode blocks in a frame -typedef struct { - 
frame_header_t header; - - // The total amount of data available for backreferences, to determine if an - // offset too large to be correct - size_t current_total_output; - - const u8 *dict_content; - size_t dict_content_len; - - // Entropy encoding tables so they can be repeated by future blocks instead - // of retransmitting - HUF_dtable literals_dtable; - FSE_dtable ll_dtable; - FSE_dtable ml_dtable; - FSE_dtable of_dtable; - - // The last 3 offsets for the special "repeat offsets". - u64 previous_offsets[3]; -} frame_context_t; - -/// The decoded contents of a dictionary so that it doesn't have to be repeated -/// for each frame that uses it -struct dictionary_s { - // Entropy tables - HUF_dtable literals_dtable; - FSE_dtable ll_dtable; - FSE_dtable ml_dtable; - FSE_dtable of_dtable; - - // Raw content for backreferences - u8 *content; - size_t content_size; - - // Offset history to prepopulate the frame's history - u64 previous_offsets[3]; - - u32 dictionary_id; -}; - -/// A tuple containing the parts necessary to decode and execute a ZSTD sequence -/// command -typedef struct { - u32 literal_length; - u32 match_length; - u32 offset; -} sequence_command_t; - -/// The decoder works top-down, starting at the high level like Zstd frames, and -/// working down to lower more technical levels such as blocks, literals, and -/// sequences. The high-level functions roughly follow the outline of the -/// format specification: -/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md - -/// Before the implementation of each high-level function declared here, the -/// prototypes for their helper functions are defined and explained - -/// Decode a single Zstd frame, or error if the input is not a valid frame. -/// Accepts a dict argument, which may be NULL indicating no dictionary. 
-/// See -/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame-concatenation -static void decode_frame(ostream_t *const out, istream_t *const in, - const dictionary_t *const dict); - -// Decode data in a compressed block -static void decompress_block(frame_context_t *const ctx, ostream_t *const out, - istream_t *const in); - -// Decode the literals section of a block -static size_t decode_literals(frame_context_t *const ctx, istream_t *const in, - u8 **const literals); - -// Decode the sequences part of a block -static size_t decode_sequences(frame_context_t *const ctx, istream_t *const in, - sequence_command_t **const sequences); - -// Execute the decoded sequences on the literals block -static void execute_sequences(frame_context_t *const ctx, ostream_t *const out, - const u8 *const literals, - const size_t literals_len, - const sequence_command_t *const sequences, - const size_t num_sequences); - -// Copies literals and returns the total literal length that was copied -static u32 copy_literals(const size_t seq, istream_t *litstream, - ostream_t *const out); - -// Given an offset code from a sequence command (either an actual offset value -// or an index for previous offset), computes the correct offset and updates -// the offset history -static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist); - -// Given an offset, match length, and total output, as well as the frame -// context for the dictionary, determines if the dictionary is used and -// executes the copy operation -static void execute_match_copy(frame_context_t *const ctx, size_t offset, - size_t match_length, size_t total_output, - ostream_t *const out); - -/******* END ZSTD HELPER STRUCTS AND PROTOTYPES *******************************/ - -size_t ZSTD_decompress(void *const dst, const size_t dst_len, - const void *const src, const size_t src_len) { - dictionary_t* const uninit_dict = create_dictionary(); - size_t const decomp_size = 
ZSTD_decompress_with_dict(dst, dst_len, src, - src_len, uninit_dict); - free_dictionary(uninit_dict); - return decomp_size; -} - -size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len, - const void *const src, const size_t src_len, - dictionary_t* parsed_dict) { - - istream_t in = IO_make_istream(src, src_len); - ostream_t out = IO_make_ostream(dst, dst_len); - - // "A content compressed by Zstandard is transformed into a Zstandard frame. - // Multiple frames can be appended into a single file or stream. A frame is - // totally independent, has a defined beginning and end, and a set of - // parameters which tells the decoder how to decompress it." - - /* this decoder assumes decompression of a single frame */ - decode_frame(&out, &in, parsed_dict); - - return (size_t)(out.ptr - (u8 *)dst); -} - -/******* FRAME DECODING ******************************************************/ - -static void decode_data_frame(ostream_t *const out, istream_t *const in, - const dictionary_t *const dict); -static void init_frame_context(frame_context_t *const context, - istream_t *const in, - const dictionary_t *const dict); -static void free_frame_context(frame_context_t *const context); -static void parse_frame_header(frame_header_t *const header, - istream_t *const in); -static void frame_context_apply_dict(frame_context_t *const ctx, - const dictionary_t *const dict); - -static void decompress_data(frame_context_t *const ctx, ostream_t *const out, - istream_t *const in); - -static void decode_frame(ostream_t *const out, istream_t *const in, - const dictionary_t *const dict) { - const u32 magic_number = (u32)IO_read_bits(in, 32); - if (magic_number == ZSTD_MAGIC_NUMBER) { - // ZSTD frame - decode_data_frame(out, in, dict); - - return; - } - - // not a real frame or a skippable frame - ERROR("Tried to decode non-ZSTD frame"); -} - -/// Decode a frame that contains compressed data. Not all frames do as there -/// are skippable frames. 
-/// See -/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#general-structure-of-zstandard-frame-format -static void decode_data_frame(ostream_t *const out, istream_t *const in, - const dictionary_t *const dict) { - frame_context_t ctx; - - // Initialize the context that needs to be carried from block to block - init_frame_context(&ctx, in, dict); - - if (ctx.header.frame_content_size != 0 && - ctx.header.frame_content_size > out->len) { - OUT_SIZE(); - } - - decompress_data(&ctx, out, in); - - free_frame_context(&ctx); -} - -/// Takes the information provided in the header and dictionary, and initializes -/// the context for this frame -static void init_frame_context(frame_context_t *const context, - istream_t *const in, - const dictionary_t *const dict) { - // Most fields in context are correct when initialized to 0 - memset(context, 0, sizeof(frame_context_t)); - - // Parse data from the frame header - parse_frame_header(&context->header, in); - - // Set up the offset history for the repeat offset commands - context->previous_offsets[0] = 1; - context->previous_offsets[1] = 4; - context->previous_offsets[2] = 8; - - // Apply details from the dict if it exists - frame_context_apply_dict(context, dict); -} - -static void free_frame_context(frame_context_t *const context) { - HUF_free_dtable(&context->literals_dtable); - - FSE_free_dtable(&context->ll_dtable); - FSE_free_dtable(&context->ml_dtable); - FSE_free_dtable(&context->of_dtable); - - memset(context, 0, sizeof(frame_context_t)); -} - -static void parse_frame_header(frame_header_t *const header, - istream_t *const in) { - // "The first header's byte is called the Frame_Header_Descriptor. It tells - // which other fields are present. Decoding this byte is enough to tell the - // size of Frame_Header. 
- // - // Bit number Field name - // 7-6 Frame_Content_Size_flag - // 5 Single_Segment_flag - // 4 Unused_bit - // 3 Reserved_bit - // 2 Content_Checksum_flag - // 1-0 Dictionary_ID_flag" - const u8 descriptor = (u8)IO_read_bits(in, 8); - - // decode frame header descriptor into flags - const u8 frame_content_size_flag = descriptor >> 6; - const u8 single_segment_flag = (descriptor >> 5) & 1; - const u8 reserved_bit = (descriptor >> 3) & 1; - const u8 content_checksum_flag = (descriptor >> 2) & 1; - const u8 dictionary_id_flag = descriptor & 3; - - if (reserved_bit != 0) { - CORRUPTION(); - } - - header->single_segment_flag = single_segment_flag; - header->content_checksum_flag = content_checksum_flag; - - // decode window size - if (!single_segment_flag) { - // "Provides guarantees on maximum back-reference distance that will be - // used within compressed data. This information is important for - // decoders to allocate enough memory. - // - // Bit numbers 7-3 2-0 - // Field name Exponent Mantissa" - u8 window_descriptor = (u8)IO_read_bits(in, 8); - u8 exponent = window_descriptor >> 3; - u8 mantissa = window_descriptor & 7; - - // Use the algorithm from the specification to compute window size - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor - size_t window_base = (size_t)1 << (10 + exponent); - size_t window_add = (window_base / 8) * mantissa; - header->window_size = window_base + window_add; - } - - // decode dictionary id if it exists - if (dictionary_id_flag) { - // "This is a variable size field, which contains the ID of the - // dictionary required to properly decode the frame. Note that this - // field is optional. When it's not present, it's up to the caller to - // make sure it uses the correct dictionary. Format is little-endian." 
- const int bytes_array[] = {0, 1, 2, 4}; - const int bytes = bytes_array[dictionary_id_flag]; - - header->dictionary_id = (u32)IO_read_bits(in, bytes * 8); - } else { - header->dictionary_id = 0; - } - - // decode frame content size if it exists - if (single_segment_flag || frame_content_size_flag) { - // "This is the original (uncompressed) size. This information is - // optional. The Field_Size is provided according to value of - // Frame_Content_Size_flag. The Field_Size can be equal to 0 (not - // present), 1, 2, 4 or 8 bytes. Format is little-endian." - // - // if frame_content_size_flag == 0 but single_segment_flag is set, we - // still have a 1 byte field - const int bytes_array[] = {1, 2, 4, 8}; - const int bytes = bytes_array[frame_content_size_flag]; - - header->frame_content_size = IO_read_bits(in, bytes * 8); - if (bytes == 2) { - // "When Field_Size is 2, the offset of 256 is added." - header->frame_content_size += 256; - } - } else { - header->frame_content_size = 0; - } - - if (single_segment_flag) { - // "The Window_Descriptor byte is optional. It is absent when - // Single_Segment_flag is set. In this case, the maximum back-reference - // distance is the content size itself, which can be any value from 1 to - // 2^64-1 bytes (16 EB)." - header->window_size = header->frame_content_size; - } -} - -/// Decompress the data from a frame block by block -static void decompress_data(frame_context_t *const ctx, ostream_t *const out, - istream_t *const in) { - // "A frame encapsulates one or multiple blocks. Each block can be - // compressed or not, and has a guaranteed maximum content size, which - // depends on frame parameters. Unlike frames, each block depends on - // previous blocks for proper decoding. However, each block can be - // decompressed without waiting for its successor, allowing streaming - // operations." - int last_block = 0; - do { - // "Last_Block - // - // The lowest bit signals if this block is the last one. 
Frame ends - // right after this block. - // - // Block_Type and Block_Size - // - // The next 2 bits represent the Block_Type, while the remaining 21 bits - // represent the Block_Size. Format is little-endian." - last_block = (int)IO_read_bits(in, 1); - const int block_type = (int)IO_read_bits(in, 2); - const size_t block_len = IO_read_bits(in, 21); - - switch (block_type) { - case 0: { - // "Raw_Block - this is an uncompressed block. Block_Size is the - // number of bytes to read and copy." - const u8 *const read_ptr = IO_get_read_ptr(in, block_len); - u8 *const write_ptr = IO_get_write_ptr(out, block_len); - - // Copy the raw data into the output - memcpy(write_ptr, read_ptr, block_len); - - ctx->current_total_output += block_len; - break; - } - case 1: { - // "RLE_Block - this is a single byte, repeated N times. In which - // case, Block_Size is the size to regenerate, while the - // "compressed" block is just 1 byte (the byte to repeat)." - const u8 *const read_ptr = IO_get_read_ptr(in, 1); - u8 *const write_ptr = IO_get_write_ptr(out, block_len); - - // Copy `block_len` copies of `read_ptr[0]` to the output - memset(write_ptr, read_ptr[0], block_len); - - ctx->current_total_output += block_len; - break; - } - case 2: { - // "Compressed_Block - this is a Zstandard compressed block, - // detailed in another section of this specification. Block_Size is - // the compressed size. - - // Create a sub-stream for the block - istream_t block_stream = IO_make_sub_istream(in, block_len); - decompress_block(ctx, out, &block_stream); - break; - } - case 3: - // "Reserved - this is not a block. This value cannot be used with - // current version of this specification." 
- CORRUPTION(); - break; - default: - IMPOSSIBLE(); - } - } while (!last_block); - - if (ctx->header.content_checksum_flag) { - // This program does not support checking the checksum, so skip over it - // if it's present - IO_advance_input(in, 4); - } -} -/******* END FRAME DECODING ***************************************************/ - -/******* BLOCK DECOMPRESSION **************************************************/ -static void decompress_block(frame_context_t *const ctx, ostream_t *const out, - istream_t *const in) { - // "A compressed block consists of 2 sections : - // - // Literals_Section - // Sequences_Section" - - - // Part 1: decode the literals block - u8 *literals = NULL; - const size_t literals_size = decode_literals(ctx, in, &literals); - - // Part 2: decode the sequences block - sequence_command_t *sequences = NULL; - const size_t num_sequences = - decode_sequences(ctx, in, &sequences); - - // Part 3: combine literals and sequence commands to generate output - execute_sequences(ctx, out, literals, literals_size, sequences, - num_sequences); - free(literals); - free(sequences); -} -/******* END BLOCK DECOMPRESSION **********************************************/ - -/******* LITERALS DECODING ****************************************************/ -static size_t decode_literals_simple(istream_t *const in, u8 **const literals, - const int block_type, - const int size_format); -static size_t decode_literals_compressed(frame_context_t *const ctx, - istream_t *const in, - u8 **const literals, - const int block_type, - const int size_format); -static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in); -static void fse_decode_hufweights(ostream_t *weights, istream_t *const in, - int *const num_symbs); - -static size_t decode_literals(frame_context_t *const ctx, istream_t *const in, - u8 **const literals) { - // "Literals can be stored uncompressed or compressed using Huffman prefix - // codes. 
When compressed, an optional tree description can be present, - // followed by 1 or 4 streams." - // - // "Literals_Section_Header - // - // Header is in charge of describing how literals are packed. It's a - // byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using - // little-endian convention." - // - // "Literals_Block_Type - // - // This field uses 2 lowest bits of first byte, describing 4 different block - // types" - // - // size_format takes between 1 and 2 bits - int block_type = (int)IO_read_bits(in, 2); - int size_format = (int)IO_read_bits(in, 2); - - if (block_type <= 1) { - // Raw or RLE literals block - return decode_literals_simple(in, literals, block_type, - size_format); - } else { - // Huffman compressed literals - return decode_literals_compressed(ctx, in, literals, block_type, - size_format); - } -} - -/// Decodes literals blocks in raw or RLE form -static size_t decode_literals_simple(istream_t *const in, u8 **const literals, - const int block_type, - const int size_format) { - size_t size; - switch (size_format) { - // These cases are in the form ?0 - // In this case, the ? bit is actually part of the size field - case 0: - case 2: - // "Size_Format uses 1 bit. Regenerated_Size uses 5 bits (0-31)." - IO_rewind_bits(in, 1); - size = IO_read_bits(in, 5); - break; - case 1: - // "Size_Format uses 2 bits. Regenerated_Size uses 12 bits (0-4095)." - size = IO_read_bits(in, 12); - break; - case 3: - // "Size_Format uses 2 bits. Regenerated_Size uses 20 bits (0-1048575)." - size = IO_read_bits(in, 20); - break; - default: - // Size format is in range 0-3 - IMPOSSIBLE(); - } - - if (size > MAX_LITERALS_SIZE) { - CORRUPTION(); - } - - *literals = malloc(size); - if (!*literals) { - BAD_ALLOC(); - } - - switch (block_type) { - case 0: { - // "Raw_Literals_Block - Literals are stored uncompressed." 
- const u8 *const read_ptr = IO_get_read_ptr(in, size); - memcpy(*literals, read_ptr, size); - break; - } - case 1: { - // "RLE_Literals_Block - Literals consist of a single byte value repeated N times." - const u8 *const read_ptr = IO_get_read_ptr(in, 1); - memset(*literals, read_ptr[0], size); - break; - } - default: - IMPOSSIBLE(); - } - - return size; -} - -/// Decodes Huffman compressed literals -static size_t decode_literals_compressed(frame_context_t *const ctx, - istream_t *const in, - u8 **const literals, - const int block_type, - const int size_format) { - size_t regenerated_size, compressed_size; - // Only size_format=0 has 1 stream, so default to 4 - int num_streams = 4; - switch (size_format) { - case 0: - // "A single stream. Both Compressed_Size and Regenerated_Size use 10 - // bits (0-1023)." - num_streams = 1; - // Fall through as it has the same size format - /* fallthrough */ - case 1: - // "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits - // (0-1023)." - regenerated_size = IO_read_bits(in, 10); - compressed_size = IO_read_bits(in, 10); - break; - case 2: - // "4 streams. Both Compressed_Size and Regenerated_Size use 14 bits - // (0-16383)." - regenerated_size = IO_read_bits(in, 14); - compressed_size = IO_read_bits(in, 14); - break; - case 3: - // "4 streams. Both Compressed_Size and Regenerated_Size use 18 bits - // (0-262143)." - regenerated_size = IO_read_bits(in, 18); - compressed_size = IO_read_bits(in, 18); - break; - default: - // Impossible - IMPOSSIBLE(); - } - if (regenerated_size > MAX_LITERALS_SIZE) { - CORRUPTION(); - } - - *literals = malloc(regenerated_size); - if (!*literals) { - BAD_ALLOC(); - } - - ostream_t lit_stream = IO_make_ostream(*literals, regenerated_size); - istream_t huf_stream = IO_make_sub_istream(in, compressed_size); - - if (block_type == 2) { - // Decode the provided Huffman table - // "This section is only present when Literals_Block_Type type is - // Compressed_Literals_Block (2)." 
- - HUF_free_dtable(&ctx->literals_dtable); - decode_huf_table(&ctx->literals_dtable, &huf_stream); - } else { - // If the previous Huffman table is being repeated, ensure it exists - if (!ctx->literals_dtable.symbols) { - CORRUPTION(); - } - } - - size_t symbols_decoded; - if (num_streams == 1) { - symbols_decoded = HUF_decompress_1stream(&ctx->literals_dtable, &lit_stream, &huf_stream); - } else { - symbols_decoded = HUF_decompress_4stream(&ctx->literals_dtable, &lit_stream, &huf_stream); - } - - if (symbols_decoded != regenerated_size) { - CORRUPTION(); - } - - return regenerated_size; -} - -// Decode the Huffman table description -static void decode_huf_table(HUF_dtable *const dtable, istream_t *const in) { - // "All literal values from zero (included) to last present one (excluded) - // are represented by Weight with values from 0 to Max_Number_of_Bits." - - // "This is a single byte value (0-255), which describes how to decode the list of weights." - const u8 header = IO_read_bits(in, 8); - - u8 weights[HUF_MAX_SYMBS]; - memset(weights, 0, sizeof(weights)); - - int num_symbs; - - if (header >= 128) { - // "This is a direct representation, where each Weight is written - // directly as a 4 bits field (0-15). The full representation occupies - // ((Number_of_Symbols+1)/2) bytes, meaning it uses a last full byte - // even if Number_of_Symbols is odd. Number_of_Symbols = headerByte - - // 127" - num_symbs = header - 127; - const size_t bytes = (num_symbs + 1) / 2; - - const u8 *const weight_src = IO_get_read_ptr(in, bytes); - - for (int i = 0; i < num_symbs; i++) { - // "They are encoded forward, 2 - // weights to a byte with the first weight taking the top four bits - // and the second taking the bottom four (e.g. the following - // operations could be used to read the weights: Weight[0] = - // (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf), etc.)." 
- if (i % 2 == 0) { - weights[i] = weight_src[i / 2] >> 4; - } else { - weights[i] = weight_src[i / 2] & 0xf; - } - } - } else { - // The weights are FSE encoded, decode them before we can construct the - // table - istream_t fse_stream = IO_make_sub_istream(in, header); - ostream_t weight_stream = IO_make_ostream(weights, HUF_MAX_SYMBS); - fse_decode_hufweights(&weight_stream, &fse_stream, &num_symbs); - } - - // Construct the table using the decoded weights - HUF_init_dtable_usingweights(dtable, weights, num_symbs); -} - -static void fse_decode_hufweights(ostream_t *weights, istream_t *const in, - int *const num_symbs) { - const int MAX_ACCURACY_LOG = 7; - - FSE_dtable dtable; - - // "An FSE bitstream starts by a header, describing probabilities - // distribution. It will create a Decoding Table. For a list of Huffman - // weights, maximum accuracy is 7 bits." - FSE_decode_header(&dtable, in, MAX_ACCURACY_LOG); - - // Decode the weights - *num_symbs = FSE_decompress_interleaved2(&dtable, weights, in); - - FSE_free_dtable(&dtable); -} -/******* END LITERALS DECODING ************************************************/ - -/******* SEQUENCE DECODING ****************************************************/ -/// The combination of FSE states needed to decode sequences -typedef struct { - FSE_dtable ll_table; - FSE_dtable of_table; - FSE_dtable ml_table; - - u16 ll_state; - u16 of_state; - u16 ml_state; -} sequence_states_t; - -/// Different modes to signal to decode_seq_tables what to do -typedef enum { - seq_literal_length = 0, - seq_offset = 1, - seq_match_length = 2, -} seq_part_t; - -typedef enum { - seq_predefined = 0, - seq_rle = 1, - seq_fse = 2, - seq_repeat = 3, -} seq_mode_t; - -/// The predefined FSE distribution tables for `seq_predefined` mode -static const i16 SEQ_LITERAL_LENGTH_DEFAULT_DIST[36] = { - 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1}; -static const i16 SEQ_OFFSET_DEFAULT_DIST[29] 
= { - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}; -static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = { - 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; - -/// The sequence decoding baseline and number of additional bits to read/add -/// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets -static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536}; -static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - -static const u32 SEQ_MATCH_LENGTH_BASELINES[53] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, - 99, 131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539}; -static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - -/// Offset decoding is simpler so we just need a maximum code value -static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52}; - -static void decompress_sequences(frame_context_t *const ctx, - istream_t *const in, - sequence_command_t *const sequences, - const size_t num_sequences); -static sequence_command_t decode_sequence(sequence_states_t *const state, - const u8 *const src, - i64 *const offset, - int lastSequence); -static void decode_seq_table(FSE_dtable *const table, istream_t *const in, - const seq_part_t type, const seq_mode_t mode); - 
-static size_t decode_sequences(frame_context_t *const ctx, istream_t *in, - sequence_command_t **const sequences) { - // "A compressed block is a succession of sequences . A sequence is a - // literal copy command, followed by a match copy command. A literal copy - // command specifies a length. It is the number of bytes to be copied (or - // extracted) from the literal section. A match copy command specifies an - // offset and a length. The offset gives the position to copy from, which - // can be within a previous block." - - size_t num_sequences; - - // "Number_of_Sequences - // - // This is a variable size field using between 1 and 3 bytes. Let's call its - // first byte byte0." - u8 header = IO_read_bits(in, 8); - if (header < 128) { - // "Number_of_Sequences = byte0 . Uses 1 byte." - num_sequences = header; - } else if (header < 255) { - // "Number_of_Sequences = ((byte0-128) << 8) + byte1 . Uses 2 bytes." - num_sequences = ((header - 128) << 8) + IO_read_bits(in, 8); - } else { - // "Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00 . Uses 3 bytes." - num_sequences = IO_read_bits(in, 16) + 0x7F00; - } - - if (num_sequences == 0) { - // "There are no sequences. The sequence section stops there." - *sequences = NULL; - return 0; - } - - *sequences = malloc(num_sequences * sizeof(sequence_command_t)); - if (!*sequences) { - BAD_ALLOC(); - } - - decompress_sequences(ctx, in, *sequences, num_sequences); - return num_sequences; -} - -/// Decompress the FSE encoded sequence commands -static void decompress_sequences(frame_context_t *const ctx, istream_t *in, - sequence_command_t *const sequences, - const size_t num_sequences) { - // "The Sequences_Section regroup all symbols required to decode commands. - // There are 3 symbol types : literals lengths, offsets and match lengths. - // They are encoded together, interleaved, in a single bitstream." 
- - // "Symbol compression modes - // - // This is a single byte, defining the compression mode of each symbol - // type." - // - // Bit number : Field name - // 7-6 : Literals_Lengths_Mode - // 5-4 : Offsets_Mode - // 3-2 : Match_Lengths_Mode - // 1-0 : Reserved - u8 compression_modes = IO_read_bits(in, 8); - - if ((compression_modes & 3) != 0) { - // Reserved bits set - CORRUPTION(); - } - - // "Following the header, up to 3 distribution tables can be described. When - // present, they are in this order : - // - // Literals lengths - // Offsets - // Match Lengths" - // Update the tables we have stored in the context - decode_seq_table(&ctx->ll_dtable, in, seq_literal_length, - (compression_modes >> 6) & 3); - - decode_seq_table(&ctx->of_dtable, in, seq_offset, - (compression_modes >> 4) & 3); - - decode_seq_table(&ctx->ml_dtable, in, seq_match_length, - (compression_modes >> 2) & 3); - - - sequence_states_t states; - - // Initialize the decoding tables - { - states.ll_table = ctx->ll_dtable; - states.of_table = ctx->of_dtable; - states.ml_table = ctx->ml_dtable; - } - - const size_t len = IO_istream_len(in); - const u8 *const src = IO_get_read_ptr(in, len); - - // "After writing the last bit containing information, the compressor writes - // a single 1-bit and then fills the byte with 0-7 0 bits of padding." - const int padding = 8 - highest_set_bit(src[len - 1]); - // The offset starts at the end because FSE streams are read backwards - i64 bit_offset = (i64)(len * 8 - (size_t)padding); - - // "The bitstream starts with initial state values, each using the required - // number of bits in their respective accuracy, decoded previously from - // their normalized distribution. - // - // It starts by Literals_Length_State, followed by Offset_State, and finally - // Match_Length_State." 
- FSE_init_state(&states.ll_table, &states.ll_state, src, &bit_offset); - FSE_init_state(&states.of_table, &states.of_state, src, &bit_offset); - FSE_init_state(&states.ml_table, &states.ml_state, src, &bit_offset); - - for (size_t i = 0; i < num_sequences; i++) { - // Decode sequences one by one - sequences[i] = decode_sequence(&states, src, &bit_offset, i==num_sequences-1); - } - - if (bit_offset != 0) { - CORRUPTION(); - } -} - -// Decode a single sequence and update the state -static sequence_command_t decode_sequence(sequence_states_t *const states, - const u8 *const src, - i64 *const offset, - int lastSequence) { - // "Each symbol is a code in its own context, which specifies Baseline and - // Number_of_Bits to add. Codes are FSE compressed, and interleaved with raw - // additional bits in the same bitstream." - - // Decode symbols, but don't update states - const u8 of_code = FSE_peek_symbol(&states->of_table, states->of_state); - const u8 ll_code = FSE_peek_symbol(&states->ll_table, states->ll_state); - const u8 ml_code = FSE_peek_symbol(&states->ml_table, states->ml_state); - - // Offset doesn't need a max value as it's not decoded using a table - if (ll_code > SEQ_MAX_CODES[seq_literal_length] || - ml_code > SEQ_MAX_CODES[seq_match_length]) { - CORRUPTION(); - } - - // Read the interleaved bits - sequence_command_t seq; - // "Decoding starts by reading the Number_of_Bits required to decode Offset. - // It then does the same for Match_Length, and then for Literals_Length." - seq.offset = ((u32)1 << of_code) + STREAM_read_bits(src, of_code, offset); - - seq.match_length = - SEQ_MATCH_LENGTH_BASELINES[ml_code] + - STREAM_read_bits(src, SEQ_MATCH_LENGTH_EXTRA_BITS[ml_code], offset); - - seq.literal_length = - SEQ_LITERAL_LENGTH_BASELINES[ll_code] + - STREAM_read_bits(src, SEQ_LITERAL_LENGTH_EXTRA_BITS[ll_code], offset); - - // "If it is not the last sequence in the block, the next operation is to - // update states. 
Using the rules pre-calculated in the decoding tables, - // Literals_Length_State is updated, followed by Match_Length_State, and - // then Offset_State." - // If the stream is complete don't read bits to update state - if (!lastSequence) { - FSE_update_state(&states->ll_table, &states->ll_state, src, offset); - FSE_update_state(&states->ml_table, &states->ml_state, src, offset); - FSE_update_state(&states->of_table, &states->of_state, src, offset); - } - - return seq; -} - -/// Given a sequence part and table mode, decode the FSE distribution -/// Errors if the mode is `seq_repeat` without a pre-existing table in `table` -static void decode_seq_table(FSE_dtable *const table, istream_t *const in, - const seq_part_t type, const seq_mode_t mode) { - // Constant arrays indexed by seq_part_t - const i16 *const default_distributions[] = {SEQ_LITERAL_LENGTH_DEFAULT_DIST, - SEQ_OFFSET_DEFAULT_DIST, - SEQ_MATCH_LENGTH_DEFAULT_DIST}; - const size_t default_distribution_lengths[] = {36, 29, 53}; - const size_t default_distribution_accuracies[] = {6, 5, 6}; - - const size_t max_accuracies[] = {9, 8, 9}; - - if (mode != seq_repeat) { - // Free old one before overwriting - FSE_free_dtable(table); - } - - switch (mode) { - case seq_predefined: { - // "Predefined_Mode : uses a predefined distribution table." - const i16 *distribution = default_distributions[type]; - const size_t symbs = default_distribution_lengths[type]; - const size_t accuracy_log = default_distribution_accuracies[type]; - - FSE_init_dtable(table, distribution, symbs, accuracy_log); - break; - } - case seq_rle: { - // "RLE_Mode : it's a single code, repeated Number_of_Sequences times." - const u8 symb = IO_get_read_ptr(in, 1)[0]; - FSE_init_dtable_rle(table, symb); - break; - } - case seq_fse: { - // "FSE_Compressed_Mode : standard FSE compression. 
A distribution table - // will be present " - FSE_decode_header(table, in, max_accuracies[type]); - break; - } - case seq_repeat: - // "Repeat_Mode : reuse distribution table from previous compressed - // block." - // Nothing to do here, table will be unchanged - if (!table->symbols) { - // This mode is invalid if we don't already have a table - CORRUPTION(); - } - break; - default: - // Impossible, as mode is from 0-3 - IMPOSSIBLE(); - break; - } - -} -/******* END SEQUENCE DECODING ************************************************/ - -/******* SEQUENCE EXECUTION ***************************************************/ -static void execute_sequences(frame_context_t *const ctx, ostream_t *const out, - const u8 *const literals, - const size_t literals_len, - const sequence_command_t *const sequences, - const size_t num_sequences) { - istream_t litstream = IO_make_istream(literals, literals_len); - - u64 *const offset_hist = ctx->previous_offsets; - size_t total_output = ctx->current_total_output; - - for (size_t i = 0; i < num_sequences; i++) { - const sequence_command_t seq = sequences[i]; - { - const u32 literals_size = copy_literals(seq.literal_length, &litstream, out); - total_output += literals_size; - } - - size_t const offset = compute_offset(seq, offset_hist); - - size_t const match_length = seq.match_length; - - execute_match_copy(ctx, offset, match_length, total_output, out); - - total_output += match_length; - } - - // Copy any leftover literals - { - size_t len = IO_istream_len(&litstream); - copy_literals(len, &litstream, out); - total_output += len; - } - - ctx->current_total_output = total_output; -} - -static u32 copy_literals(const size_t literal_length, istream_t *litstream, - ostream_t *const out) { - // If the sequence asks for more literals than are left, the - // sequence must be corrupted - if (literal_length > IO_istream_len(litstream)) { - CORRUPTION(); - } - - u8 *const write_ptr = IO_get_write_ptr(out, literal_length); - const u8 *const 
read_ptr = - IO_get_read_ptr(litstream, literal_length); - // Copy literals to output - memcpy(write_ptr, read_ptr, literal_length); - - return literal_length; -} - -static size_t compute_offset(sequence_command_t seq, u64 *const offset_hist) { - size_t offset; - // Offsets are special, we need to handle the repeat offsets - if (seq.offset <= 3) { - // "The first 3 values define a repeated offset and we will call - // them Repeated_Offset1, Repeated_Offset2, and Repeated_Offset3. - // They are sorted in recency order, with Repeated_Offset1 meaning - // 'most recent one'". - - // Use 0 indexing for the array - u32 idx = seq.offset - 1; - if (seq.literal_length == 0) { - // "There is an exception though, when current sequence's - // literals length is 0. In this case, repeated offsets are - // shifted by one, so Repeated_Offset1 becomes Repeated_Offset2, - // Repeated_Offset2 becomes Repeated_Offset3, and - // Repeated_Offset3 becomes Repeated_Offset1 - 1_byte." - idx++; - } - - if (idx == 0) { - offset = offset_hist[0]; - } else { - // If idx == 3 then literal length was 0 and the offset was 3, - // as per the exception listed above - offset = idx < 3 ? 
offset_hist[idx] : offset_hist[0] - 1; - - // If idx == 1 we don't need to modify offset_hist[2], since - // we're using the second-most recent code - if (idx > 1) { - offset_hist[2] = offset_hist[1]; - } - offset_hist[1] = offset_hist[0]; - offset_hist[0] = offset; - } - } else { - // When it's not a repeat offset: - // "if (Offset_Value > 3) offset = Offset_Value - 3;" - offset = seq.offset - 3; - - // Shift back history - offset_hist[2] = offset_hist[1]; - offset_hist[1] = offset_hist[0]; - offset_hist[0] = offset; - } - return offset; -} - -static void execute_match_copy(frame_context_t *const ctx, size_t offset, - size_t match_length, size_t total_output, - ostream_t *const out) { - u8 *write_ptr = IO_get_write_ptr(out, match_length); - if (total_output <= ctx->header.window_size) { - // In this case offset might go back into the dictionary - if (offset > total_output + ctx->dict_content_len) { - // The offset goes beyond even the dictionary - CORRUPTION(); - } - - if (offset > total_output) { - // "The rest of the dictionary is its content. The content act - // as a "past" in front of data to compress or decompress, so it - // can be referenced in sequence commands." 
- const size_t dict_copy = - MIN(offset - total_output, match_length); - const size_t dict_offset = - ctx->dict_content_len - (offset - total_output); - - memcpy(write_ptr, ctx->dict_content + dict_offset, dict_copy); - write_ptr += dict_copy; - match_length -= dict_copy; - } - } else if (offset > ctx->header.window_size) { - CORRUPTION(); - } - - // We must copy byte by byte because the match length might be larger - // than the offset - // ex: if the output so far was "abc", a command with offset=3 and - // match_length=6 would produce "abcabcabc" as the new output - for (size_t j = 0; j < match_length; j++) { - *write_ptr = *(write_ptr - offset); - write_ptr++; - } -} -/******* END SEQUENCE EXECUTION ***********************************************/ - -/******* OUTPUT SIZE COUNTING *************************************************/ -/// Get the decompressed size of an input stream so memory can be allocated in -/// advance. -/// This implementation assumes `src` points to a single ZSTD-compressed frame -size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) { - istream_t in = IO_make_istream(src, src_len); - - // get decompressed size from ZSTD frame header - { - const u32 magic_number = (u32)IO_read_bits(&in, 32); - - if (magic_number == ZSTD_MAGIC_NUMBER) { - // ZSTD frame - frame_header_t header; - parse_frame_header(&header, &in); - - if (header.frame_content_size == 0 && !header.single_segment_flag) { - // Content size not provided, we can't tell - return (size_t)-1; - } - - return header.frame_content_size; - } else { - // not a real frame or skippable frame - ERROR("ZSTD frame magic number did not match"); - } - } -} -/******* END OUTPUT SIZE COUNTING *********************************************/ - -/******* DICTIONARY PARSING ***************************************************/ -dictionary_t* create_dictionary(void) { - dictionary_t* const dict = calloc(1, sizeof(dictionary_t)); - if (!dict) { - BAD_ALLOC(); - } - return dict; -} - -/// 
Free an allocated dictionary -void free_dictionary(dictionary_t *const dict) { - HUF_free_dtable(&dict->literals_dtable); - FSE_free_dtable(&dict->ll_dtable); - FSE_free_dtable(&dict->of_dtable); - FSE_free_dtable(&dict->ml_dtable); - - free(dict->content); - - memset(dict, 0, sizeof(dictionary_t)); - - free(dict); -} - - -#if !defined(ZDEC_NO_DICTIONARY) -#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes") -#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src"); - -static void init_dictionary_content(dictionary_t *const dict, - istream_t *const in); - -void parse_dictionary(dictionary_t *const dict, const void *src, - size_t src_len) { - const u8 *byte_src = (const u8 *)src; - memset(dict, 0, sizeof(dictionary_t)); - if (src == NULL) { /* cannot initialize dictionary with null src */ - NULL_SRC(); - } - if (src_len < 8) { - DICT_SIZE_ERROR(); - } - - istream_t in = IO_make_istream(byte_src, src_len); - - const u32 magic_number = IO_read_bits(&in, 32); - if (magic_number != 0xEC30A437) { - // raw content dict - IO_rewind_bits(&in, 32); - init_dictionary_content(dict, &in); - return; - } - - dict->dictionary_id = IO_read_bits(&in, 32); - - // "Entropy_Tables : following the same format as the tables in compressed - // blocks. They are stored in following order : Huffman tables for literals, - // FSE table for offsets, FSE table for match lengths, and FSE table for - // literals lengths. It's finally followed by 3 offset values, populating - // recent offsets (instead of using {1,4,8}), stored in order, 4-bytes - // little-endian each, for a total of 12 bytes. Each recent offset must have - // a value < dictionary size." 
- decode_huf_table(&dict->literals_dtable, &in); - decode_seq_table(&dict->of_dtable, &in, seq_offset, seq_fse); - decode_seq_table(&dict->ml_dtable, &in, seq_match_length, seq_fse); - decode_seq_table(&dict->ll_dtable, &in, seq_literal_length, seq_fse); - - // Read in the previous offset history - dict->previous_offsets[0] = IO_read_bits(&in, 32); - dict->previous_offsets[1] = IO_read_bits(&in, 32); - dict->previous_offsets[2] = IO_read_bits(&in, 32); - - // Ensure the provided offsets aren't too large - // "Each recent offset must have a value < dictionary size." - for (int i = 0; i < 3; i++) { - if (dict->previous_offsets[i] > src_len) { - ERROR("Dictionary corrupted"); - } - } - - // "Content : The rest of the dictionary is its content. The content act as - // a "past" in front of data to compress or decompress, so it can be - // referenced in sequence commands." - init_dictionary_content(dict, &in); -} - -static void init_dictionary_content(dictionary_t *const dict, - istream_t *const in) { - // Copy in the content - dict->content_size = IO_istream_len(in); - dict->content = malloc(dict->content_size); - if (!dict->content) { - BAD_ALLOC(); - } - - const u8 *const content = IO_get_read_ptr(in, dict->content_size); - - memcpy(dict->content, content, dict->content_size); -} - -static void HUF_copy_dtable(HUF_dtable *const dst, - const HUF_dtable *const src) { - if (src->max_bits == 0) { - memset(dst, 0, sizeof(HUF_dtable)); - return; - } - - const size_t size = (size_t)1 << src->max_bits; - dst->max_bits = src->max_bits; - - dst->symbols = malloc(size); - dst->num_bits = malloc(size); - if (!dst->symbols || !dst->num_bits) { - BAD_ALLOC(); - } - - memcpy(dst->symbols, src->symbols, size); - memcpy(dst->num_bits, src->num_bits, size); -} - -static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) { - if (src->accuracy_log == 0) { - memset(dst, 0, sizeof(FSE_dtable)); - return; - } - - size_t size = (size_t)1 << src->accuracy_log; - 
dst->accuracy_log = src->accuracy_log; - - dst->symbols = malloc(size); - dst->num_bits = malloc(size); - dst->new_state_base = malloc(size * sizeof(u16)); - if (!dst->symbols || !dst->num_bits || !dst->new_state_base) { - BAD_ALLOC(); - } - - memcpy(dst->symbols, src->symbols, size); - memcpy(dst->num_bits, src->num_bits, size); - memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16)); -} - -/// A dictionary acts as initializing values for the frame context before -/// decompression, so we implement it by applying it's predetermined -/// tables and content to the context before beginning decompression -static void frame_context_apply_dict(frame_context_t *const ctx, - const dictionary_t *const dict) { - // If the content pointer is NULL then it must be an empty dict - if (!dict || !dict->content) - return; - - // If the requested dictionary_id is non-zero, the correct dictionary must - // be present - if (ctx->header.dictionary_id != 0 && - ctx->header.dictionary_id != dict->dictionary_id) { - ERROR("Wrong dictionary provided"); - } - - // Copy the dict content to the context for references during sequence - // execution - ctx->dict_content = dict->content; - ctx->dict_content_len = dict->content_size; - - // If it's a formatted dict copy the precomputed tables in so they can - // be used in the table repeat modes - if (dict->dictionary_id != 0) { - // Deep copy the entropy tables so they can be freed independently of - // the dictionary struct - HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable); - FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable); - FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable); - FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable); - - // Copy the repeated offsets - memcpy(ctx->previous_offsets, dict->previous_offsets, - sizeof(ctx->previous_offsets)); - } -} - -#else // ZDEC_NO_DICTIONARY is defined - -static void frame_context_apply_dict(frame_context_t *const ctx, - const dictionary_t *const dict) { - 
(void)ctx; - if (dict && dict->content) ERROR("dictionary not supported"); -} - -#endif -/******* END DICTIONARY PARSING ***********************************************/ - -/******* IO STREAM OPERATIONS *************************************************/ - -/// Reads `num` bits from a bitstream, and updates the internal offset -static inline u64 IO_read_bits(istream_t *const in, const int num_bits) { - if (num_bits > 64 || num_bits <= 0) { - ERROR("Attempt to read an invalid number of bits"); - } - - const size_t bytes = (num_bits + in->bit_offset + 7) / 8; - const size_t full_bytes = (num_bits + in->bit_offset) / 8; - if (bytes > in->len) { - INP_SIZE(); - } - - const u64 result = read_bits_LE(in->ptr, num_bits, in->bit_offset); - - in->bit_offset = (num_bits + in->bit_offset) % 8; - in->ptr += full_bytes; - in->len -= full_bytes; - - return result; -} - -/// If a non-zero number of bits have been read from the current byte, advance -/// the offset to the next byte -static inline void IO_rewind_bits(istream_t *const in, int num_bits) { - if (num_bits < 0) { - ERROR("Attempting to rewind stream by a negative number of bits"); - } - - // move the offset back by `num_bits` bits - const int new_offset = in->bit_offset - num_bits; - // determine the number of whole bytes we have to rewind, rounding up to an - // integer number (e.g. if `new_offset == -5`, `bytes == 1`) - const i64 bytes = -(new_offset - 7) / 8; - - in->ptr -= bytes; - in->len += bytes; - // make sure the resulting `bit_offset` is positive, as mod in C does not - // convert numbers from negative to positive (e.g. 
-22 % 8 == -6) - in->bit_offset = ((new_offset % 8) + 8) % 8; -} - -/// If the remaining bits in a byte will be unused, advance to the end of the -/// byte -static inline void IO_align_stream(istream_t *const in) { - if (in->bit_offset != 0) { - if (in->len == 0) { - INP_SIZE(); - } - in->ptr++; - in->len--; - in->bit_offset = 0; - } -} - -/// Write the given byte into the output stream -static inline void IO_write_byte(ostream_t *const out, u8 symb) { - if (out->len == 0) { - OUT_SIZE(); - } - - out->ptr[0] = symb; - out->ptr++; - out->len--; -} - -/// Returns the number of bytes left to be read in this stream. The stream must -/// be byte aligned. -static inline size_t IO_istream_len(const istream_t *const in) { - return in->len; -} - -/// Returns a pointer where `len` bytes can be read, and advances the internal -/// state. The stream must be byte aligned. -static inline const u8 *IO_get_read_ptr(istream_t *const in, size_t len) { - if (len > in->len) { - INP_SIZE(); - } - if (in->bit_offset != 0) { - ERROR("Attempting to operate on a non-byte aligned stream"); - } - const u8 *const ptr = in->ptr; - in->ptr += len; - in->len -= len; - - return ptr; -} -/// Returns a pointer to write `len` bytes to, and advances the internal state -static inline u8 *IO_get_write_ptr(ostream_t *const out, size_t len) { - if (len > out->len) { - OUT_SIZE(); - } - u8 *const ptr = out->ptr; - out->ptr += len; - out->len -= len; - - return ptr; -} - -/// Advance the inner state by `len` bytes -static inline void IO_advance_input(istream_t *const in, size_t len) { - if (len > in->len) { - INP_SIZE(); - } - if (in->bit_offset != 0) { - ERROR("Attempting to operate on a non-byte aligned stream"); - } - - in->ptr += len; - in->len -= len; -} - -/// Returns an `ostream_t` constructed from the given pointer and length -static inline ostream_t IO_make_ostream(u8 *out, size_t len) { - return (ostream_t) { out, len }; -} - -/// Returns an `istream_t` constructed from the given pointer and 
length -static inline istream_t IO_make_istream(const u8 *in, size_t len) { - return (istream_t) { in, len, 0 }; -} - -/// Returns an `istream_t` with the same base as `in`, and length `len` -/// Then, advance `in` to account for the consumed bytes -/// `in` must be byte aligned -static inline istream_t IO_make_sub_istream(istream_t *const in, size_t len) { - // Consume `len` bytes of the parent stream - const u8 *const ptr = IO_get_read_ptr(in, len); - - // Make a substream using the pointer to those `len` bytes - return IO_make_istream(ptr, len); -} -/******* END IO STREAM OPERATIONS *********************************************/ - -/******* BITSTREAM OPERATIONS *************************************************/ -/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits -static inline u64 read_bits_LE(const u8 *src, const int num_bits, - const size_t offset) { - if (num_bits > 64) { - ERROR("Attempt to read an invalid number of bits"); - } - - // Skip over bytes that aren't in range - src += offset / 8; - size_t bit_offset = offset % 8; - u64 res = 0; - - int shift = 0; - int left = num_bits; - while (left > 0) { - u64 mask = left >= 8 ? 0xff : (((u64)1 << left) - 1); - // Read the next byte, shift it to account for the offset, and then mask - // out the top part if we don't need all the bits - res += (((u64)*src++ >> bit_offset) & mask) << shift; - shift += 8 - bit_offset; - left -= 8 - bit_offset; - bit_offset = 0; - } - - return res; -} - -/// Read bits from the end of a HUF or FSE bitstream. `offset` is in bits, so -/// it updates `offset` to `offset - bits`, and then reads `bits` bits from -/// `src + offset`. If the offset becomes negative, the extra bits at the -/// bottom are filled in with `0` bits instead of reading from before `src`. 
-static inline u64 STREAM_read_bits(const u8 *const src, const int bits, - i64 *const offset) { - *offset = *offset - bits; - size_t actual_off = *offset; - size_t actual_bits = bits; - // Don't actually read bits from before the start of src, so if `*offset < - // 0` fix actual_off and actual_bits to reflect the quantity to read - if (*offset < 0) { - actual_bits += *offset; - actual_off = 0; - } - u64 res = read_bits_LE(src, actual_bits, actual_off); - - if (*offset < 0) { - // Fill in the bottom "overflowed" bits with 0's - res = -*offset >= 64 ? 0 : (res << -*offset); - } - return res; -} -/******* END BITSTREAM OPERATIONS *********************************************/ - -/******* BIT COUNTING OPERATIONS **********************************************/ -/// Returns `x`, where `2^x` is the largest power of 2 less than or equal to -/// `num`, or `-1` if `num == 0`. -static inline int highest_set_bit(const u64 num) { - for (int i = 63; i >= 0; i--) { - if (((u64)1 << i) <= num) { - return i; - } - } - return -1; -} -/******* END BIT COUNTING OPERATIONS ******************************************/ - -/******* HUFFMAN PRIMITIVES ***************************************************/ -static inline u8 HUF_decode_symbol(const HUF_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset) { - // Look up the symbol and number of bits to read - const u8 symb = dtable->symbols[*state]; - const u8 bits = dtable->num_bits[*state]; - const u16 rest = STREAM_read_bits(src, bits, offset); - // Shift `bits` bits out of the state, keeping the low order bits that - // weren't necessary to determine this symbol. Then add in the new bits - // read from the stream. 
- *state = ((*state << bits) + rest) & (((u16)1 << dtable->max_bits) - 1); - - return symb; -} - -static inline void HUF_init_state(const HUF_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset) { - // Read in a full `dtable->max_bits` bits to initialize the state - const u8 bits = dtable->max_bits; - *state = STREAM_read_bits(src, bits, offset); -} - -static size_t HUF_decompress_1stream(const HUF_dtable *const dtable, - ostream_t *const out, - istream_t *const in) { - const size_t len = IO_istream_len(in); - if (len == 0) { - INP_SIZE(); - } - const u8 *const src = IO_get_read_ptr(in, len); - - // "Each bitstream must be read backward, that is starting from the end down - // to the beginning. Therefore it's necessary to know the size of each - // bitstream. - // - // It's also necessary to know exactly which bit is the latest. This is - // detected by a final bit flag : the highest bit of latest byte is a - // final-bit-flag. Consequently, a last byte of 0 is not possible. And the - // final-bit-flag itself is not part of the useful bitstream. Hence, the - // last byte contains between 0 and 7 useful bits." - const int padding = 8 - highest_set_bit(src[len - 1]); - - // Offset starts at the end because HUF streams are read backwards - i64 bit_offset = len * 8 - padding; - u16 state; - - HUF_init_state(dtable, &state, src, &bit_offset); - - size_t symbols_written = 0; - while (bit_offset > -dtable->max_bits) { - // Iterate over the stream, decoding one symbol at a time - IO_write_byte(out, HUF_decode_symbol(dtable, &state, src, &bit_offset)); - symbols_written++; - } - // "The process continues up to reading the required number of symbols per - // stream. If a bitstream is not entirely and exactly consumed, hence - // reaching exactly its beginning position with all bits consumed, the - // decoding process is considered faulty." 
- - // When all symbols have been decoded, the final state value shouldn't have - // any data from the stream, so it should have "read" dtable->max_bits from - // before the start of `src` - // Therefore `offset`, the edge to start reading new bits at, should be - // dtable->max_bits before the start of the stream - if (bit_offset != -dtable->max_bits) { - CORRUPTION(); - } - - return symbols_written; -} - -static size_t HUF_decompress_4stream(const HUF_dtable *const dtable, - ostream_t *const out, istream_t *const in) { - // "Compressed size is provided explicitly : in the 4-streams variant, - // bitstreams are preceded by 3 unsigned little-endian 16-bits values. Each - // value represents the compressed size of one stream, in order. The last - // stream size is deducted from total compressed size and from previously - // decoded stream sizes" - const size_t csize1 = IO_read_bits(in, 16); - const size_t csize2 = IO_read_bits(in, 16); - const size_t csize3 = IO_read_bits(in, 16); - - istream_t in1 = IO_make_sub_istream(in, csize1); - istream_t in2 = IO_make_sub_istream(in, csize2); - istream_t in3 = IO_make_sub_istream(in, csize3); - istream_t in4 = IO_make_sub_istream(in, IO_istream_len(in)); - - size_t total_output = 0; - // Decode each stream independently for simplicity - // If we wanted to we could decode all 4 at the same time for speed, - // utilizing more execution units - total_output += HUF_decompress_1stream(dtable, out, &in1); - total_output += HUF_decompress_1stream(dtable, out, &in2); - total_output += HUF_decompress_1stream(dtable, out, &in3); - total_output += HUF_decompress_1stream(dtable, out, &in4); - - return total_output; -} - -/// Initializes a Huffman table using canonical Huffman codes -/// For more explanation on canonical Huffman codes see -/// https://www.cs.scranton.edu/~mccloske/courses/cmps340/huff_canonical_dec2015.html -/// Codes within a level are allocated in symbol order (i.e. 
smaller symbols get -/// earlier codes) -static void HUF_init_dtable(HUF_dtable *const table, const u8 *const bits, - const int num_symbs) { - memset(table, 0, sizeof(HUF_dtable)); - if (num_symbs > HUF_MAX_SYMBS) { - ERROR("Too many symbols for Huffman"); - } - - u8 max_bits = 0; - u16 rank_count[HUF_MAX_BITS + 1]; - memset(rank_count, 0, sizeof(rank_count)); - - // Count the number of symbols for each number of bits, and determine the - // depth of the tree - for (int i = 0; i < num_symbs; i++) { - if (bits[i] > HUF_MAX_BITS) { - ERROR("Huffman table depth too large"); - } - max_bits = MAX(max_bits, bits[i]); - rank_count[bits[i]]++; - } - - const size_t table_size = 1 << max_bits; - table->max_bits = max_bits; - table->symbols = malloc(table_size); - table->num_bits = malloc(table_size); - - if (!table->symbols || !table->num_bits) { - free(table->symbols); - free(table->num_bits); - BAD_ALLOC(); - } - - // "Symbols are sorted by Weight. Within same Weight, symbols keep natural - // order. Symbols with a Weight of zero are removed. Then, starting from - // lowest weight, prefix codes are distributed in order." 
- - u32 rank_idx[HUF_MAX_BITS + 1]; - // Initialize the starting codes for each rank (number of bits) - rank_idx[max_bits] = 0; - for (int i = max_bits; i >= 1; i--) { - rank_idx[i - 1] = rank_idx[i] + rank_count[i] * (1 << (max_bits - i)); - // The entire range takes the same number of bits so we can memset it - memset(&table->num_bits[rank_idx[i]], i, rank_idx[i - 1] - rank_idx[i]); - } - - if (rank_idx[0] != table_size) { - CORRUPTION(); - } - - // Allocate codes and fill in the table - for (int i = 0; i < num_symbs; i++) { - if (bits[i] != 0) { - // Allocate a code for this symbol and set its range in the table - const u16 code = rank_idx[bits[i]]; - // Since the code doesn't care about the bottom `max_bits - bits[i]` - // bits of state, it gets a range that spans all possible values of - // the lower bits - const u16 len = 1 << (max_bits - bits[i]); - memset(&table->symbols[code], i, len); - rank_idx[bits[i]] += len; - } - } -} - -static void HUF_init_dtable_usingweights(HUF_dtable *const table, - const u8 *const weights, - const int num_symbs) { - // +1 because the last weight is not transmitted in the header - if (num_symbs + 1 > HUF_MAX_SYMBS) { - ERROR("Too many symbols for Huffman"); - } - - u8 bits[HUF_MAX_SYMBS]; - - u64 weight_sum = 0; - for (int i = 0; i < num_symbs; i++) { - // Weights are in the same range as bit count - if (weights[i] > HUF_MAX_BITS) { - CORRUPTION(); - } - weight_sum += weights[i] > 0 ? 
(u64)1 << (weights[i] - 1) : 0; - } - - // Find the first power of 2 larger than the sum - const int max_bits = highest_set_bit(weight_sum) + 1; - const u64 left_over = ((u64)1 << max_bits) - weight_sum; - // If the left over isn't a power of 2, the weights are invalid - if (left_over & (left_over - 1)) { - CORRUPTION(); - } - - // left_over is used to find the last weight as it's not transmitted - // by inverting 2^(weight - 1) we can determine the value of last_weight - const int last_weight = highest_set_bit(left_over) + 1; - - for (int i = 0; i < num_symbs; i++) { - // "Number_of_Bits = Number_of_Bits ? Max_Number_of_Bits + 1 - Weight : 0" - bits[i] = weights[i] > 0 ? (max_bits + 1 - weights[i]) : 0; - } - bits[num_symbs] = - max_bits + 1 - last_weight; // Last weight is always non-zero - - HUF_init_dtable(table, bits, num_symbs + 1); -} - -static void HUF_free_dtable(HUF_dtable *const dtable) { - free(dtable->symbols); - free(dtable->num_bits); - memset(dtable, 0, sizeof(HUF_dtable)); -} -/******* END HUFFMAN PRIMITIVES ***********************************************/ - -/******* FSE PRIMITIVES *******************************************************/ -/// For more description of FSE see -/// https://github.com/Cyan4973/FiniteStateEntropy/ - -/// Allow a symbol to be decoded without updating state -static inline u8 FSE_peek_symbol(const FSE_dtable *const dtable, - const u16 state) { - return dtable->symbols[state]; -} - -/// Consumes bits from the input and uses the current state to determine the -/// next state -static inline void FSE_update_state(const FSE_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset) { - const u8 bits = dtable->num_bits[*state]; - const u16 rest = STREAM_read_bits(src, bits, offset); - *state = dtable->new_state_base[*state] + rest; -} - -/// Decodes a single FSE symbol and updates the offset -static inline u8 FSE_decode_symbol(const FSE_dtable *const dtable, - u16 *const state, const u8 *const src, - 
i64 *const offset) { - const u8 symb = FSE_peek_symbol(dtable, *state); - FSE_update_state(dtable, state, src, offset); - return symb; -} - -static inline void FSE_init_state(const FSE_dtable *const dtable, - u16 *const state, const u8 *const src, - i64 *const offset) { - // Read in a full `accuracy_log` bits to initialize the state - const u8 bits = dtable->accuracy_log; - *state = STREAM_read_bits(src, bits, offset); -} - -static size_t FSE_decompress_interleaved2(const FSE_dtable *const dtable, - ostream_t *const out, - istream_t *const in) { - const size_t len = IO_istream_len(in); - if (len == 0) { - INP_SIZE(); - } - const u8 *const src = IO_get_read_ptr(in, len); - - // "Each bitstream must be read backward, that is starting from the end down - // to the beginning. Therefore it's necessary to know the size of each - // bitstream. - // - // It's also necessary to know exactly which bit is the latest. This is - // detected by a final bit flag : the highest bit of latest byte is a - // final-bit-flag. Consequently, a last byte of 0 is not possible. And the - // final-bit-flag itself is not part of the useful bitstream. Hence, the - // last byte contains between 0 and 7 useful bits." - const int padding = 8 - highest_set_bit(src[len - 1]); - i64 offset = len * 8 - padding; - - u16 state1, state2; - // "The first state (State1) encodes the even indexed symbols, and the - // second (State2) encodes the odd indexes. State1 is initialized first, and - // then State2, and they take turns decoding a single symbol and updating - // their state." 
- FSE_init_state(dtable, &state1, src, &offset); - FSE_init_state(dtable, &state2, src, &offset); - - // Decode until we overflow the stream - // Since we decode in reverse order, overflowing the stream is offset going - // negative - size_t symbols_written = 0; - while (1) { - // "The number of symbols to decode is determined by tracking bitStream - // overflow condition: If updating state after decoding a symbol would - // require more bits than remain in the stream, it is assumed the extra - // bits are 0. Then, the symbols for each of the final states are - // decoded and the process is complete." - IO_write_byte(out, FSE_decode_symbol(dtable, &state1, src, &offset)); - symbols_written++; - if (offset < 0) { - // There's still a symbol to decode in state2 - IO_write_byte(out, FSE_peek_symbol(dtable, state2)); - symbols_written++; - break; - } - - IO_write_byte(out, FSE_decode_symbol(dtable, &state2, src, &offset)); - symbols_written++; - if (offset < 0) { - // There's still a symbol to decode in state1 - IO_write_byte(out, FSE_peek_symbol(dtable, state1)); - symbols_written++; - break; - } - } - - return symbols_written; -} - -static void FSE_init_dtable(FSE_dtable *const dtable, - const i16 *const norm_freqs, const int num_symbs, - const int accuracy_log) { - if (accuracy_log > FSE_MAX_ACCURACY_LOG) { - ERROR("FSE accuracy too large"); - } - if (num_symbs > FSE_MAX_SYMBS) { - ERROR("Too many symbols for FSE"); - } - - dtable->accuracy_log = accuracy_log; - - const size_t size = (size_t)1 << accuracy_log; - dtable->symbols = malloc(size * sizeof(u8)); - dtable->num_bits = malloc(size * sizeof(u8)); - dtable->new_state_base = malloc(size * sizeof(u16)); - - if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) { - BAD_ALLOC(); - } - - // Used to determine how many bits need to be read for each state, - // and where the destination range should start - // Needs to be u16 because max value is 2 * max number of symbols, - // which can be larger than 
a byte can store - u16 state_desc[FSE_MAX_SYMBS]; - - // "Symbols are scanned in their natural order for "less than 1" - // probabilities. Symbols with this probability are being attributed a - // single cell, starting from the end of the table. These symbols define a - // full state reset, reading Accuracy_Log bits." - int high_threshold = size; - for (int s = 0; s < num_symbs; s++) { - // Scan for low probability symbols to put at the top - if (norm_freqs[s] == -1) { - dtable->symbols[--high_threshold] = s; - state_desc[s] = 1; - } - } - - // "All remaining symbols are sorted in their natural order. Starting from - // symbol 0 and table position 0, each symbol gets attributed as many cells - // as its probability. Cell allocation is spread, not linear." - // Place the rest in the table - const u16 step = (size >> 1) + (size >> 3) + 3; - const u16 mask = size - 1; - u16 pos = 0; - for (int s = 0; s < num_symbs; s++) { - if (norm_freqs[s] <= 0) { - continue; - } - - state_desc[s] = norm_freqs[s]; - - for (int i = 0; i < norm_freqs[s]; i++) { - // Give `norm_freqs[s]` states to symbol s - dtable->symbols[pos] = s; - // "A position is skipped if already occupied, typically by a "less - // than 1" probability symbol." 
- do { - pos = (pos + step) & mask; - } while (pos >= - high_threshold); - // Note: no other collision checking is necessary as `step` is - // coprime to `size`, so the cycle will visit each position exactly - // once - } - } - if (pos != 0) { - CORRUPTION(); - } - - // Now we can fill baseline and num bits - for (size_t i = 0; i < size; i++) { - u8 symbol = dtable->symbols[i]; - u16 next_state_desc = state_desc[symbol]++; - // Fills in the table appropriately, next_state_desc increases by symbol - // over time, decreasing number of bits - dtable->num_bits[i] = (u8)(accuracy_log - highest_set_bit(next_state_desc)); - // Baseline increases until the bit threshold is passed, at which point - // it resets to 0 - dtable->new_state_base[i] = - ((u16)next_state_desc << dtable->num_bits[i]) - size; - } -} - -/// Decode an FSE header as defined in the Zstandard format specification and -/// use the decoded frequencies to initialize a decoding table. -static void FSE_decode_header(FSE_dtable *const dtable, istream_t *const in, - const int max_accuracy_log) { - // "An FSE distribution table describes the probabilities of all symbols - // from 0 to the last present one (included) on a normalized scale of 1 << - // Accuracy_Log . - // - // It's a bitstream which is read forward, in little-endian fashion. It's - // not necessary to know its exact size, since it will be discovered and - // reported by the decoding process. - if (max_accuracy_log > FSE_MAX_ACCURACY_LOG) { - ERROR("FSE accuracy too large"); - } - - // The bitstream starts by reporting on which scale it operates. - // Accuracy_Log = low4bits + 5. Note that maximum Accuracy_Log for literal - // and match lengths is 9, and for offsets is 8. Higher values are - // considered errors." - const int accuracy_log = 5 + IO_read_bits(in, 4); - if (accuracy_log > max_accuracy_log) { - ERROR("FSE accuracy too large"); - } - - // "Then follows each symbol value, from 0 to last present one. 
The number - // of bits used by each field is variable. It depends on : - // - // Remaining probabilities + 1 : example : Presuming an Accuracy_Log of 8, - // and presuming 100 probabilities points have already been distributed, the - // decoder may read any value from 0 to 255 - 100 + 1 == 156 (inclusive). - // Therefore, it must read log2sup(156) == 8 bits. - // - // Value decoded : small values use 1 less bit : example : Presuming values - // from 0 to 156 (inclusive) are possible, 255-156 = 99 values are remaining - // in an 8-bits field. They are used this way : first 99 values (hence from - // 0 to 98) use only 7 bits, values from 99 to 156 use 8 bits. " - - i32 remaining = 1 << accuracy_log; - i16 frequencies[FSE_MAX_SYMBS]; - - int symb = 0; - while (remaining > 0 && symb < FSE_MAX_SYMBS) { - // Log of the number of possible values we could read - int bits = highest_set_bit(remaining + 1) + 1; - - u16 val = IO_read_bits(in, bits); - - // Try to mask out the lower bits to see if it qualifies for the "small - // value" threshold - const u16 lower_mask = ((u16)1 << (bits - 1)) - 1; - const u16 threshold = ((u16)1 << bits) - 1 - (remaining + 1); - - if ((val & lower_mask) < threshold) { - IO_rewind_bits(in, 1); - val = val & lower_mask; - } else if (val > lower_mask) { - val = val - threshold; - } - - // "Probability is obtained from Value decoded by following formula : - // Proba = value - 1" - const i16 proba = (i16)val - 1; - - // "It means value 0 becomes negative probability -1. -1 is a special - // probability, which means "less than 1". Its effect on distribution - // table is described in next paragraph. For the purpose of calculating - // cumulated distribution, it counts as one." - remaining -= proba < 0 ? -proba : proba; - - frequencies[symb] = proba; - symb++; - - // "When a symbol has a probability of zero, it is followed by a 2-bits - // repeat flag. This repeat flag tells how many probabilities of zeroes - // follow the current one. 
It provides a number ranging from 0 to 3. If - // it is a 3, another 2-bits repeat flag follows, and so on." - if (proba == 0) { - // Read the next two bits to see how many more 0s - int repeat = IO_read_bits(in, 2); - - while (1) { - for (int i = 0; i < repeat && symb < FSE_MAX_SYMBS; i++) { - frequencies[symb++] = 0; - } - if (repeat == 3) { - repeat = IO_read_bits(in, 2); - } else { - break; - } - } - } - } - IO_align_stream(in); - - // "When last symbol reaches cumulated total of 1 << Accuracy_Log, decoding - // is complete. If the last symbol makes cumulated total go above 1 << - // Accuracy_Log, distribution is considered corrupted." - if (remaining != 0 || symb >= FSE_MAX_SYMBS) { - CORRUPTION(); - } - - // Initialize the decoding table using the determined weights - FSE_init_dtable(dtable, frequencies, symb, accuracy_log); -} - -static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb) { - dtable->symbols = malloc(sizeof(u8)); - dtable->num_bits = malloc(sizeof(u8)); - dtable->new_state_base = malloc(sizeof(u16)); - - if (!dtable->symbols || !dtable->num_bits || !dtable->new_state_base) { - BAD_ALLOC(); - } - - // This setup will always have a state of 0, always return symbol `symb`, - // and never consume any bits - dtable->symbols[0] = symb; - dtable->num_bits[0] = 0; - dtable->new_state_base[0] = 0; - dtable->accuracy_log = 0; -} - -static void FSE_free_dtable(FSE_dtable *const dtable) { - free(dtable->symbols); - free(dtable->num_bits); - free(dtable->new_state_base); - memset(dtable, 0, sizeof(FSE_dtable)); -} -/******* END FSE PRIMITIVES ***************************************************/ diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h deleted file mode 100644 index c13c8134dee0..000000000000 --- a/doc/educational_decoder/zstd_decompress.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include <stddef.h> /* size_t */ - -/******* EXPOSED TYPES ********************************************************/ -/* -* Contains the parsed contents of a dictionary -* This includes Huffman and FSE tables used for decoding and data on offsets -*/ -typedef struct dictionary_s dictionary_t; -/******* END EXPOSED TYPES ****************************************************/ - -/******* DECOMPRESSION FUNCTIONS **********************************************/ -/// Zstandard decompression functions. -/// `dst` must point to a space at least as large as the reconstructed output. -size_t ZSTD_decompress(void *const dst, const size_t dst_len, - const void *const src, const size_t src_len); - -/// If `dict != NULL` and `dict_len >= 8`, does the same thing as -/// `ZSTD_decompress` but uses the provided dict -size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len, - const void *const src, const size_t src_len, - dictionary_t* parsed_dict); - -/// Get the decompressed size of an input stream so memory can be allocated in -/// advance -/// Returns -1 if the size can't be determined -/// Assumes decompression of a single frame -size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len); -/******* END DECOMPRESSION FUNCTIONS ******************************************/ - -/******* DICTIONARY MANAGEMENT ***********************************************/ -/* - * Return a valid dictionary_t pointer for use with dictionary initialization - * or decompression - */ -dictionary_t* create_dictionary(void); - -/* - * Parse a provided dictionary blob for use in decompression - * `src` -- must point to memory space representing the dictionary - * 
`src_len` -- must provide the dictionary size - * `dict` -- will contain the parsed contents of the dictionary and - * can be used for decompression - */ -void parse_dictionary(dictionary_t *const dict, const void *src, - size_t src_len); - -/* - * Free internal Huffman tables, FSE tables, and dictionary content - */ -void free_dictionary(dictionary_t *const dict); -/******* END DICTIONARY MANAGEMENT *******************************************/ diff --git a/doc/images/CSpeed2.png b/doc/images/CSpeed2.png Binary files differdeleted file mode 100644 index 42affa46ea47..000000000000 --- a/doc/images/CSpeed2.png +++ /dev/null diff --git a/doc/images/DCspeed5.png b/doc/images/DCspeed5.png Binary files differdeleted file mode 100644 index 900b0242f6d2..000000000000 --- a/doc/images/DCspeed5.png +++ /dev/null diff --git a/doc/images/DSpeed3.png b/doc/images/DSpeed3.png Binary files differdeleted file mode 100644 index 4818b11180a7..000000000000 --- a/doc/images/DSpeed3.png +++ /dev/null diff --git a/doc/images/cdict_v136.png b/doc/images/cdict_v136.png Binary files differdeleted file mode 100644 index 4a6d45620a06..000000000000 --- a/doc/images/cdict_v136.png +++ /dev/null diff --git a/doc/images/dict-cr.png b/doc/images/dict-cr.png Binary files differdeleted file mode 100644 index 6da34da46bde..000000000000 --- a/doc/images/dict-cr.png +++ /dev/null diff --git a/doc/images/dict-cs.png b/doc/images/dict-cs.png Binary files differdeleted file mode 100644 index a0d8d250565c..000000000000 --- a/doc/images/dict-cs.png +++ /dev/null diff --git a/doc/images/dict-ds.png b/doc/images/dict-ds.png Binary files differdeleted file mode 100644 index 5b9c360c9f46..000000000000 --- a/doc/images/dict-ds.png +++ /dev/null diff --git a/doc/images/zstd_cdict_v1_3_5.png b/doc/images/zstd_cdict_v1_3_5.png Binary files differdeleted file mode 100644 index cce67c83abbf..000000000000 --- a/doc/images/zstd_cdict_v1_3_5.png +++ /dev/null diff --git a/doc/images/zstd_logo86.png 
b/doc/images/zstd_logo86.png Binary files differdeleted file mode 100644 index 8abefe21b1e1..000000000000 --- a/doc/images/zstd_logo86.png +++ /dev/null diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md deleted file mode 100644 index a2bf20cbc762..000000000000 --- a/doc/zstd_compression_format.md +++ /dev/null @@ -1,1771 +0,0 @@ -Zstandard Compression Format -============================ - -### Notices - -Copyright (c) Meta Platforms, Inc. and affiliates. - -Permission is granted to copy and distribute this document -for any purpose and without charge, -including translations into other languages -and incorporation into compilations, -provided that the copyright notice and this notice are preserved, -and that any substantive changes or deletions from the original -are clearly marked. -Distribution of this document is unlimited. - -### Version - -0.4.3 (2024-10-07) - - -Introduction ------------- - -The purpose of this document is to define a lossless compressed data format, -that is independent of CPU type, operating system, -file system and character set, suitable for -file compression, pipe and streaming compression, -using the [Zstandard algorithm](https://facebook.github.io/zstd/). -The text of the specification assumes a basic background in programming -at the level of bits and other primitive data representations. - -The data can be produced or consumed, -even for an arbitrarily long sequentially presented input data stream, -using only an a priori bounded amount of intermediate storage, -and hence can be used in data communications. -The format uses the Zstandard compression method, -and optional [xxHash-64 checksum method](https://cyan4973.github.io/xxHash/), -for detection of data corruption. - -The data format defined by this specification -does not attempt to allow random access to compressed data. 
- -Unless otherwise indicated below, -a compliant compressor must produce data sets -that conform to the specifications presented here. -It doesn’t need to support all options though. - -A compliant decompressor must be able to decompress -at least one working set of parameters -that conforms to the specifications presented here. -It may also ignore informative fields, such as checksum. -Whenever it does not support a parameter defined in the compressed stream, -it must produce a non-ambiguous error code and associated error message -explaining which parameter is unsupported. - -This specification is intended for use by implementers of software -to compress data into Zstandard format and/or decompress data from Zstandard format. -The Zstandard format is supported by an open source reference implementation, -written in portable C, and available at : https://github.com/facebook/zstd . - - -### Overall conventions -In this document: -- square brackets i.e. `[` and `]` are used to indicate optional fields or parameters. -- the naming convention for identifiers is `Mixed_Case_With_Underscores` - -### Definitions -Content compressed by Zstandard is transformed into a Zstandard __frame__. -Multiple frames can be appended into a single file or stream. -A frame is completely independent, has a defined beginning and end, -and a set of parameters which tells the decoder how to decompress it. - -A frame encapsulates one or multiple __blocks__. -Each block contains arbitrary content, which is described by its header, -and has a guaranteed maximum content size, which depends on frame parameters. -Unlike frames, each block depends on previous blocks for proper decoding. -However, each block can be decompressed without waiting for its successor, -allowing streaming operations. 
- -Overview ---------- -- [Frames](#frames) - - [Zstandard frames](#zstandard-frames) - - [Blocks](#blocks) - - [Literals Section](#literals-section) - - [Sequences Section](#sequences-section) - - [Sequence Execution](#sequence-execution) - - [Skippable frames](#skippable-frames) -- [Entropy Encoding](#entropy-encoding) - - [FSE](#fse) - - [Huffman Coding](#huffman-coding) -- [Dictionary Format](#dictionary-format) - -Frames ------- -Zstandard compressed data is made of one or more __frames__. -Each frame is independent and can be decompressed independently of other frames. -The decompressed content of multiple concatenated frames is the concatenation of -each frame decompressed content. - -There are two frame formats defined by Zstandard: - Zstandard frames and Skippable frames. -Zstandard frames contain compressed data, while -skippable frames contain custom user metadata. - -## Zstandard frames -The structure of a single Zstandard frame is following: - -| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] | -|:--------------:|:--------------:|:----------:| ------------------ |:--------------------:| -| 4 bytes | 2-14 bytes | n bytes | | 0-4 bytes | - -__`Magic_Number`__ - -4 Bytes, __little-endian__ format. -Value : 0xFD2FB528 -Note: This value was selected to be less probable to find at the beginning of some random file. -It avoids trivial patterns (0x00, 0xFF, repeated bytes, increasing bytes, etc.), -contains byte values outside of ASCII range, -and doesn't map into UTF8 space. -It reduces the chances that a text file represent this value by accident. - -__`Frame_Header`__ - -2 to 14 Bytes, detailed in [`Frame_Header`](#frame_header). - -__`Data_Block`__ - -Detailed in [`Blocks`](#blocks). -That’s where compressed data is stored. - -__`Content_Checksum`__ - -An optional 32-bit checksum, only present if `Content_Checksum_flag` is set. 
-The content checksum is the result -of [xxh64() hash function](https://cyan4973.github.io/xxHash/) -digesting the original (decoded) data as input, and a seed of zero. -The low 4 bytes of the checksum are stored in __little-endian__ format. - -### `Frame_Header` - -The `Frame_Header` has a variable size, with a minimum of 2 bytes, -and up to 14 bytes depending on optional parameters. -The structure of `Frame_Header` is following: - -| `Frame_Header_Descriptor` | [`Window_Descriptor`] | [`Dictionary_ID`] | [`Frame_Content_Size`] | -| ------------------------- | --------------------- | ----------------- | ---------------------- | -| 1 byte | 0-1 byte | 0-4 bytes | 0-8 bytes | - -#### `Frame_Header_Descriptor` - -The first header's byte is called the `Frame_Header_Descriptor`. -It describes which other fields are present. -Decoding this byte is enough to tell the size of `Frame_Header`. - -| Bit number | Field name | -| ---------- | ---------- | -| 7-6 | `Frame_Content_Size_flag` | -| 5 | `Single_Segment_flag` | -| 4 | `Unused_bit` | -| 3 | `Reserved_bit` | -| 2 | `Content_Checksum_flag` | -| 1-0 | `Dictionary_ID_flag` | - -In this table, bit 7 is the highest bit, while bit 0 is the lowest one. - -__`Frame_Content_Size_flag`__ - -This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`), -specifying if `Frame_Content_Size` (the decompressed data size) -is provided within the header. -`Flag_Value` provides `FCS_Field_Size`, -which is the number of bytes used by `Frame_Content_Size` -according to the following table: - -| `Flag_Value` | 0 | 1 | 2 | 3 | -| -------------- | ------ | --- | --- | --- | -|`FCS_Field_Size`| 0 or 1 | 2 | 4 | 8 | - -When `Flag_Value` is `0`, `FCS_Field_Size` depends on `Single_Segment_flag` : -if `Single_Segment_flag` is set, `FCS_Field_Size` is 1. -Otherwise, `FCS_Field_Size` is 0 : `Frame_Content_Size` is not provided. - -__`Single_Segment_flag`__ - -If this flag is set, -data must be regenerated within a single continuous memory segment. 
- -In this case, `Window_Descriptor` byte is skipped, -but `Frame_Content_Size` is necessarily present. -As a consequence, the decoder must allocate a memory segment -of size equal or larger than `Frame_Content_Size`. - -In order to preserve the decoder from unreasonable memory requirements, -a decoder is allowed to reject a compressed frame -which requests a memory size beyond decoder's authorized range. - -For broader compatibility, decoders are recommended to support -memory sizes of at least 8 MB. -This is only a recommendation, -each decoder is free to support higher or lower limits, -depending on local limitations. - -__`Unused_bit`__ - -A decoder compliant with this specification version shall not interpret this bit. -It might be used in any future version, -to signal a property which is transparent to properly decode the frame. -An encoder compliant with this specification version must set this bit to zero. - -__`Reserved_bit`__ - -This bit is reserved for some future feature. -Its value _must be zero_. -A decoder compliant with this specification version must ensure it is not set. -This bit may be used in a future revision, -to signal a feature that must be interpreted to decode the frame correctly. - -__`Content_Checksum_flag`__ - -If this flag is set, a 32-bits `Content_Checksum` will be present at frame's end. -See `Content_Checksum` paragraph. - -__`Dictionary_ID_flag`__ - -This is a 2-bits flag (`= FHD & 3`), -telling if a dictionary ID is provided within the header. -It also specifies the size of this field as `DID_Field_Size`. - -|`Flag_Value` | 0 | 1 | 2 | 3 | -| -------------- | --- | --- | --- | --- | -|`DID_Field_Size`| 0 | 1 | 2 | 4 | - -#### `Window_Descriptor` - -Provides guarantees on minimum memory buffer required to decompress a frame. -This information is important for decoders to allocate enough memory. - -The `Window_Descriptor` byte is optional. -When `Single_Segment_flag` is set, `Window_Descriptor` is not present. 
-In this case, `Window_Size` is `Frame_Content_Size`, -which can be any value from 0 to 2^64-1 bytes (16 ExaBytes). - -| Bit numbers | 7-3 | 2-0 | -| ----------- | ---------- | ---------- | -| Field name | `Exponent` | `Mantissa` | - -The minimum memory buffer size is called `Window_Size`. -It is described by the following formulas : -``` -windowLog = 10 + Exponent; -windowBase = 1 << windowLog; -windowAdd = (windowBase / 8) * Mantissa; -Window_Size = windowBase + windowAdd; -``` -The minimum `Window_Size` is 1 KB. -The maximum `Window_Size` is `(1<<41) + 7*(1<<38)` bytes, which is 3.75 TB. - -In general, larger `Window_Size` tend to improve compression ratio, -but at the cost of memory usage. - -To properly decode compressed data, -a decoder will need to allocate a buffer of at least `Window_Size` bytes. - -In order to preserve decoder from unreasonable memory requirements, -a decoder is allowed to reject a compressed frame -which requests a memory size beyond decoder's authorized range. - -For improved interoperability, -it's recommended for decoders to support `Window_Size` of up to 8 MB, -and it's recommended for encoders to not generate frame requiring `Window_Size` larger than 8 MB. -It's merely a recommendation though, -decoders are free to support larger or lower limits, -depending on local limitations. - -#### `Dictionary_ID` - -This is a variable size field, which contains -the ID of the dictionary required to properly decode the frame. -`Dictionary_ID` field is optional. When it's not present, -it's up to the decoder to know which dictionary to use. - -`Dictionary_ID` field size is provided by `DID_Field_Size`. -`DID_Field_Size` is directly derived from value of `Dictionary_ID_flag`. -1 byte can represent an ID 0-255. -2 bytes can represent an ID 0-65535. -4 bytes can represent an ID 0-4294967295. -Format is __little-endian__. - -It's allowed to represent a small ID (for example `13`) -with a large 4-bytes dictionary ID, even if it is less efficient. 
- -A value of `0` has same meaning as no `Dictionary_ID`, -in which case the frame may or may not need a dictionary to be decoded, -and the ID of such a dictionary is not specified. -The decoder must know this information by other means. - -#### `Frame_Content_Size` - -This is the original (uncompressed) size. This information is optional. -`Frame_Content_Size` uses a variable number of bytes, provided by `FCS_Field_Size`. -`FCS_Field_Size` is provided by the value of `Frame_Content_Size_flag`. -`FCS_Field_Size` can be equal to 0 (not present), 1, 2, 4 or 8 bytes. - -| `FCS_Field_Size` | Range | -| ---------------- | ---------- | -| 0 | unknown | -| 1 | 0 - 255 | -| 2 | 256 - 65791| -| 4 | 0 - 2^32-1 | -| 8 | 0 - 2^64-1 | - -`Frame_Content_Size` format is __little-endian__. -When `FCS_Field_Size` is 1, 4 or 8 bytes, the value is read directly. -When `FCS_Field_Size` is 2, _the offset of 256 is added_. -It's allowed to represent a small size (for example `18`) using any compatible variant. - - -Blocks -------- - -After `Magic_Number` and `Frame_Header`, there are some number of blocks. -Each frame must have at least one block, -but there is no upper limit on the number of blocks per frame. - -The structure of a block is as follows: - -| `Block_Header` | `Block_Content` | -|:--------------:|:---------------:| -| 3 bytes | n bytes | - -__`Block_Header`__ - -`Block_Header` uses 3 bytes, written using __little-endian__ convention. -It contains 3 fields : - -| `Last_Block` | `Block_Type` | `Block_Size` | -|:------------:|:------------:|:------------:| -| bit 0 | bits 1-2 | bits 3-23 | - -__`Last_Block`__ - -The lowest bit signals if this block is the last one. -The frame will end after this last block. -It may be followed by an optional `Content_Checksum` -(see [Zstandard Frames](#zstandard-frames)). - -__`Block_Type`__ - -The next 2 bits represent the `Block_Type`. -`Block_Type` influences the meaning of `Block_Size`. 
-There are 4 block types : - -| Value | 0 | 1 | 2 | 3 | -| ------------ | ----------- | ----------- | ------------------ | --------- | -| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`| - -- `Raw_Block` - this is an uncompressed block. - `Block_Content` contains `Block_Size` bytes. - -- `RLE_Block` - this is a single byte, repeated `Block_Size` times. - `Block_Content` consists of a single byte. - On the decompression side, this byte must be repeated `Block_Size` times. - -- `Compressed_Block` - this is a [Zstandard compressed block](#compressed-blocks), - explained later on. - `Block_Size` is the length of `Block_Content`, the compressed data. - The decompressed size is not known, - but its maximum possible value is guaranteed (see below) - -- `Reserved` - this is not a block. - This value cannot be used with current version of this specification. - If such a value is present, it is considered corrupted data. - -__`Block_Size`__ - -The upper 21 bits of `Block_Header` represent the `Block_Size`. - -When `Block_Type` is `Compressed_Block` or `Raw_Block`, -`Block_Size` is the size of `Block_Content` (hence excluding `Block_Header`). - -When `Block_Type` is `RLE_Block`, since `Block_Content`’s size is always 1, -`Block_Size` represents the number of times this byte must be repeated. - -`Block_Size` is limited by `Block_Maximum_Size` (see below). - -__`Block_Content`__ and __`Block_Maximum_Size`__ - -The size of `Block_Content` is limited by `Block_Maximum_Size`, -which is the smallest of: -- `Window_Size` -- 128 KB - -`Block_Maximum_Size` is constant for a given frame. -This maximum is applicable to both the decompressed size -and the compressed size of any block in the frame. - -The reasoning for this limit is that a decoder can read this information -at the beginning of a frame and use it to allocate buffers. -The guarantees on the size of blocks ensure that -the buffers will be large enough for any following block of the valid frame. 
- - -Compressed Blocks ------------------ -To decompress a compressed block, the compressed size must be provided -from `Block_Size` field within `Block_Header`. - -A compressed block consists of 2 sections : -- [Literals Section](#literals-section) -- [Sequences Section](#sequences-section) - -The results of the two sections are then combined to produce the decompressed -data in [Sequence Execution](#sequence-execution) - -#### Prerequisites -To decode a compressed block, the following elements are necessary : -- Previous decoded data, up to a distance of `Window_Size`, - or beginning of the Frame, whichever is smaller. -- List of "recent offsets" from previous `Compressed_Block`. -- The previous Huffman tree, required by `Treeless_Literals_Block` type -- Previous FSE decoding tables, required by `Repeat_Mode` - for each symbol type (literals lengths, match lengths, offsets) - -Note that decoding tables aren't always from the previous `Compressed_Block`. - -- Every decoding table can come from a dictionary. -- The Huffman tree comes from the previous `Compressed_Literals_Block`. - -Literals Section ----------------- -All literals are regrouped in the first part of the block. -They can be decoded first, and then copied during [Sequence Execution], -or they can be decoded on the flow during [Sequence Execution]. - -Literals can be stored uncompressed or compressed using Huffman prefix codes. -When compressed, a tree description may optionally be present, -followed by 1 or 4 streams. - -| `Literals_Section_Header` | [`Huffman_Tree_Description`] | [jumpTable] | Stream1 | [Stream2] | [Stream3] | [Stream4] | -| ------------------------- | ---------------------------- | ----------- | ------- | --------- | --------- | --------- | - - -### `Literals_Section_Header` - -Header is in charge of describing how literals are packed. -It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, -using __little-endian__ convention. 
- -| `Literals_Block_Type` | `Size_Format` | `Regenerated_Size` | [`Compressed_Size`] | -| --------------------- | ------------- | ------------------ | ------------------- | -| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits | - -In this representation, bits on the left are the lowest bits. - -__`Literals_Block_Type`__ - -This field uses 2 lowest bits of first byte, describing 4 different block types : - -| `Literals_Block_Type` | Value | -| --------------------------- | ----- | -| `Raw_Literals_Block` | 0 | -| `RLE_Literals_Block` | 1 | -| `Compressed_Literals_Block` | 2 | -| `Treeless_Literals_Block` | 3 | - -- `Raw_Literals_Block` - Literals are stored uncompressed. -- `RLE_Literals_Block` - Literals consist of a single byte value - repeated `Regenerated_Size` times. -- `Compressed_Literals_Block` - This is a standard Huffman-compressed block, - starting with a Huffman tree description. - In this mode, there are at least 2 different literals represented in the Huffman tree description. - See details below. -- `Treeless_Literals_Block` - This is a Huffman-compressed block, - using Huffman tree _from previous Huffman-compressed literals block_. - `Huffman_Tree_Description` will be skipped. - Note: If this mode is triggered without any previous Huffman-table in the frame - (or [dictionary](#dictionary-format)), this should be treated as data corruption. - -__`Size_Format`__ - -`Size_Format` is divided into 2 families : - -- For `Raw_Literals_Block` and `RLE_Literals_Block`, - it's only necessary to decode `Regenerated_Size`. - There is no `Compressed_Size` field. -- For `Compressed_Block` and `Treeless_Literals_Block`, - it's required to decode both `Compressed_Size` - and `Regenerated_Size` (the decompressed size). - It's also necessary to decode the number of streams (1 or 4). - -For values spanning several bytes, convention is __little-endian__. - -__`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ : - -`Size_Format` uses 1 _or_ 2 bits. 
-Its value is : `Size_Format = (Literals_Section_Header[0]>>2) & 3` - -- `Size_Format` == 00 or 10 : `Size_Format` uses 1 bit. - `Regenerated_Size` uses 5 bits (0-31). - `Literals_Section_Header` uses 1 byte. - `Regenerated_Size = Literals_Section_Header[0]>>3` -- `Size_Format` == 01 : `Size_Format` uses 2 bits. - `Regenerated_Size` uses 12 bits (0-4095). - `Literals_Section_Header` uses 2 bytes. - `Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4)` -- `Size_Format` == 11 : `Size_Format` uses 2 bits. - `Regenerated_Size` uses 20 bits (0-1048575). - `Literals_Section_Header` uses 3 bytes. - `Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12)` - -Only Stream1 is present for these cases. -Note : it's allowed to represent a short value (for example `27`) -using a long format, even if it's less efficient. - -__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ : - -`Size_Format` always uses 2 bits. - -- `Size_Format` == 00 : _A single stream_. - Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023). - `Literals_Section_Header` uses 3 bytes. -- `Size_Format` == 01 : 4 streams. - Both `Regenerated_Size` and `Compressed_Size` use 10 bits (6-1023). - `Literals_Section_Header` uses 3 bytes. -- `Size_Format` == 10 : 4 streams. - Both `Regenerated_Size` and `Compressed_Size` use 14 bits (6-16383). - `Literals_Section_Header` uses 4 bytes. -- `Size_Format` == 11 : 4 streams. - Both `Regenerated_Size` and `Compressed_Size` use 18 bits (6-262143). - `Literals_Section_Header` uses 5 bytes. - -Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention. -Note: `Compressed_Size` __includes__ the size of the Huffman Tree description -_when_ it is present. -Note 2: `Compressed_Size` can never be `==0`. 
-Even in single-stream scenario, assuming an empty content, it must be `>=1`, -since it contains at least the final end bit flag. -In 4-streams scenario, a valid `Compressed_Size` is necessarily `>= 10` -(6 bytes for the jump table, + 4x1 bytes for the 4 streams). - -4 streams is faster than 1 stream in decompression speed, -by exploiting instruction level parallelism. -But it's also more expensive, -costing on average ~7.3 bytes more than the 1 stream mode, mostly from the jump table. - -In general, use the 4 streams mode when there are more literals to decode, -to favor higher decompression speeds. -Note that beyond >1KB of literals, the 4 streams mode is compulsory. - -Note that a minimum of 6 bytes is required for the 4 streams mode. -That's a technical minimum, but it's not recommended to employ the 4 streams mode -for such a small quantity, that would be wasteful. -A more practical lower bound would be around ~256 bytes. - -#### Raw Literals Block -The data in Stream1 is `Regenerated_Size` bytes long, -it contains the raw literals data to be used during [Sequence Execution]. - -#### RLE Literals Block -Stream1 consists of a single byte which should be repeated `Regenerated_Size` times -to generate the decoded literals. - -#### Compressed Literals Block and Treeless Literals Block -Both of these modes contain Huffman encoded data. - -For `Treeless_Literals_Block`, -the Huffman table comes from previously compressed literals block, -or from a dictionary. - - -### `Huffman_Tree_Description` -This section is only present when `Literals_Block_Type` type is `Compressed_Literals_Block` (`2`). -The tree describes the weights of all literals symbols that can be present in the literals block, at least 2 and up to 256. -The format of the Huffman tree description can be found at [Huffman Tree description](#huffman-tree-description). -The size of `Huffman_Tree_Description` is determined during decoding process, -it must be used to determine where streams begin. 
-`Total_Streams_Size = Compressed_Size - Huffman_Tree_Description_Size`. - - -### Jump Table -The Jump Table is only present when there are 4 Huffman-coded streams. - -Reminder : Huffman compressed data consists of either 1 or 4 streams. - -If only one stream is present, it is a single bitstream occupying the entire -remaining portion of the literals block, encoded as described in -[Huffman-Coded Streams](#huffman-coded-streams). - -If there are four streams, `Literals_Section_Header` only provided -enough information to know the decompressed and compressed sizes -of all four streams _combined_. -The decompressed size of _each_ stream is equal to `(Regenerated_Size+3)/4`, -except for the last stream which may be up to 3 bytes smaller, -to reach a total decompressed size as specified in `Regenerated_Size`. - -The compressed size of each stream is provided explicitly in the Jump Table. -Jump Table is 6 bytes long, and consists of three 2-byte __little-endian__ fields, -describing the compressed sizes of the first three streams. -`Stream4_Size` is computed from `Total_Streams_Size` minus sizes of other streams: - -`Stream4_Size = Total_Streams_Size - 6 - Stream1_Size - Stream2_Size - Stream3_Size`. - -`Stream4_Size` is necessarily `>= 1`. Therefore, -if `Total_Streams_Size < Stream1_Size + Stream2_Size + Stream3_Size + 6 + 1`, -data is considered corrupted. - -Each of these 4 bitstreams is then decoded independently as a Huffman-Coded stream, -as described in [Huffman-Coded Streams](#huffman-coded-streams) - - -Sequences Section ------------------ -A compressed block is a succession of _sequences_ . -A sequence is a literal copy command, followed by a match copy command. -A literal copy command specifies a length. -It is the number of bytes to be copied (or extracted) from the Literals Section. -A match copy command specifies an offset and a length. 
- -When all _sequences_ are decoded, -if there are literals left in the _literals section_, -these bytes are added at the end of the block. - -This is described in more detail in [Sequence Execution](#sequence-execution). - -The `Sequences_Section` regroup all symbols required to decode commands. -There are 3 symbol types : literals lengths, offsets and match lengths. -They are encoded together, interleaved, in a single _bitstream_. - -The `Sequences_Section` starts by a header, -followed by optional probability tables for each symbol type, -followed by the bitstream. - -| `Sequences_Section_Header` | [`Literals_Length_Table`] | [`Offset_Table`] | [`Match_Length_Table`] | bitStream | -| -------------------------- | ------------------------- | ---------------- | ---------------------- | --------- | - -To decode the `Sequences_Section`, it's required to know its size. -Its size is deduced from the size of `Literals_Section`: -`Sequences_Section_Size = Block_Size - Literals_Section_Size`. - - -#### `Sequences_Section_Header` - -Consists of 2 items: -- `Number_of_Sequences` -- Symbol compression modes - -__`Number_of_Sequences`__ - -This is a variable size field using between 1 and 3 bytes. -Let's call its first byte `byte0`. -- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte. -- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0 - 0x80) << 8) + byte1`. Uses 2 bytes. - Note that the 2 bytes format fully overlaps the 1 byte format. -- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00`. Uses 3 bytes. - -`if (Number_of_Sequences == 0)` : there are no sequences. - The sequence section stops immediately, - FSE tables used in `Repeat_Mode` aren't updated. - Block's decompressed content is defined solely by the Literals Section content. - -__Symbol compression modes__ - -This is a single byte, defining the compression mode of each symbol type. 
- -|Bit number| 7-6 | 5-4 | 3-2 | 1-0 | -| -------- | ----------------------- | -------------- | -------------------- | ---------- | -|Field name| `Literals_Lengths_Mode` | `Offsets_Mode` | `Match_Lengths_Mode` | `Reserved` | - -The last field, `Reserved`, must be all-zeroes. - -`Literals_Lengths_Mode`, `Offsets_Mode` and `Match_Lengths_Mode` define the `Compression_Mode` of -literals lengths, offsets, and match lengths symbols respectively. - -They follow the same enumeration : - -| Value | 0 | 1 | 2 | 3 | -| ------------------ | ----------------- | ---------- | --------------------- | ------------- | -| `Compression_Mode` | `Predefined_Mode` | `RLE_Mode` | `FSE_Compressed_Mode` | `Repeat_Mode` | - -- `Predefined_Mode` : A predefined FSE distribution table is used, defined in - [default distributions](#default-distributions). - No distribution table will be present. -- `RLE_Mode` : The table description consists of a single byte, which contains the symbol's value. - This symbol will be used for all sequences. -- `FSE_Compressed_Mode` : standard FSE compression. - A distribution table will be present. - The format of this distribution table is described in [FSE Table Description](#fse-table-description). - Note that the maximum allowed accuracy log for literals length and match length tables is 9, - and the maximum accuracy log for the offsets table is 8. - `FSE_Compressed_Mode` must not be used when only one symbol is present, - `RLE_Mode` should be used instead (although any other mode will work). -- `Repeat_Mode` : The table used in the previous `Compressed_Block` with `Number_of_Sequences > 0` will be used again, - or if this is the first block, table in the dictionary will be used. - Note that this includes `RLE_mode`, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated. - It also includes `Predefined_Mode`, in which case `Repeat_Mode` will have same outcome as `Predefined_Mode`. - No distribution table will be present. 
- If this mode is used without any previous sequence table in the frame - (nor [dictionary](#dictionary-format)) to repeat, this should be treated as corruption. - -#### The codes for literals lengths, match lengths, and offsets. - -Each symbol is a _code_ in its own context, -which specifies `Baseline` and `Number_of_Bits` to add. -_Codes_ are FSE compressed, -and interleaved with raw additional bits in the same bitstream. - -##### Literals length codes - -Literals length codes are values ranging from `0` to `35` included. -They define lengths from 0 to 131071 bytes. -The literals length is equal to the decoded `Baseline` plus -the result of reading `Number_of_Bits` bits from the bitstream, -as a __little-endian__ value. - -| `Literals_Length_Code` | 0-15 | -| ---------------------- | ---------------------- | -| length | `Literals_Length_Code` | -| `Number_of_Bits` | 0 | - -| `Literals_Length_Code` | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -| ---------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 16 | 18 | 20 | 22 | 24 | 28 | 32 | 40 | -| `Number_of_Bits` | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 3 | - -| `Literals_Length_Code` | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -| ---------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 48 | 64 | 128 | 256 | 512 | 1024 | 2048 | 4096 | -| `Number_of_Bits` | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | - -| `Literals_Length_Code` | 32 | 33 | 34 | 35 | -| ---------------------- | ---- | ---- | ---- | ---- | -| `Baseline` | 8192 |16384 |32768 |65536 | -| `Number_of_Bits` | 13 | 14 | 15 | 16 | - - -##### Match length codes - -Match length codes are values ranging from `0` to `52` included. -They define lengths from 3 to 131074 bytes. -The match length is equal to the decoded `Baseline` plus -the result of reading `Number_of_Bits` bits from the bitstream, -as a __little-endian__ value. 
- -| `Match_Length_Code` | 0-31 | -| ------------------- | ----------------------- | -| value | `Match_Length_Code` + 3 | -| `Number_of_Bits` | 0 | - -| `Match_Length_Code` | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -| ------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 35 | 37 | 39 | 41 | 43 | 47 | 51 | 59 | -| `Number_of_Bits` | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 3 | - -| `Match_Length_Code` | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -| ------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 67 | 83 | 99 | 131 | 259 | 515 | 1027 | 2051 | -| `Number_of_Bits` | 4 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | - -| `Match_Length_Code` | 48 | 49 | 50 | 51 | 52 | -| ------------------- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 4099 | 8195 |16387 |32771 |65539 | -| `Number_of_Bits` | 12 | 13 | 14 | 15 | 16 | - -##### Offset codes - -Offset codes are values ranging from `0` to `N`. - -A decoder is free to limit its maximum `N` supported. -Recommendation is to support at least up to `22`. -For information, at the time of this writing. -the reference decoder supports a maximum `N` value of `31`. - -An offset code is also the number of additional bits to read in __little-endian__ fashion, -and can be translated into an `Offset_Value` using the following formulas : - -``` -Offset_Value = (1 << offsetCode) + readNBits(offsetCode); -if (Offset_Value > 3) offset = Offset_Value - 3; -``` -It means that maximum `Offset_Value` is `(2^(N+1))-1` -supporting back-reference distances up to `(2^(N+1))-4`, -but is limited by [maximum back-reference distance](#window_descriptor). - -`Offset_Value` from 1 to 3 are special : they define "repeat codes". -This is described in more detail in [Repeat Offsets](#repeat-offsets). - -#### Decoding Sequences -FSE bitstreams are read in reverse direction than written. In zstd, -the compressor writes bits forward into a block and the decompressor -must read the bitstream _backwards_. 
- -To find the start of the bitstream it is therefore necessary to -know the offset of the last byte of the block which can be found -by counting `Block_Size` bytes after the block header. - -After writing the last bit containing information, the compressor -writes a single `1`-bit and then fills the byte with 0-7 `0` bits of -padding. The last byte of the compressed bitstream cannot be `0` for -that reason. - -When decompressing, the last byte containing the padding is the first -byte to read. The decompressor needs to skip 0-7 initial `0`-bits and -the first `1`-bit it occurs. Afterwards, the useful part of the bitstream -begins. - -FSE decoding requires a 'state' to be carried from symbol to symbol. -For more explanation on FSE decoding, see the [FSE section](#fse). - -For sequence decoding, a separate state keeps track of each -literal lengths, offsets, and match lengths symbols. -Some FSE primitives are also used. -For more details on the operation of these primitives, see the [FSE section](#fse). - -##### Starting states -The bitstream starts with initial FSE state values, -each using the required number of bits in their respective _accuracy_, -decoded previously from their normalized distribution. - -It starts by `Literals_Length_State`, -followed by `Offset_State`, -and finally `Match_Length_State`. - -Reminder : always keep in mind that all values are read _backward_, -so the 'start' of the bitstream is at the highest position in memory, -immediately before the last `1`-bit for padding. - -After decoding the starting states, a single sequence is decoded -`Number_Of_Sequences` times. -These sequences are decoded in order from first to last. -Since the compressor writes the bitstream in the forward direction, -this means the compressor must encode the sequences starting with the last -one and ending with the first. - -##### Decoding a sequence -For each of the symbol types, the FSE state can be used to determine the appropriate code. 
-The code then defines the `Baseline` and `Number_of_Bits` to read for each type. -See the [description of the codes] for how to determine these values. - -[description of the codes]: #the-codes-for-literals-lengths-match-lengths-and-offsets - -Decoding starts by reading the `Number_of_Bits` required to decode `Offset`. -It then does the same for `Match_Length`, and then for `Literals_Length`. -This sequence is then used for [sequence execution](#sequence-execution). - -If it is not the last sequence in the block, -the next operation is to update states. -Using the rules pre-calculated in the decoding tables, -`Literals_Length_State` is updated, -followed by `Match_Length_State`, -and then `Offset_State`. -See the [FSE section](#fse) for details on how to update states from the bitstream. - -This operation will be repeated `Number_of_Sequences` times. -At the end, the bitstream shall be entirely consumed, -otherwise the bitstream is considered corrupted. - -#### Default Distributions -If `Predefined_Mode` is selected for a symbol type, -its FSE decoding table is generated from a predefined distribution table defined here. -For details on how to convert this distribution into a decoding table, see the [FSE section]. - -[FSE section]: #from-normalized-distribution-to-decoding-tables - -##### Literals Length -The decoding table uses an accuracy log of 6 bits (64 states). -``` -short literalsLength_defaultDistribution[36] = - { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, - -1,-1,-1,-1 }; -``` - -##### Match Length -The decoding table uses an accuracy log of 6 bits (64 states). 
-``` -short matchLengths_defaultDistribution[53] = - { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, - -1,-1,-1,-1,-1 }; -``` - -##### Offset Codes -The decoding table uses an accuracy log of 5 bits (32 states), -and supports a maximum `N` value of 28, allowing offset values up to 536,870,908 . - -If any sequence in the compressed block requires a larger offset than this, -it's not possible to use the default distribution to represent it. -``` -short offsetCodes_defaultDistribution[29] = - { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; -``` - - -Sequence Execution ------------------- -Once literals and sequences have been decoded, -they are combined to produce the decoded content of a block. - -Each sequence consists of a tuple of (`literals_length`, `offset_value`, `match_length`), -decoded as described in the [Sequences Section](#sequences-section). -To execute a sequence, first copy `literals_length` bytes -from the decoded literals to the output. - -Then `match_length` bytes are copied from previous decoded data. -The offset to copy from is determined by `offset_value`: -if `offset_value > 3`, then the offset is `offset_value - 3`. -If `offset_value` is from 1-3, the offset is a special repeat offset value. -See the [repeat offset](#repeat-offsets) section for how the offset is determined -in this case. - -The offset is defined as from the current position, so an offset of 6 -and a match length of 3 means that 3 bytes should be copied from 6 bytes back. -Note that all offsets leading to previously decoded data -must be smaller than `Window_Size` defined in `Frame_Header_Descriptor`. - -#### Repeat offsets -As seen in [Sequence Execution](#sequence-execution), -the first 3 values define a repeated offset and we will call them -`Repeated_Offset1`, `Repeated_Offset2`, and `Repeated_Offset3`. 
-They are sorted in recency order, with `Repeated_Offset1` meaning "most recent one". - -If `offset_value == 1`, then the offset used is `Repeated_Offset1`, etc. - -There is an exception though, when current sequence's `literals_length = 0`. -In this case, repeated offsets are shifted by one, -so an `offset_value` of 1 means `Repeated_Offset2`, -an `offset_value` of 2 means `Repeated_Offset3`, -and an `offset_value` of 3 means `Repeated_Offset1 - 1`. - -In the final case, if `Repeated_Offset1 - 1` evaluates to 0, then the -data is considered corrupted. - -For the first block, the starting offset history is populated with following values : -`Repeated_Offset1`=1, `Repeated_Offset2`=4, `Repeated_Offset3`=8, -unless a dictionary is used, in which case they come from the dictionary. - -Then each block gets its starting offset history from the ending values of the most recent `Compressed_Block`. -Note that blocks which are not `Compressed_Block` are skipped, they do not contribute to offset history. - -[Offset Codes]: #offset-codes - -###### Offset updates rules - -During the execution of the sequences of a `Compressed_Block`, the -`Repeated_Offsets`' values are kept up to date, so that they always represent -the three most-recently used offsets. In order to achieve that, they are -updated after executing each sequence in the following way: - -When the sequence's `offset_value` does not refer to one of the -`Repeated_Offsets`--when it has value greater than 3, or when it has value 3 -and the sequence's `literals_length` is zero--the `Repeated_Offsets`' values -are shifted back one, and `Repeated_Offset1` takes on the value of the -just-used offset. 
- -Otherwise, when the sequence's `offset_value` refers to one of the -`Repeated_Offsets`--when it has value 1 or 2, or when it has value 3 and the -sequence's `literals_length` is non-zero--the `Repeated_Offsets` are re-ordered -so that `Repeated_Offset1` takes on the value of the used Repeated_Offset, and -the existing values are pushed back from the first `Repeated_Offset` through to -the `Repeated_Offset` selected by the `offset_value`. This effectively performs -a single-stepped wrapping rotation of the values of these offsets, so that -their order again reflects the recency of their use. - -The following table shows the values of the `Repeated_Offsets` as a series of -sequences are applied to them: - -| `offset_value` | `literals_length` | `Repeated_Offset1` | `Repeated_Offset2` | `Repeated_Offset3` | Comment | -|:--------------:|:-----------------:|:------------------:|:------------------:|:------------------:|:-----------------------:| -| | | 1 | 4 | 8 | starting values | -| 1114 | 11 | 1111 | 1 | 4 | non-repeat | -| 1 | 22 | 1111 | 1 | 4 | repeat 1: no change | -| 2225 | 22 | 2222 | 1111 | 1 | non-repeat | -| 1114 | 111 | 1111 | 2222 | 1111 | non-repeat | -| 3336 | 33 | 3333 | 1111 | 2222 | non-repeat | -| 2 | 22 | 1111 | 3333 | 2222 | repeat 2: swap 1 & 2 | -| 3 | 33 | 2222 | 1111 | 3333 | repeat 3: rotate 3 to 1 | -| 3 | 0 | 2221 | 2222 | 1111 | special case : insert `repeat1 - 1` | -| 1 | 0 | 2222 | 2221 | 1111 | == repeat 2 | - - -Skippable Frames ----------------- - -| `Magic_Number` | `Frame_Size` | `User_Data` | -|:--------------:|:------------:|:-----------:| -| 4 bytes | 4 bytes | n bytes | - -Skippable frames allow the insertion of user-defined metadata -into a flow of concatenated frames. - -Skippable frames defined in this specification are compatible with [LZ4] ones. 
- -[LZ4]:https://lz4.github.io/lz4/ - -From a compliant decoder perspective, skippable frames need just be skipped, -and their content ignored, resuming decoding after the skippable frame. - -It can be noted that a skippable frame -can be used to watermark a stream of concatenated frames -embedding any kind of tracking information (even just a UUID). -Users wary of such possibility should scan the stream of concatenated frames -in an attempt to detect such frame for analysis or removal. - -__`Magic_Number`__ - -4 Bytes, __little-endian__ format. -Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F. -All 16 values are valid to identify a skippable frame. -This specification doesn't detail any specific tagging for skippable frames. - -__`Frame_Size`__ - -This is the size, in bytes, of the following `User_Data` -(without including the magic number nor the size field itself). -This field is represented using 4 Bytes, __little-endian__ format, unsigned 32-bits. -This means `User_Data` can’t be bigger than (2^32-1) bytes. - -__`User_Data`__ - -The `User_Data` can be anything. Data will just be skipped by the decoder. - - - -Entropy Encoding ----------------- -Two types of entropy encoding are used by the Zstandard format: -FSE, and Huffman coding. -Huffman is used to compress literals, -while FSE is used for all other symbols -(`Literals_Length_Code`, `Match_Length_Code`, offset codes) -and to compress Huffman headers. - - -FSE ---- -FSE, short for Finite State Entropy, is an entropy codec based on [ANS]. -FSE encoding/decoding involves a state that is carried over between symbols. -Decoding must be done in the opposite direction as encoding. -Therefore, all FSE bitstreams are read from end to beginning. -Note that the order of the bits in the stream is not reversed, -we just read each multi-bits element in the reverse order they are encoded. - -For additional details on FSE, see [Finite State Entropy]. 
- -[Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/ - -FSE decoding is directed by a decoding table with a power of 2 size, each row containing three elements: -`Symbol`, `Num_Bits`, and `Baseline`. -The `log2` of the table size is its `Accuracy_Log`. -An FSE state value represents an index in this table. - -To obtain the initial state value, consume `Accuracy_Log` bits from the stream as a __little-endian__ value. -The first symbol in the stream is the `Symbol` indicated in the table for that state. -To obtain the next state value, -the decoder should consume `Num_Bits` bits from the stream as a __little-endian__ value and add it to `Baseline`. - -[ANS]: https://en.wikipedia.org/wiki/Asymmetric_Numeral_Systems - -### FSE Table Description -To decode an FSE bitstream, it is necessary to build its FSE decoding table. -The decoding table is derived from a distribution of Probabilities. -The Zstandard format encodes distributions of Probabilities as follows: - -The distribution of probabilities is described in a bitstream which is read forward, -in __little-endian__ fashion. -The amount of bytes consumed from the bitstream to describe the distribution -is discovered at the end of the decoding process. - -The bitstream starts by reporting on which scale the distribution operates. -Let's `low4Bits` designate the lowest 4 bits of the first byte : -`Accuracy_Log = low4bits + 5`. - -An FSE distribution table describes the probabilities of all symbols -from `0` to the last present one (included) in natural order. -The sum of probabilities is normalized to reach a power of 2 total of `1 << Accuracy_Log` . -There must be two or more symbols with non-zero probabilities. - -The number of bits used to decode each probability is variable. 
-It depends on :
-
-- Remaining probabilities + 1 :
-  __example__ :
-  Presuming an `Accuracy_Log` of 8,
-  and presuming 100 probability points have already been distributed,
-  the decoder may read any value from `0` to `256 - 100 + 1 == 157` (inclusive).
-  Therefore, it may read up to `log2sup(157) == 8` bits, where `log2sup(N)`
-  is the smallest integer `T` that satisfies `(1 << T) > N`.
-
-- Value decoded : small values use 1 less bit :
-  __example__ :
-  Presuming values from 0 to 157 (inclusive) are possible,
-  255-157 = 98 values are remaining in an 8-bits field.
-  They are used this way :
-  first 98 values (hence from 0 to 97) use only 7 bits,
-  values from 98 to 157 use 8 bits.
-  This is achieved through this scheme :
-
-  | 8-bit field read | Value decoded | Nb of bits consumed |
-  | ---------------- | ------------- | ------------------- |
-  | 0 - 97           | 0 - 97        | 7                   |
-  | 98 - 127         | 98 - 127      | 8                   |
-  | 128 - 225        | 0 - 97        | 7                   |
-  | 226 - 255        | 128 - 157     | 8                   |
-
-Probability is derived from Value decoded using the following formula:
-`Probability = Value - 1`
-
-Consequently, a Probability of `0` is described by a Value `1`.
-
-A Value `0` is used to signal a special case, named "Probability `-1`".
-It describes a probability which should have been "less than 1".
-Its effect on the decoding table building process is described in the [next section].
-For the purpose of counting total allocated probability points, it counts as one.
-
-[next section]:#from-normalized-distribution-to-decoding-tables
-
-Symbols probabilities are read one by one, in order.
-After each probability is decoded, the total nb of probability points is updated.
-This is used to determine how many bits must be read to decode the probability of next symbol.
-
-When a symbol has a __probability__ of `zero` (decoded from reading a Value `1`),
-it is followed by a 2-bits repeat flag.
-This repeat flag tells how many probabilities of zeroes follow the current one.
-It provides a number ranging from 0 to 3. -If it is a 3, another 2-bits repeat flag follows, and so on. - -When the Probability for a symbol makes cumulated total reach `1 << Accuracy_Log`, -then it's the last symbol, and decoding is complete. - -Then the decoder can tell how many bytes were used in this process, -and how many symbols are present. -The bitstream consumes a round number of bytes. -Any remaining bit within the last byte is just unused. - -If this process results in a non-zero probability for a symbol outside of the -valid range of symbols that the FSE table is defined for, even if that symbol is -not used, then the data is considered corrupted. -For the specific case of offset codes, -a decoder implementation may reject a frame containing a non-zero probability -for an offset code larger than the largest offset code supported by the decoder -implementation. - -#### From normalized distribution to decoding tables - -The normalized distribution of probabilities is enough -to create a unique decoding table. -It is generated using the following build rule : - -The table has a size of `Table_Size = 1 << Accuracy_Log`. -Each row specifies the decoded symbol, -and instructions to reach the next state (`Number_of_Bits` and `Baseline`). - -Symbols are first scanned in their natural order for "less than 1" probabilities -(previously decoded from a Value of `0`). -Symbols with this special probability are being attributed a single row, -starting from the end of the table and retreating. -These symbols define a full state reset, reading `Accuracy_Log` bits. - -Then, all remaining symbols, sorted in natural order, are allocated rows. -Starting from smallest present symbol, and table position `0`, -each symbol gets allocated as many rows as its probability. 
- -Row allocation is not linear, it follows this order, in modular arithmetic: -``` -position += (tableSize>>1) + (tableSize>>3) + 3; -position &= tableSize-1; -``` - -Using above ordering rule, each symbol gets allocated as many rows as its probability. -If a position is already occupied by a "less than 1" probability symbol, -it is simply skipped, and the next position is allocated instead. -Once enough rows have been allocated for the current symbol, -the allocation process continues, using the next symbol, in natural order. -This process guarantees that the table is entirely and exactly filled. - -Each row specifies a decoded symbol, and is accessed by current state value. -It also specifies `Number_of_Bits` and `Baseline`, which are required to determine next state value. - -To correctly set these fields, it's necessary to sort all occurrences of each symbol in state value order, -and then attribute N+1 bits to lower rows, and N bits to higher rows, -following the process described below (using an example): - -__Example__ : -Presuming an `Accuracy_Log` of 7, -let's imagine a symbol with a Probability of 5: -it receives 5 rows, corresponding to 5 state values between `0` and `127`. - -In this example, the first state value happens to be `1` (after unspecified previous symbols). -The next 4 states are then determined using above modular arithmetic rule, -which specifies to add `64+16+3 = 83` modulo `128` to jump to next position, -producing the following series: `1`, `84`, `39`, `122`, `77` (modular arithmetic). -(note: the next symbol will then start at `32`). - -These state values are then sorted in natural order, -resulting in the following series: `1`, `39`, `77`, `84`, `122`. - -The next power of 2 after 5 is 8. -Therefore, the probability space will be divided into 8 equal parts. -Since the probability space is `1<<7 = 128` large, each share is `128/8 = 16` large. 
-
-In order to reach 8 shares, the `8-5 = 3` lowest states will count "double",
-doubling their shares (32 in width), hence requiring one more bit.
-
-Baseline is assigned starting from the lowest state using fewer bits,
-continuing in natural state order, looping back at the beginning.
-Each state takes its allocated range from Baseline, sized by its `Number_of_Bits`.
-
-| state order      | 0     | 1     | 2      | 3    | 4      |
-| ---------------- | ----- | ----- | ------ | ---- | ------ |
-| state value      | 1     | 39    | 77     | 84   | 122    |
-| width            | 32    | 32    | 32     | 16   | 16     |
-| `Number_of_Bits` | 5     | 5     | 5      | 4    | 4      |
-| allocation order | 3     | 4     | 5      | 1    | 2      |
-| `Baseline`       | 32    | 64    | 96     | 0    | 16     |
-| range            | 32-63 | 64-95 | 96-127 | 0-15 | 16-31  |
-
-During decoding, the next state value is determined by using current state value as row number,
-then reading the required `Number_of_Bits` from the bitstream, and adding the specified `Baseline`.
-
-Note:
-as a trivial example, it follows that, for a symbol with a Probability of `1`,
-`Baseline` is necessarily `0`, and `Number_of_Bits` is necessarily `Accuracy_Log`.
-
-See [Appendix A] to see the outcome of this process applied to the default distributions.
-
-[Appendix A]: #appendix-a---decoding-tables-for-predefined-codes
-
-
-Huffman Coding
---------------
-Zstandard Huffman-coded streams are read backwards,
-similar to the FSE bitstreams.
-Therefore, to find the start of the bitstream, it is required to
-know the offset of the last byte of the Huffman-coded stream.
-
-After writing the last bit containing information, the compressor
-writes a single `1`-bit and then fills the byte with 0-7 `0` bits of
-padding. The last byte of the compressed bitstream cannot be `0` for
-that reason.
-
-When decompressing, the last byte containing the padding is the first
-byte to read. The decompressor needs to skip 0-7 initial `0`-bits and
-the first `1`-bit that occurs. Afterwards, the useful part of the bitstream
-begins.
- -The bitstream contains Huffman-coded symbols in __little-endian__ order, -with the codes defined by the method below. - -### Huffman Tree Description - -Prefix coding represents symbols from an a priori known alphabet -by bit sequences (codewords), one codeword for each symbol, -in a manner such that different symbols may be represented -by bit sequences of different lengths, -but a parser can always parse an encoded string -unambiguously symbol-by-symbol. - -Given an alphabet with known symbol frequencies, -the Huffman algorithm allows the construction of an optimal prefix code -using the fewest bits of any possible prefix codes for that alphabet. - -Prefix code must not exceed a maximum code length. -More bits improve accuracy but cost more header size, -and require more memory or more complex decoding operations. -This specification limits maximum code length to 11 bits. - -#### Representation - -All literal symbols from zero (included) to last present one (excluded) -are represented by `Weight` with values from `0` to `Max_Number_of_Bits`. -Transformation from `Weight` to `Number_of_Bits` follows this formula : -``` -Number_of_Bits = Weight ? (Max_Number_of_Bits + 1 - Weight) : 0 -``` -When a literal symbol is not present, it receives a `Weight` of 0. -The least frequent symbol receives a `Weight` of 1. -If no literal has a `Weight` of 1, then the data is considered corrupted. -If there are not at least two literals with non-zero `Weight`, then the data -is considered corrupted. -The most frequent symbol receives a `Weight` anywhere between 1 and 11 (max). -The last symbol's `Weight` is deduced from previously retrieved Weights, -by completing to the nearest power of 2. It's necessarily non 0. -If it's not possible to reach a clean power of 2 with a single `Weight` value, -the Huffman Tree Description is considered invalid. -This final power of 2 gives `Max_Number_of_Bits`, the depth of the current tree. 
-`Max_Number_of_Bits` must be <= 11, -otherwise the representation is considered corrupted. - -__Example__ : -Let's presume the following Huffman tree must be described : - -| literal symbol | A | B | C | D | E | F | -| ---------------- | --- | --- | --- | --- | --- | --- | -| `Number_of_Bits` | 1 | 2 | 3 | 0 | 4 | 4 | - -The tree depth is 4, since its longest elements uses 4 bits -(longest elements are the ones with smallest frequency). - -All symbols will now receive a `Weight` instead of `Number_of_Bits`. -Weight formula is : -``` -Weight = Number_of_Bits ? (Max_Number_of_Bits + 1 - Number_of_Bits) : 0 -``` -It gives the following series of Weights : - -| literal symbol | A | B | C | D | E | F | -| -------------- | --- | --- | --- | --- | --- | --- | -| `Weight` | 4 | 3 | 2 | 0 | 1 | 1 | - -This list will be sent to the decoder, with the following modifications: - -- `F` will not be listed, because it can be determined from previous symbols -- nor will symbols above `F` as they are all 0 -- on the other hand, all symbols before `A`, starting with `\0`, will be listed, with a Weight of 0. - -The decoder will do the inverse operation : -having collected weights of literal symbols from `A` to `E`, -it knows the last literal, `F`, is present with a non-zero `Weight`. -The `Weight` of `F` can be determined by advancing to the next power of 2. -The sum of `2^(Weight-1)` (excluding 0's) is : -`8 + 4 + 2 + 0 + 1 = 15`. -Nearest larger power of 2 value is 16. -Therefore, `Max_Number_of_Bits = log2(16) = 4` and `Weight[F] = log_2(16 - 15) + 1 = 1`. - -#### Huffman Tree header - -This is a single byte value (0-255), -which describes how the series of weights is encoded. - -- if `headerByte` < 128 : - the series of weights is compressed using FSE (see below). - The length of the FSE-compressed series is equal to `headerByte` (0-127). 
- -- if `headerByte` >= 128 : - + the series of weights uses a direct representation, - where each `Weight` is encoded directly as a 4 bits field (0-15). - + They are encoded forward, 2 weights to a byte, - first weight taking the top four bits and second one taking the bottom four. - * e.g. the following operations could be used to read the weights: - `Weight[0] = (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf)`, etc. - + The full representation occupies `Ceiling(Number_of_Weights/2)` bytes, - meaning it uses only full bytes even if `Number_of_Weights` is odd. - + `Number_of_Weights = headerByte - 127`. - * Note that maximum `Number_of_Weights` is 255-127 = 128, - therefore, only up to 128 `Weight` can be encoded using direct representation. - * Since the last non-zero `Weight` is _not_ encoded, - this scheme is compatible with alphabet sizes of up to 129 symbols, - hence including literal symbol 128. - * If any literal symbol > 128 has a non-zero `Weight`, - direct representation is not possible. - In such case, it's necessary to use FSE compression. - - -#### Finite State Entropy (FSE) compression of Huffman weights - -In this case, the series of Huffman weights is compressed using FSE compression. -It's a single bitstream with 2 interleaved states, -sharing a single distribution table. - -To decode an FSE bitstream, it is necessary to know its compressed size. -Compressed size is provided by `headerByte`. -It's also necessary to know its _maximum possible_ decompressed size, -which is `255`, since literal symbols span from `0` to `255`, -and last symbol's `Weight` is not represented. - -An FSE bitstream starts by a header, describing probabilities distribution. -It will create a Decoding Table. -For a list of Huffman weights, the maximum accuracy log is 6 bits. -For more description see the [FSE header description](#fse-table-description) - -The Huffman header compression uses 2 states, -which share the same FSE distribution table. 
-The first state (`State1`) encodes the even indexed symbols, -and the second (`State2`) encodes the odd indexed symbols. -`State1` is initialized first, and then `State2`, and they take turns -decoding a single symbol and updating their state. -For more details on these FSE operations, see the [FSE section](#fse). - -The number of symbols to decode is determined -by tracking bitStream overflow condition: -If updating state after decoding a symbol would require more bits than -remain in the stream, it is assumed that extra bits are 0. Then, -symbols for each of the final states are decoded and the process is complete. - -If this process would produce more weights than the maximum number of decoded -weights (255), then the data is considered corrupted. - -If either of the 2 initial states are absent or truncated, then the data is -considered corrupted. Consequently, it is not possible to encode fewer than -2 weights using this mode. - -#### Conversion from weights to Huffman prefix codes - -All present symbols shall now have a `Weight` value. -It is possible to transform weights into `Number_of_Bits`, using this formula: -``` -Number_of_Bits = (Weight>0) ? Max_Number_of_Bits + 1 - Weight : 0 -``` -In order to determine which prefix code is assigned to each Symbol, -Symbols are first sorted by `Weight`, then by natural sequential order. -Symbols with a `Weight` of zero are removed. -Then, starting from lowest `Weight` (hence highest `Number_of_Bits`), -prefix codes are assigned in ascending order. 
- -__Example__ : -Let's assume the following list of weights has been decoded: - -| Literal | A | B | C | D | E | F | -| -------- | --- | --- | --- | --- | --- | --- | -| `Weight` | 4 | 3 | 2 | 0 | 1 | 1 | - -Sorted by weight and then natural sequential order, -it gives the following prefix codes distribution: - -| Literal | D | E | F | C | B | A | -| ---------------- | --- | ---- | ---- | ---- | ---- | ---- | -| `Weight` | 0 | 1 | 1 | 2 | 3 | 4 | -| `Number_of_Bits` | 0 | 4 | 4 | 3 | 2 | 1 | -| prefix code | N/A | 0000 | 0001 | 001 | 01 | 1 | -| ascending order | N/A | 0000 | 0001 | 001x | 01xx | 1xxx | - -### Huffman-coded Streams - -Given a Huffman decoding table, -it's possible to decode a Huffman-coded stream. - -Each bitstream must be read _backward_, -that is starting from the end down to the beginning. -Therefore it's necessary to know the size of each bitstream. - -It's also necessary to know exactly which _bit_ is the last one. -This is detected by a final bit flag : -the highest bit of latest byte is a final-bit-flag. -Consequently, a last byte of `0` is not possible. -And the final-bit-flag itself is not part of the useful bitstream. -Hence, the last byte contains between 0 and 7 useful bits. - -Starting from the end, -it's possible to read the bitstream in a __little-endian__ fashion, -keeping track of already used bits. Since the bitstream is encoded in reverse -order, starting from the end read symbols in forward order. 
- -For example, if the literal sequence `ABEF` was encoded using above prefix code, -it would be encoded (in reverse order) as: - -|Symbol | F | E | B | A | Padding | -|--------|------|------|----|---|---------| -|Encoding|`0000`|`0001`|`01`|`1`| `00001` | - -Resulting in following 2-bytes bitstream : -``` -00010000 00001101 -``` - -Here is an alternative representation with the symbol codes separated by underscore: -``` -0001_0000 00001_1_01 -``` - -Reading highest `Max_Number_of_Bits` bits, -it's possible to compare extracted value to decoding table, -determining the symbol to decode and number of bits to discard. - -The process continues up to reading the required number of symbols per stream. -If a bitstream is not entirely and exactly consumed, -hence reaching exactly its beginning position with _all_ bits consumed, -the decoding process is considered faulty. - - -Dictionary Format ------------------ - -Zstandard is compatible with "raw content" dictionaries, -free of any format restriction, except that they must be at least 8 bytes. -These dictionaries function as if they were just the `Content` part -of a formatted dictionary. - -But dictionaries created by `zstd --train` follow a format, described here. - -__Pre-requisites__ : a dictionary has a size, - defined either by a buffer limit, or a file size. - -| `Magic_Number` | `Dictionary_ID` | `Entropy_Tables` | `Content` | -| -------------- | --------------- | ---------------- | --------- | - -__`Magic_Number`__ : 4 bytes ID, value 0xEC30A437, __little-endian__ format - -__`Dictionary_ID`__ : 4 bytes, stored in __little-endian__ format. - `Dictionary_ID` can be any value, except 0 (which means no `Dictionary_ID`). - It's used by decoders to check if they use the correct dictionary. 
-
-_Reserved ranges :_
-If the dictionary is going to be distributed in a public environment,
-the following ranges of `Dictionary_ID` are reserved for some future registrar
-and shall not be used :
-
- - low range  : <= 32767
- - high range : >= (2^31)
-
-Outside of these ranges, any value of `Dictionary_ID`
-which is both `>= 32768` and `< (1<<31)` can be used freely,
-even in public environment.
-
-
-__`Entropy_Tables`__ : follow the same format as tables in [compressed blocks].
-   See the relevant [FSE](#fse-table-description)
-   and [Huffman](#huffman-tree-description) sections for how to decode these tables.
-   They are stored in following order :
-   Huffman tables for literals, FSE table for offsets,
-   FSE table for match lengths, and FSE table for literals lengths.
-   These tables populate the Repeat Stats literals mode and
-   Repeat distribution mode for sequence decoding.
-   It's finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`),
-   stored in order, 4-bytes __little-endian__ each, for a total of 12 bytes.
-   Each recent offset must have a value <= dictionary content size, and cannot equal 0.
-
-__`Content`__ : The rest of the dictionary is its content.
-   The content acts as a "past" in front of data to compress or decompress,
-   so it can be referenced in sequence commands.
-   As long as the amount of data decoded from this frame is less than or
-   equal to `Window_Size`, sequence commands may specify offsets longer
-   than the total length of decoded output so far to reference back to the
-   dictionary, even parts of the dictionary with offsets larger than `Window_Size`.
-   After the total output has surpassed `Window_Size` however,
-   this is no longer allowed and the dictionary is no longer accessible.
-
-[compressed blocks]: #the-format-of-compressed_block
-
-If a dictionary is provided by an external source,
-it should be loaded with great care, its content considered untrusted.
- - - -Appendix A - Decoding tables for predefined codes -------------------------------------------------- - -This appendix contains FSE decoding tables -for the predefined literal length, match length, and offset codes. -The tables have been constructed using the algorithm as given above in chapter -"from normalized distribution to decoding tables". -The tables here can be used as examples -to crosscheck that an implementation build its decoding tables correctly. - -#### Literal Length Code: - -| State | Symbol | Number_Of_Bits | Base | -| ----- | ------ | -------------- | ---- | -| 0 | 0 | 4 | 0 | -| 1 | 0 | 4 | 16 | -| 2 | 1 | 5 | 32 | -| 3 | 3 | 5 | 0 | -| 4 | 4 | 5 | 0 | -| 5 | 6 | 5 | 0 | -| 6 | 7 | 5 | 0 | -| 7 | 9 | 5 | 0 | -| 8 | 10 | 5 | 0 | -| 9 | 12 | 5 | 0 | -| 10 | 14 | 6 | 0 | -| 11 | 16 | 5 | 0 | -| 12 | 18 | 5 | 0 | -| 13 | 19 | 5 | 0 | -| 14 | 21 | 5 | 0 | -| 15 | 22 | 5 | 0 | -| 16 | 24 | 5 | 0 | -| 17 | 25 | 5 | 32 | -| 18 | 26 | 5 | 0 | -| 19 | 27 | 6 | 0 | -| 20 | 29 | 6 | 0 | -| 21 | 31 | 6 | 0 | -| 22 | 0 | 4 | 32 | -| 23 | 1 | 4 | 0 | -| 24 | 2 | 5 | 0 | -| 25 | 4 | 5 | 32 | -| 26 | 5 | 5 | 0 | -| 27 | 7 | 5 | 32 | -| 28 | 8 | 5 | 0 | -| 29 | 10 | 5 | 32 | -| 30 | 11 | 5 | 0 | -| 31 | 13 | 6 | 0 | -| 32 | 16 | 5 | 32 | -| 33 | 17 | 5 | 0 | -| 34 | 19 | 5 | 32 | -| 35 | 20 | 5 | 0 | -| 36 | 22 | 5 | 32 | -| 37 | 23 | 5 | 0 | -| 38 | 25 | 4 | 0 | -| 39 | 25 | 4 | 16 | -| 40 | 26 | 5 | 32 | -| 41 | 28 | 6 | 0 | -| 42 | 30 | 6 | 0 | -| 43 | 0 | 4 | 48 | -| 44 | 1 | 4 | 16 | -| 45 | 2 | 5 | 32 | -| 46 | 3 | 5 | 32 | -| 47 | 5 | 5 | 32 | -| 48 | 6 | 5 | 32 | -| 49 | 8 | 5 | 32 | -| 50 | 9 | 5 | 32 | -| 51 | 11 | 5 | 32 | -| 52 | 12 | 5 | 32 | -| 53 | 15 | 6 | 0 | -| 54 | 17 | 5 | 32 | -| 55 | 18 | 5 | 32 | -| 56 | 20 | 5 | 32 | -| 57 | 21 | 5 | 32 | -| 58 | 23 | 5 | 32 | -| 59 | 24 | 5 | 32 | -| 60 | 35 | 6 | 0 | -| 61 | 34 | 6 | 0 | -| 62 | 33 | 6 | 0 | -| 63 | 32 | 6 | 0 | - -#### Match Length Code: - -| State | Symbol | Number_Of_Bits | Base 
| -| ----- | ------ | -------------- | ---- | -| 0 | 0 | 6 | 0 | -| 1 | 1 | 4 | 0 | -| 2 | 2 | 5 | 32 | -| 3 | 3 | 5 | 0 | -| 4 | 5 | 5 | 0 | -| 5 | 6 | 5 | 0 | -| 6 | 8 | 5 | 0 | -| 7 | 10 | 6 | 0 | -| 8 | 13 | 6 | 0 | -| 9 | 16 | 6 | 0 | -| 10 | 19 | 6 | 0 | -| 11 | 22 | 6 | 0 | -| 12 | 25 | 6 | 0 | -| 13 | 28 | 6 | 0 | -| 14 | 31 | 6 | 0 | -| 15 | 33 | 6 | 0 | -| 16 | 35 | 6 | 0 | -| 17 | 37 | 6 | 0 | -| 18 | 39 | 6 | 0 | -| 19 | 41 | 6 | 0 | -| 20 | 43 | 6 | 0 | -| 21 | 45 | 6 | 0 | -| 22 | 1 | 4 | 16 | -| 23 | 2 | 4 | 0 | -| 24 | 3 | 5 | 32 | -| 25 | 4 | 5 | 0 | -| 26 | 6 | 5 | 32 | -| 27 | 7 | 5 | 0 | -| 28 | 9 | 6 | 0 | -| 29 | 12 | 6 | 0 | -| 30 | 15 | 6 | 0 | -| 31 | 18 | 6 | 0 | -| 32 | 21 | 6 | 0 | -| 33 | 24 | 6 | 0 | -| 34 | 27 | 6 | 0 | -| 35 | 30 | 6 | 0 | -| 36 | 32 | 6 | 0 | -| 37 | 34 | 6 | 0 | -| 38 | 36 | 6 | 0 | -| 39 | 38 | 6 | 0 | -| 40 | 40 | 6 | 0 | -| 41 | 42 | 6 | 0 | -| 42 | 44 | 6 | 0 | -| 43 | 1 | 4 | 32 | -| 44 | 1 | 4 | 48 | -| 45 | 2 | 4 | 16 | -| 46 | 4 | 5 | 32 | -| 47 | 5 | 5 | 32 | -| 48 | 7 | 5 | 32 | -| 49 | 8 | 5 | 32 | -| 50 | 11 | 6 | 0 | -| 51 | 14 | 6 | 0 | -| 52 | 17 | 6 | 0 | -| 53 | 20 | 6 | 0 | -| 54 | 23 | 6 | 0 | -| 55 | 26 | 6 | 0 | -| 56 | 29 | 6 | 0 | -| 57 | 52 | 6 | 0 | -| 58 | 51 | 6 | 0 | -| 59 | 50 | 6 | 0 | -| 60 | 49 | 6 | 0 | -| 61 | 48 | 6 | 0 | -| 62 | 47 | 6 | 0 | -| 63 | 46 | 6 | 0 | - -#### Offset Code: - -| State | Symbol | Number_Of_Bits | Base | -| ----- | ------ | -------------- | ---- | -| 0 | 0 | 5 | 0 | -| 1 | 6 | 4 | 0 | -| 2 | 9 | 5 | 0 | -| 3 | 15 | 5 | 0 | -| 4 | 21 | 5 | 0 | -| 5 | 3 | 5 | 0 | -| 6 | 7 | 4 | 0 | -| 7 | 12 | 5 | 0 | -| 8 | 18 | 5 | 0 | -| 9 | 23 | 5 | 0 | -| 10 | 5 | 5 | 0 | -| 11 | 8 | 4 | 0 | -| 12 | 14 | 5 | 0 | -| 13 | 20 | 5 | 0 | -| 14 | 2 | 5 | 0 | -| 15 | 7 | 4 | 16 | -| 16 | 11 | 5 | 0 | -| 17 | 17 | 5 | 0 | -| 18 | 22 | 5 | 0 | -| 19 | 4 | 5 | 0 | -| 20 | 8 | 4 | 16 | -| 21 | 13 | 5 | 0 | -| 22 | 19 | 5 | 0 | -| 23 | 1 | 5 | 0 | -| 24 | 6 | 4 | 16 | -| 25 | 10 | 
5 | 0 | -| 26 | 16 | 5 | 0 | -| 27 | 28 | 5 | 0 | -| 28 | 27 | 5 | 0 | -| 29 | 26 | 5 | 0 | -| 30 | 25 | 5 | 0 | -| 31 | 24 | 5 | 0 | - - - -Appendix B - Resources for implementers -------------------------------------------------- - -An open source reference implementation is available on : -https://github.com/facebook/zstd - -The project contains a frame generator, called [decodeCorpus], -which can be used by any 3rd-party implementation -to verify that a tested decoder is compliant with the specification. - -[decodeCorpus]: https://github.com/facebook/zstd/tree/v1.3.4/tests#decodecorpus---tool-to-generate-zstandard-frames-for-decoder-testing - -`decodeCorpus` generates random valid frames. -A compliant decoder should be able to decode them all, -or at least provide a meaningful error code explaining for which reason it cannot -(memory limit restrictions for example). - - -Version changes ---------------- -- 0.4.3 : clarifications for Huffman prefix code assignment example -- 0.4.2 : refactor FSE table construction process, inspired by Donald Pian -- 0.4.1 : clarifications on a few error scenarios, by Eric Lasota -- 0.4.0 : fixed imprecise behavior for nbSeq==0, detected by Igor Pavlov -- 0.3.9 : clarifications for Huffman-compressed literal sizes. -- 0.3.8 : clarifications for Huffman Blocks and Huffman Tree descriptions. 
-- 0.3.7 : clarifications for Repeat_Offsets, matching RFC8878 -- 0.3.6 : clarifications for Dictionary_ID -- 0.3.5 : clarifications for Block_Maximum_Size -- 0.3.4 : clarifications for FSE decoding table -- 0.3.3 : clarifications for field Block_Size -- 0.3.2 : remove additional block size restriction on compressed blocks -- 0.3.1 : minor clarification regarding offset history update rules -- 0.3.0 : minor edits to match RFC8478 -- 0.2.9 : clarifications for huffman weights direct representation, by Ulrich Kunitz -- 0.2.8 : clarifications for IETF RFC discuss -- 0.2.7 : clarifications from IETF RFC review, by Vijay Gurbani and Nick Terrell -- 0.2.6 : fixed an error in huffman example, by Ulrich Kunitz -- 0.2.5 : minor typos and clarifications -- 0.2.4 : section restructuring, by Sean Purcell -- 0.2.3 : clarified several details, by Sean Purcell -- 0.2.2 : added predefined codes, by Johannes Rudolph -- 0.2.1 : clarify field names, by Przemyslaw Skibinski -- 0.2.0 : numerous format adjustments for zstd v0.8+ -- 0.1.2 : limit Huffman tree depth to 11 bits -- 0.1.1 : reserved dictID ranges -- 0.1.0 : initial release diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html deleted file mode 100644 index 485d5eafbe92..000000000000 --- a/doc/zstd_manual.html +++ /dev/null @@ -1,2237 +0,0 @@ -<html> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> -<title>zstd 1.5.7 Manual</title> -</head> -<body> -<h1>zstd 1.5.7 Manual</h1> -Note: the content of this file has been automatically generated by parsing "zstd.h" -<hr> -<a name="Contents"></a><h2>Contents</h2> -<ol> -<li><a href="#Chapter1">Introduction</a></li> -<li><a href="#Chapter2">Version</a></li> -<li><a href="#Chapter3">Simple Core API</a></li> -<li><a href="#Chapter4">Explicit context</a></li> -<li><a href="#Chapter5">Advanced compression API (Requires v1.4.0+)</a></li> -<li><a href="#Chapter6">Advanced decompression API (Requires v1.4.0+)</a></li> -<li><a 
href="#Chapter7">Streaming</a></li> -<li><a href="#Chapter8">Streaming compression - HowTo</a></li> -<li><a href="#Chapter9">Streaming decompression - HowTo</a></li> -<li><a href="#Chapter10">Simple dictionary API</a></li> -<li><a href="#Chapter11">Bulk processing dictionary API</a></li> -<li><a href="#Chapter12">Dictionary helper functions</a></li> -<li><a href="#Chapter13">Advanced dictionary and prefix API (Requires v1.4.0+)</a></li> -<li><a href="#Chapter14">experimental API (static linking only)</a></li> -<li><a href="#Chapter15">Frame header and size functions</a></li> -<li><a href="#Chapter16">Memory management</a></li> -<li><a href="#Chapter17">Advanced compression functions</a></li> -<li><a href="#Chapter18">Advanced decompression functions</a></li> -<li><a href="#Chapter19">Advanced streaming functions</a></li> -<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions (DEPRECATED)</a></li> -<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li> -<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li> -<li><a href="#Chapter23">Block level API (DEPRECATED)</a></li> -</ol> -<hr> -<a name="Chapter1"></a><h2>Introduction</h2><pre> - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting - real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression - functions. - - The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - which is currently 22. Levels >= 20, labeled `--ultra`, should be used with - caution, as they require more memory. The library also offers negative - compression levels, which extend the range of speed vs. ratio preferences. - The lower the level, the faster the speed (at the cost of compression). 
- - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit context) - - unbounded multiple steps (described as Streaming compression) - - The compression ratio achievable on small data can be highly improved using - a dictionary. Dictionary compression can be performed in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing - dictionary API) - - Advanced experimental functions can be accessed using - `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. - - Advanced experimental APIs should never be used with a dynamically-linked - library. They are not "stable"; their definitions or signatures may change in - the future. Only static linking is allowed. -<BR></pre> - -<a name="Chapter2"></a><h2>Version</h2><pre></pre> - -<pre><b>unsigned ZSTD_versionNumber(void); -</b><p> Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). -</p></pre><BR> - -<pre><b>const char* ZSTD_versionString(void); -</b><p> Return runtime library version, like "1.4.5". Requires v1.3.0+. -</p></pre><BR> - -<a name="Chapter3"></a><h2>Simple Core API</h2><pre></pre> - -<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); -</b><p> Compresses `src` content as a single zstd compressed frame into already allocated `dst`. - NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have - enough space to successfully compress the data. - @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). -</p></pre><BR> - -<pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); -</b><p> `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. 
- Multiple compressed frames can be decompressed at once with this method. - The result will be the concatenation of all decompressed frames, back to back. - `dstCapacity` is an upper bound of originalSize to regenerate. - First frame's decompressed size can be extracted using ZSTD_getFrameContentSize(). - If maximum upper bound isn't known, prefer using streaming mode to decompress data. - @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - or an errorCode if it fails (which can be tested using ZSTD_isError()). -</p></pre><BR> - -<h3>Decompression helper functions</h3><pre></pre><b><pre></pre></b><BR> -<pre><b>#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -</b><p> `src` should point to the start of a ZSTD encoded frame. - `srcSize` must be at least as large as the frame header. - hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - @return : - decompressed size of `src` frame content, if known - - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - note 1 : a 0 return value means the frame is valid but "empty". - note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode). - When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - In which case, it's necessary to use streaming mode to decompress data. - Optionally, application can rely on some implicit limit, - as ZSTD_decompress() only needs an upper bound of decompressed size. - (For example, data could be necessarily cut into blocks <= 16 KB). - note 3 : decompressed size is always present when compression is completed using single-pass functions, - such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). 
- note 4 : decompressed size can be very large (64-bits value), - potentially larger than what local system can handle as a single memory segment. - In which case, it's necessary to use streaming mode to decompress data. - note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - Always ensure return value fits within application's authorized limits. - Each application can set its own limits. - note 6 : This function replaces ZSTD_getDecompressedSize() -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") -ZSTDLIB_API -unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); -</b><p> NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - Both functions work the same way, but ZSTD_getDecompressedSize() blends - "empty", "unknown" and "error" results to the same return value (0), - while ZSTD_getFrameContentSize() gives them separate return values. - @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. -</p></pre><BR> - -<pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); -</b><p> `src` should point to the start of a ZSTD frame or skippable frame. - `srcSize` must be >= first frame size - @return : the compressed size of the first frame starting at `src`, - suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - or an error code if input is invalid -</p></pre><BR> - -<h3>Compression helper functions</h3><pre></pre><b><pre></pre></b><BR> -<pre><b>#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) -#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) </b>/* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */<b> -size_t ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case single-pass scenario */<b> -</b><p> maximum compressed size in worst case single-pass scenario. - When invoking `ZSTD_compress()`, or any other one-pass compression function, - it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) - as it eliminates one potential failure scenario, - aka not enough room in dst buffer to write the compressed frame. - Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE . - In which case, ZSTD_compressBound() will return an error code - which can be tested using ZSTD_isError(). - - ZSTD_COMPRESSBOUND() : - same as ZSTD_compressBound(), but as a macro. - It can be used to produce constants, which can be useful for static allocation, - for example to size a static array on stack. - Will produce constant value 0 if srcSize is too large. - -</p></pre><BR> - -<h3>Error helper functions</h3><pre></pre><b><pre>#include "zstd_errors.h" </b>/* list of errors */<b> -</b>/* ZSTD_isError() :<b> - * Most ZSTD_* functions returning a size_t value can be tested for error, - * using ZSTD_isError(). 
- * @return 1 if error, 0 otherwise - */ -unsigned ZSTD_isError(size_t result); </b>/*!< tells if a `size_t` function result is an error code */<b> -ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); </b>/* convert a result into an error code, which can be compared to error enum list */<b> -const char* ZSTD_getErrorName(size_t result); </b>/*!< provides readable string from a function result */<b> -int ZSTD_minCLevel(void); </b>/*!< minimum negative compression level allowed, requires v1.4.0+ */<b> -int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b> -int ZSTD_defaultCLevel(void); </b>/*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */<b> -</pre></b><BR> -<a name="Chapter4"></a><h2>Explicit context</h2><pre></pre> - -<h3>Compression context</h3><pre> When compressing many times, - it is recommended to allocate a compression context just once, - and reuse it for each successive compression operation. - This will make the workload easier for system's memory. - Note : re-using context is just a speed / resource optimization. - It doesn't change the compression ratio, which remains identical. - Note 2: For parallel execution in multi-threaded environments, - use one different context per thread . - -</pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTD_CCtx* ZSTD_createCCtx(void); -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); </b>/* compatible with NULL pointer */<b> -</pre></b><BR> -<pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); -</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - Important : in order to mirror `ZSTD_compress()` behavior, - this function compresses at the requested compression level, - __ignoring any other advanced parameter__ . - If any advanced parameter was set using the advanced API, - they will all be reset. Only @compressionLevel remains. 
- -</p></pre><BR> - -<h3>Decompression context</h3><pre> When decompressing many times, - it is recommended to allocate a context only once, - and reuse it for each successive compression operation. - This will make workload friendlier for system's memory. - Use one context per thread for parallel execution. -</pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTD_DCtx* ZSTD_createDCtx(void); -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b> -</pre></b><BR> -<pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); -</b><p> Same as ZSTD_decompress(), - requires an allocated ZSTD_DCtx. - Compatible with sticky parameters (see below). - -</p></pre><BR> - -<a name="Chapter5"></a><h2>Advanced compression API (Requires v1.4.0+)</h2><pre></pre> - -<pre><b>typedef enum { ZSTD_fast=1, - ZSTD_dfast=2, - ZSTD_greedy=3, - ZSTD_lazy=4, - ZSTD_lazy2=5, - ZSTD_btlazy2=6, - ZSTD_btopt=7, - ZSTD_btultra=8, - ZSTD_btultra2=9 - </b>/* note : new strategies _might_ be added in the future.<b> - Only the order (from fast to strong) is guaranteed */ -} ZSTD_strategy; -</b></pre><BR> -<pre><b>typedef enum { - - </b>/* compression parameters<b> - * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. - * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, </b>/* Set compression parameters according to pre-defined cLevel table.<b> - * Note that exact compression parameters are dynamically determined, - * depending on both compression level and srcSize (when known). - * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. - * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level does not automatically set all other compression parameters - * to default. 
Setting this will however eventually dynamically impact the compression - * parameters which have not been manually set. The manually set - * ones will 'stick'. */ - </b>/* Advanced compression parameters :<b> - * It's possible to pin down compression parameters to some specific values. - * In which case, these values are no longer dynamically selected by the compressor */ - ZSTD_c_windowLog=101, </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b> - * This will set a memory budget for streaming decompression, - * with larger values requiring more memory - * and typically compressing more. - * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "use default windowLog". - * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ - ZSTD_c_hashLog=102, </b>/* Size of the initial probe table, as a power of 2.<b> - * Resulting memory usage is (1 << (hashLog+2)). - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. - * Larger tables improve compression ratio of strategies <= dFast, - * and improve speed of strategies > dFast. - * Special: value 0 means "use default hashLog". */ - ZSTD_c_chainLog=103, </b>/* Size of the multi-probe search table, as a power of 2.<b> - * Resulting memory usage is (1 << (chainLog+2)). - * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. - * Larger tables result in better and slower compression. - * This parameter is useless for "fast" strategy. - * It's still useful when using "dfast" strategy, - * in which case it defines a secondary probe table. - * Special: value 0 means "use default chainLog". */ - ZSTD_c_searchLog=104, </b>/* Number of search attempts, as a power of 2.<b> - * More attempts result in better and slower compression. - * This parameter is useless for "fast" and "dFast" strategies. - * Special: value 0 means "use default searchLog". 
*/ - ZSTD_c_minMatch=105, </b>/* Minimum size of searched matches.<b> - * Note that Zstandard can still find matches of smaller size, - * it just tweaks its search algorithm to look for this size and larger. - * Larger values increase compression and decompression speed, but decrease ratio. - * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. - * Note that currently, for all strategies < btopt, effective minimum is 4. - * , for all strategies > fast, effective maximum is 6. - * Special: value 0 means "use default minMatchLength". */ - ZSTD_c_targetLength=106, </b>/* Impact of this field depends on strategy.<b> - * For strategies btopt, btultra & btultra2: - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger, and slower. - * For strategy fast: - * Distance between match sampling. - * Larger values make compression faster, and weaker. - * Special: value 0 means "use default targetLength". */ - ZSTD_c_strategy=107, </b>/* See ZSTD_strategy enum definition.<b> - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ - - ZSTD_c_targetCBlockSize=130, </b>/* v1.5.6+<b> - * Attempts to fit compressed block size into approximately targetCBlockSize. - * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. - * Note that it's not a guarantee, just a convergence target (default:0). - * No target when targetCBlockSize == 0. - * This is helpful in low bandwidth streaming environments to improve end-to-end latency, - * when a client can make use of partial documents (a prominent example being Chrome). - * Note: this parameter is stable since v1.5.6. - * It was present as an experimental parameter in earlier versions, - * but it's not recommended using it with earlier library versions - * due to massive performance regressions. 
- */ - </b>/* LDM mode parameters */<b> - ZSTD_c_enableLongDistanceMatching=160, </b>/* Enable long distance matching.<b> - * This parameter is designed to improve compression ratio - * for large inputs, by finding large matches at long distance. - * It increases memory usage and window size. - * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. - * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and - * compression strategy >= ZSTD_btopt (== compression level 16+) */ - ZSTD_c_ldmHashLog=161, </b>/* Size of the table for long distance matching, as a power of 2.<b> - * Larger values increase memory usage and compression ratio, - * but decrease compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * default: windowlog - 7. - * Special: value 0 means "automatically determine hashlog". */ - ZSTD_c_ldmMinMatch=162, </b>/* Minimum match size for long distance matcher.<b> - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. - * Special: value 0 means "use default value" (default: 64). */ - ZSTD_c_ldmBucketSizeLog=163, </b>/* Log size of each bucket in the LDM hash table for collision resolution.<b> - * Larger values improve collision resolution but decrease compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. - * Special: value 0 means "use default value" (default: 3). */ - ZSTD_c_ldmHashRateLog=164, </b>/* Frequency of inserting/looking up entries into the LDM hash table.<b> - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). - * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. - * Larger values improve compression speed. - * Deviating far from default value will likely result in a compression ratio decrease. - * Special: value 0 means "automatically determine hashRateLog". 
*/ - - </b>/* frame parameters */<b> - ZSTD_c_contentSizeFlag=200, </b>/* Content size will be written into frame header _whenever known_ (default:1)<b> - * Content size must be known at the beginning of compression. - * This is automatically the case when using ZSTD_compress2(), - * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ - ZSTD_c_checksumFlag=201, </b>/* A 32-bits checksum of content is written at end of frame (default:0) */<b> - ZSTD_c_dictIDFlag=202, </b>/* When applicable, dictionary's ID is written into frame header (default:1) */<b> - - </b>/* multi-threading parameters */<b> - </b>/* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).<b> - * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. - * In a situation where it's unknown if the linked library supports multi-threading or not, - * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. - */ - ZSTD_c_nbWorkers=400, </b>/* Select how many threads will be spawned to compress in parallel.<b> - * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : - * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression is performed in parallel, within worker thread(s). - * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : - * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). - * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, - * compression is performed inside Caller's thread, and all invocations are blocking */ - ZSTD_c_jobSize=401, </b>/* Size of a compression job. 
This value is enforced only when nbWorkers >= 1.<b> - * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. - * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. - * The minimum size is automatically and transparently enforced. */ - ZSTD_c_overlapLog=402, </b>/* Control the overlap size, as a fraction of window size.<b> - * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. - * It helps preserve compression ratio, while each job is compressed in parallel. - * This value is enforced only when nbWorkers >= 1. - * Larger values increase compression ratio, but decrease speed. - * Possible values range from 0 to 9 : - * - 0 means "default" : value will be determined by the library, depending on strategy - * - 1 means "no overlap" - * - 9 means "full overlap", using a full window size. - * Each intermediate rank increases/decreases load size by a factor 2 : - * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default - * default value varies between 6 and 9, depending on strategy */ - - </b>/* note : additional experimental parameters are also available<b> - * within the experimental section of the API. 
- * At the time of this writing, they include : - * ZSTD_c_rsyncable - * ZSTD_c_format - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode - * ZSTD_c_srcSizeHint - * ZSTD_c_enableDedicatedDictSearch - * ZSTD_c_stableInBuffer - * ZSTD_c_stableOutBuffer - * ZSTD_c_blockDelimiters - * ZSTD_c_validateSequences - * ZSTD_c_blockSplitterLevel - * ZSTD_c_splitAfterSequences - * ZSTD_c_useRowMatchFinder - * ZSTD_c_prefetchCDictTables - * ZSTD_c_enableSeqProducerFallback - * ZSTD_c_maxBlockSize - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. - */ - ZSTD_c_experimentalParam1=500, - ZSTD_c_experimentalParam2=10, - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, - </b>/* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */<b> - ZSTD_c_experimentalParam7=1004, - ZSTD_c_experimentalParam8=1005, - ZSTD_c_experimentalParam9=1006, - ZSTD_c_experimentalParam10=1007, - ZSTD_c_experimentalParam11=1008, - ZSTD_c_experimentalParam12=1009, - ZSTD_c_experimentalParam13=1010, - ZSTD_c_experimentalParam14=1011, - ZSTD_c_experimentalParam15=1012, - ZSTD_c_experimentalParam16=1013, - ZSTD_c_experimentalParam17=1014, - ZSTD_c_experimentalParam18=1015, - ZSTD_c_experimentalParam19=1016, - ZSTD_c_experimentalParam20=1017 -} ZSTD_cParameter; -</b></pre><BR> -<pre><b>typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; -</b></pre><BR> -<pre><b>ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); -</b><p> All parameters must belong to an interval with lower and upper bounds, - otherwise they will either trigger an error or be automatically clamped. 
- @return : a structure, ZSTD_bounds, which contains - - an error status field, which must be tested using ZSTD_isError() - - lower and upper bounds, both inclusive - -</p></pre><BR> - -<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); -</b><p> Set one compression parameter, selected by enum ZSTD_cParameter. - All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). - Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - Setting a parameter is generally only possible during frame initialization (before starting compression). - Exception : when using multi-threading mode (nbWorkers >= 1), - the following parameters can be updated _during_ compression (within same frame): - => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - new parameters will be active for next job only (after a flush()). - @return : an error code (which can be tested using ZSTD_isError()). - -</p></pre><BR> - -<pre><b>size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); -</b><p> Total input data size to be compressed as a single frame. - Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. - This value will also be controlled at end of frame, and trigger an error if not respected. - @result : 0, or an error code (which can be tested with ZSTD_isError()). - Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. - In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. - Note 2 : pledgedSrcSize is only valid once, for the next frame. - It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. 
- Note 3 : Whenever all input data is provided and consumed in a single round, - for example with ZSTD_compress2(), - or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), - this value is automatically overridden by srcSize instead. - -</p></pre><BR> - -<pre><b>typedef enum { - ZSTD_reset_session_only = 1, - ZSTD_reset_parameters = 2, - ZSTD_reset_session_and_parameters = 3 -} ZSTD_ResetDirective; -</b></pre><BR> -<pre><b>size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); -</b><p> There are 2 different things that can be reset, independently or jointly : - - The session : will stop compressing current frame, and make CCtx ready to start a new one. - Useful after an error, or to interrupt any ongoing compression. - Any internal data not yet flushed is cancelled. - Compression parameters and dictionary remain unchanged. - They will be used to compress next frame. - Resetting session never fails. - - The parameters : changes all parameters back to "default". - This also removes any reference to any dictionary or external sequence producer. - Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - - Both : similar to resetting the session, followed by resetting parameters. - -</p></pre><BR> - -<pre><b>size_t ZSTD_compress2( ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); -</b><p> Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. - (note that this entry point doesn't even expose a compression level parameter). - ZSTD_compress2() always starts a new frame. - Should cctx hold data from a previously unfinished frame, everything about it is forgotten. 
- - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - - The function is always blocking, returns when compression is completed. - NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have - enough space to successfully compress the data, though it is possible it fails for other reasons. - @return : compressed size written into `dst` (<= `dstCapacity), - or an error code if it fails (which can be tested using ZSTD_isError()). - -</p></pre><BR> - -<a name="Chapter6"></a><h2>Advanced decompression API (Requires v1.4.0+)</h2><pre></pre> - -<pre><b>typedef enum { - - ZSTD_d_windowLogMax=100, </b>/* Select a size limit (in power of 2) beyond which<b> - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). - * Special: value 0 means "use default maximum windowLog". */ - - </b>/* note : additional experimental parameters are also available<b> - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_d_format - * ZSTD_d_stableOutBuffer - * ZSTD_d_forceIgnoreChecksum - * ZSTD_d_refMultipleDDicts - * ZSTD_d_disableHuffmanAssembly - * ZSTD_d_maxBlockSize - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? 
names directly - */ - ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001, - ZSTD_d_experimentalParam3=1002, - ZSTD_d_experimentalParam4=1003, - ZSTD_d_experimentalParam5=1004, - ZSTD_d_experimentalParam6=1005 - -} ZSTD_dParameter; -</b></pre><BR> -<pre><b>ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); -</b><p> All parameters must belong to an interval with lower and upper bounds, - otherwise they will either trigger an error or be automatically clamped. - @return : a structure, ZSTD_bounds, which contains - - an error status field, which must be tested using ZSTD_isError() - - both lower and upper bounds, inclusive - -</p></pre><BR> - -<pre><b>size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); -</b><p> Set one compression parameter, selected by enum ZSTD_dParameter. - All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - Setting a parameter is only possible during frame initialization (before starting decompression). - @return : 0, or an error code (which can be tested using ZSTD_isError()). - -</p></pre><BR> - -<pre><b>size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); -</b><p> Return a DCtx to clean state. - Session and parameters can be reset jointly or separately. - Parameters can only be reset when no active frame is being decompressed. - @return : 0, or an error code, which can be tested with ZSTD_isError() - -</p></pre><BR> - -<a name="Chapter7"></a><h2>Streaming</h2><pre></pre> - -<pre><b>typedef struct ZSTD_inBuffer_s { - const void* src; </b>/**< start of input buffer */<b> - size_t size; </b>/**< size of input buffer */<b> - size_t pos; </b>/**< position where reading stopped. Will be updated. 
Necessarily 0 <= pos <= size */<b> -} ZSTD_inBuffer; -</b></pre><BR> -<pre><b>typedef struct ZSTD_outBuffer_s { - void* dst; </b>/**< start of output buffer */<b> - size_t size; </b>/**< size of output buffer */<b> - size_t pos; </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b> -} ZSTD_outBuffer; -</b></pre><BR> -<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre> - A ZSTD_CStream object is required to track streaming operation. - Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. - ZSTD_CStream objects can be reused multiple times on consecutive compression operations. - It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. - - For parallel execution, use one separate ZSTD_CStream per thread. - - note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. - - Parameters are sticky : when starting a new compression on the same context, - it will reuse the same sticky parameters as previous compression session. - When in doubt, it's recommended to fully initialize the context before usage. - Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), - ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to - set more specific parameters, the pledged source size, or load a dictionary. - - Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to - consume input stream. The function will automatically update both `pos` - fields within `input` and `output`. - Note that the function may not consume the entire input, for example, because - the output buffer is already full, in which case `input.pos < input.size`. - The caller must check if input has been entirely consumed. - If not, the caller must make some room to receive more compressed data, - and then present again remaining input data. 
- note: ZSTD_e_continue is guaranteed to make some forward progress when called, - but doesn't guarantee maximal forward progress. This is especially relevant - when compressing with multiple threads. The call won't block if it can - consume some input, but if it can't it will wait for some, but not all, - output to be flushed. - @return : provides a minimum amount of data remaining to be flushed from internal buffers - or an error code, which can be tested using ZSTD_isError(). - - At any moment, it's possible to flush whatever data might remain stuck within internal buffer, - using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. - Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). - In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. - You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the - operation. - note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will - block until the flush is complete or the output buffer is full. - @return : 0 if internal buffers are entirely flushed, - >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), - or an error code, which can be tested using ZSTD_isError(). - - Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. - It will perform a flush and write frame epilogue. - The epilogue is required for decoders to consider a frame completed. - flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. - You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to - start a new frame. 
- note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will - block until the flush is complete or the output buffer is full. - @return : 0 if frame fully completed and fully flushed, - >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), - or an error code, which can be tested using ZSTD_isError(). - - -<BR></pre> - -<pre><b>typedef ZSTD_CCtx ZSTD_CStream; </b>/**< CCtx and CStream are now effectively same object (>= v1.3.0) */<b> -</b></pre><BR> -<h3>ZSTD_CStream management functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream(void); -size_t ZSTD_freeCStream(ZSTD_CStream* zcs); </b>/* accept NULL pointer */<b> -</pre></b><BR> -<h3>Streaming compression functions</h3><pre></pre><b><pre>typedef enum { - ZSTD_e_continue=0, </b>/* collect more data, encoder decides when to output compressed result, for optimal compression ratio */<b> - ZSTD_e_flush=1, </b>/* flush any data provided so far,<b> - * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. - * note : multithreaded compression will block to flush as much output as possible. */ - ZSTD_e_end=2 </b>/* flush any remaining data _and_ close current frame.<b> - * note that frame is only closed after compressed data is fully flushed (return value == 0). - * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). - : note : multithreaded compression will block to flush as much output as possible. */ -} ZSTD_EndDirective; -</pre></b><BR> -<pre><b>size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); -</b><p> Behaves about the same as ZSTD_compressStream, with additional control on end directive. 
- - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) - - output->pos must be <= dstCapacity, input->pos must be <= srcSize - - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - - endOp must be a valid directive - - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. - - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, - and then immediately returns, just indicating that there is some data remaining to be flushed. - The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. - - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. - - @return provides a minimum amount of data remaining to be flushed from internal buffers - or an error code, which can be tested using ZSTD_isError(). - if @return != 0, flush is not fully completed, there is still some data left within internal buffers. - This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. - For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. - - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), - only ZSTD_e_end or ZSTD_e_flush operations are allowed. - Before starting a new compression job, or changing compression parameters, - it is required to fully flush internal buffers. - - note: if an operation ends with an error, it may leave @cctx in an undefined state. 
 - Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
- In order to be re-employed after an error, a state must be reset,
- which can be done explicitly (ZSTD_CCtx_reset()),
- or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
-
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_CStreamInSize(void); </b>/**< recommended size for input buffer */<b>
-</b></pre><BR>
-<pre><b>size_t ZSTD_CStreamOutSize(void); </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */<b>
-</b></pre><BR>
-<pre><b>size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
-</b>/*!<b>
- * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
- * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
- * the next read size (if non-zero and not an error). ZSTD_compressStream2()
- * returns the minimum nb of bytes left to flush (if non-zero and not an error).
- */
-size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */<b>
-size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */<b>
-size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-</b><p>
- ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
- ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
-
- Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API
- to compress with a dictionary.
-
-</p></pre><BR>
-
-<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
- A ZSTD_DStream object is required to track streaming operations.
- Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. - ZSTD_DStream objects can be re-employed multiple times. - - Use ZSTD_initDStream() to start a new decompression operation. - @return : recommended first input size - Alternatively, use advanced API to set specific properties. - - Use ZSTD_decompressStream() repetitively to consume your input. - The function will update both `pos` fields. - If `input.pos < input.size`, some input has not been consumed. - It's up to the caller to present again remaining data. - - The function tries to flush all data decoded immediately, respecting output buffer size. - If `output.pos < output.size`, decoder has flushed everything it could. - - However, when `output.pos == output.size`, it's more difficult to know. - If @return > 0, the frame is not complete, meaning - either there is still some data left to flush within internal buffers, - or there is more input to read to complete the frame (or both). - In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. - Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. - @return : 0 when a frame is completely decoded and fully flushed, - or an error code, which can be tested using ZSTD_isError(), - or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : - the return value is a suggested next input size (just a hint for better latency) - that will never request more than the remaining content of the compressed frame. 
- -<BR></pre> - -<pre><b>typedef ZSTD_DCtx ZSTD_DStream; </b>/**< DCtx and DStream are now effectively same object (>= v1.3.0) */<b> -</b></pre><BR> -<h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void); -size_t ZSTD_freeDStream(ZSTD_DStream* zds); </b>/* accept NULL pointer */<b> -</pre></b><BR> -<h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR> -<pre><b>size_t ZSTD_initDStream(ZSTD_DStream* zds); -</b><p> Initialize/reset DStream state for new decompression operation. - Call before new decompression operation using same DStream. - - Note : This function is redundant with the advanced API and equivalent to: - ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - ZSTD_DCtx_refDDict(zds, NULL); - -</p></pre><BR> - -<pre><b>size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -</b><p> Streaming decompression function. - Call repetitively to consume full input updating it as necessary. - Function will update both input and output `pos` fields exposing current state via these fields: - - `input.pos < input.size`, some input remaining and caller should provide remaining input - on the next call. - - `output.pos < output.size`, decoder flushed internal output buffer. - - `output.pos == output.size`, unflushed data potentially present in the internal buffers, - check ZSTD_decompressStream() @return value, - if > 0, invoke it again to flush remaining data to output. - Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. - - @return : 0 when a frame is completely decoded and fully flushed, - or an error code, which can be tested using ZSTD_isError(), - or any other value > 0, which means there is some decoding or flushing to do to complete current frame. - - Note: when an operation returns with an error code, the @zds state may be left in undefined state. - It's UB to invoke `ZSTD_decompressStream()` on such a state. 
- In order to re-use such a state, it must be first reset, - which can be done explicitly (`ZSTD_DCtx_reset()`), - or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) - -</p></pre><BR> - -<pre><b>size_t ZSTD_DStreamInSize(void); </b>/*!< recommended size for input buffer */<b> -</b></pre><BR> -<pre><b>size_t ZSTD_DStreamOutSize(void); </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b> -</b></pre><BR> -<a name="Chapter10"></a><h2>Simple dictionary API</h2><pre></pre> - -<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); -</b><p> Compression at an explicit compression level using a Dictionary. - A dictionary can be any arbitrary data segment (also called a prefix), - or a buffer with specified information (see zdict.h). - Note : This function loads the dictionary, resulting in significant startup delay. - It's intended for a dictionary used only once. - Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. -</p></pre><BR> - -<pre><b>size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); -</b><p> Decompression using a known Dictionary. - Dictionary must be identical to the one used during compression. - Note : This function loads the dictionary, resulting in significant startup delay. - It's intended for a dictionary used only once. - Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
-</p></pre><BR> - -<a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre> - -<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, - int compressionLevel); -</b><p> When compressing multiple messages or blocks using the same dictionary, - it's recommended to digest the dictionary only once, since it's a costly operation. - ZSTD_createCDict() will create a state from digesting a dictionary. - The resulting state can be used for future compression operations with very limited startup cost. - ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. - Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. - Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, - in which case the only thing that it transports is the @compressionLevel. - This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, - expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. -</p></pre><BR> - -<pre><b>size_t ZSTD_freeCDict(ZSTD_CDict* CDict); -</b><p> Function frees memory allocated by ZSTD_createCDict(). - If a NULL pointer is passed, no operation is performed. -</p></pre><BR> - -<pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); -</b><p> Compression using a digested Dictionary. - Recommended when same dictionary is used multiple times. 
- Note : compression level is _decided at dictionary creation time_, - and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) -</p></pre><BR> - -<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); -</b><p> Create a digested dictionary, ready to start decompression operation without startup delay. - dictBuffer can be released after DDict creation, as its content is copied inside DDict. -</p></pre><BR> - -<pre><b>size_t ZSTD_freeDDict(ZSTD_DDict* ddict); -</b><p> Function frees memory allocated with ZSTD_createDDict() - If a NULL pointer is passed, no operation is performed. -</p></pre><BR> - -<pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); -</b><p> Decompression using a digested Dictionary. - Recommended when same dictionary is used multiple times. -</p></pre><BR> - -<a name="Chapter12"></a><h2>Dictionary helper functions</h2><pre></pre> - -<pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); -</b><p> Provides the dictID stored within dictionary. - if @return == 0, the dictionary is not conformant with Zstandard specification. - It can still be loaded, but as a content-only dictionary. -</p></pre><BR> - -<pre><b>unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); -</b><p> Provides the dictID of the dictionary loaded into `cdict`. - If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - Non-conformant dictionaries can still be loaded, but as content-only dictionaries. -</p></pre><BR> - -<pre><b>unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); -</b><p> Provides the dictID of the dictionary loaded into `ddict`. - If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
 -</p></pre><BR>
-
-<pre><b>unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
-</b><p> Provides the dictID required to decompress the frame stored within `src`.
- If @return == 0, the dictID could not be decoded.
- This could be for one of the following reasons :
- - The frame does not require a dictionary to be decoded (most common case).
- - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information.
- Note : this use case also happens when using a non-conformant dictionary.
- - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- - This is not a Zstandard frame.
- When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
-</p></pre><BR>
-
-<a name="Chapter13"></a><h2>Advanced dictionary and prefix API (Requires v1.4.0+)</h2><pre>
- This API allows dictionaries to be used with ZSTD_compress2(),
- ZSTD_compressStream2(), and ZSTD_decompressDCtx().
- Dictionaries are sticky, they remain valid when same context is reused,
- they only reset when the context is reset
- with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
- In contrast, Prefixes are single-use.
-<BR></pre>
-
-<pre><b>size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
-</b><p> Create an internal CDict from `dict` buffer.
- Decompression will have to use same dictionary.
- @result : 0, or an error code (which can be tested with ZSTD_isError()).
- Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
- meaning "return to no-dictionary mode".
- Note 1 : Dictionary is sticky, it will be used for all future compressed frames,
- until parameters are reset, a new dictionary is loaded, or the dictionary
- is explicitly invalidated by loading a NULL dictionary.
- Note 2 : Loading a dictionary involves building tables. - It's also a CPU consuming operation, with non-negligible impact on latency. - Tables are dependent on compression parameters, and for this reason, - compression parameters can no longer be changed after loading a dictionary. - Note 3 :`dict` content will be copied internally. - Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - In such a case, dictionary buffer must outlive its users. - Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - to precisely select how dictionary content must be interpreted. - Note 5 : This method does not benefit from LDM (long distance mode). - If you want to employ LDM on some large dictionary content, - prefer employing ZSTD_CCtx_refPrefix() described below. - -</p></pre><BR> - -<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -</b><p> Reference a prepared dictionary, to be used for all future compressed frames. - Note that compression parameters are enforced from within CDict, - and supersede any compression parameter previously set within CCtx. - The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. - The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. - The dictionary will remain valid for future compressed frames using same CCtx. - @result : 0, or an error code (which can be tested with ZSTD_isError()). - Special : Referencing a NULL CDict means "return to no-dictionary mode". - Note 1 : Currently, only one dictionary can be managed. - Referencing a new dictionary effectively "discards" any previous one. - Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. -</p></pre><BR> - -<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); -</b><p> Reference a prefix (single-usage dictionary) for next compressed frame. - A prefix is **only used once**. 
Tables are discarded at end of frame (ZSTD_e_end). - Decompression will need same prefix to properly regenerate data. - Compressing with a prefix is similar in outcome as performing a diff and compressing it, - but performs much faster, especially during decompression (compression speed is tunable with compression level). - This method is compatible with LDM (long distance mode). - @result : 0, or an error code (which can be tested with ZSTD_isError()). - Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - Note 1 : Prefix buffer is referenced. It **must** outlive compression. - Its content must remain unmodified during compression. - Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - ensure that the window size is large enough to contain the entire source. - See ZSTD_c_windowLog. - Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - It's a CPU consuming operation, with non-negligible impact on latency. - If there is a need to use the same prefix multiple times, consider loadDictionary instead. - Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). - Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. -</p></pre><BR> - -<pre><b>size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -</b><p> Create an internal DDict from dict buffer, to be used to decompress all future frames. - The dictionary remains valid for all future frames, until explicitly invalidated, or - a new dictionary is loaded. - @result : 0, or an error code (which can be tested with ZSTD_isError()). - Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - meaning "return to no-dictionary mode". - Note 1 : Loading a dictionary involves building tables, - which has a non-negligible impact on CPU usage and latency. 
- It's recommended to "load once, use many times", to amortize the cost - Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. - Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. - Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of - how dictionary content is loaded and interpreted. - -</p></pre><BR> - -<pre><b>size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -</b><p> Reference a prepared dictionary, to be used to decompress next frames. - The dictionary remains active for decompression of future frames using same DCtx. - - If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function - will store the DDict references in a table, and the DDict used for decompression - will be determined at decompression time, as per the dict ID in the frame. - The memory for the table is allocated on the first call to refDDict, and can be - freed with ZSTD_freeDCtx(). - - If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary - will be managed, and referencing a dictionary effectively "discards" any previous one. - - @result : 0, or an error code (which can be tested with ZSTD_isError()). - Special: referencing a NULL DDict means "return to no-dictionary mode". - Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. - -</p></pre><BR> - -<pre><b>size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, - const void* prefix, size_t prefixSize); -</b><p> Reference a prefix (single-usage dictionary) to decompress next frame. - This is the reverse operation of ZSTD_CCtx_refPrefix(), - and must use the same prefix as the one used during compression. - Prefix is **only used once**. Reference is discarded at end of frame. - End of frame is reached when ZSTD_decompressStream() returns 0. - @result : 0, or an error code (which can be tested with ZSTD_isError()). 
- Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - Note 2 : Prefix buffer is referenced. It **must** outlive decompression. - Prefix buffer must remain unmodified up to the end of frame, - reached when ZSTD_decompressStream() returns 0. - Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). - Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) - Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. - A full dictionary is more costly, as it requires building tables. - -</p></pre><BR> - -<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); -</b><p> These functions give the _current_ memory usage of selected object. - Note that object memory usage can evolve (increase or decrease) over time. -</p></pre><BR> - -<a name="Chapter14"></a><h2>experimental API (static linking only)</h2><pre> - The following symbols and constants - are not planned to join "stable API" status in the near future. - They can still change in future versions. - Some of them are planned to remain in the static_only section indefinitely. - Some of them might be removed in the future (especially when redundant with existing stable functions) - -<BR></pre> - -<pre><b>typedef struct { - unsigned int offset; </b>/* The offset of the match. (NOT the same as the offset code)<b> - * If offset == 0 and matchLength == 0, this sequence represents the last - * literals in the block of litLength size. - */ - - unsigned int litLength; </b>/* Literal length of the sequence. */<b> - unsigned int matchLength; </b>/* Match length of the sequence. 
*/<b> - - </b>/* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.<b> - * In this case, we will treat the sequence as a marker for a block boundary. - */ - - unsigned int rep; </b>/* Represents which repeat offset is represented by the field 'offset'.<b> - * Ranges from [0, 3]. - * - * Repeat offsets are essentially previous offsets from previous sequences sorted in - * recency order. For more detail, see doc/zstd_compression_format.md - * - * If rep == 0, then 'offset' does not contain a repeat offset. - * If rep > 0: - * If litLength != 0: - * rep == 1 --> offset == repeat_offset_1 - * rep == 2 --> offset == repeat_offset_2 - * rep == 3 --> offset == repeat_offset_3 - * If litLength == 0: - * rep == 1 --> offset == repeat_offset_2 - * rep == 2 --> offset == repeat_offset_3 - * rep == 3 --> offset == repeat_offset_1 - 1 - * - * Note: This field is optional. ZSTD_generateSequences() will calculate the value of - * 'rep', but repeat offsets do not necessarily need to be calculated from an external - * sequence provider perspective. For example, ZSTD_compressSequences() does not - * use this 'rep' field at all (as of now). 
- */ -} ZSTD_Sequence; -</b></pre><BR> -<pre><b>typedef struct { - unsigned windowLog; </b>/**< largest match distance : larger == more compression, more memory needed during decompression */<b> - unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b> - unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b> - unsigned searchLog; </b>/**< nb of searches : larger == more compression, slower */<b> - unsigned minMatch; </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b> - unsigned targetLength; </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b> - ZSTD_strategy strategy; </b>/**< see ZSTD_strategy definition above */<b> -} ZSTD_compressionParameters; -</b></pre><BR> -<pre><b>typedef struct { - int contentSizeFlag; </b>/**< 1: content size will be in frame header (when known) */<b> - int checksumFlag; </b>/**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */<b> - int noDictIDFlag; </b>/**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */<b> -} ZSTD_frameParameters; -</b></pre><BR> -<pre><b>typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; -</b></pre><BR> -<pre><b>typedef enum { - ZSTD_dct_auto = 0, </b>/* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */<b> - ZSTD_dct_rawContent = 1, </b>/* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */<b> - ZSTD_dct_fullDict = 2 </b>/* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */<b> -} ZSTD_dictContentType_e; -</b></pre><BR> -<pre><b>typedef enum { - ZSTD_dlm_byCopy = 0, </b>/**< Copy dictionary content internally */<b> - 
ZSTD_dlm_byRef = 1 </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b> -} ZSTD_dictLoadMethod_e; -</b></pre><BR> -<pre><b>typedef enum { - ZSTD_f_zstd1 = 0, </b>/* zstd frame format, specified in zstd_compression_format.md (default) */<b> - ZSTD_f_zstd1_magicless = 1 </b>/* Variant of zstd frame format, without initial 4-bytes magic number.<b> - * Useful to save 4 bytes per generated frame. - * Decoder cannot recognise automatically this format, requiring this instruction. */ -} ZSTD_format_e; -</b></pre><BR> -<pre><b>typedef enum { - </b>/* Note: this enum controls ZSTD_d_forceIgnoreChecksum */<b> - ZSTD_d_validateChecksum = 0, - ZSTD_d_ignoreChecksum = 1 -} ZSTD_forceIgnoreChecksum_e; -</b></pre><BR> -<pre><b>typedef enum { - </b>/* Note: this enum controls ZSTD_d_refMultipleDDicts */<b> - ZSTD_rmd_refSingleDDict = 0, - ZSTD_rmd_refMultipleDDicts = 1 -} ZSTD_refMultipleDDicts_e; -</b></pre><BR> -<pre><b>typedef enum { - </b>/* Note: this enum and the behavior it controls are effectively internal<b> - * implementation details of the compressor. They are expected to continue - * to evolve and should be considered only in the context of extremely - * advanced performance tuning. - * - * Zstd currently supports the use of a CDict in three ways: - * - * - The contents of the CDict can be copied into the working context. This - * means that the compression can search both the dictionary and input - * while operating on a single set of internal tables. This makes - * the compression faster per-byte of input. However, the initial copy of - * the CDict's tables incurs a fixed cost at the beginning of the - * compression. For small compressions (< 8 KB), that copy can dominate - * the cost of the compression. - * - * - The CDict's tables can be used in-place. In this model, compression is - * slower per input byte, because the compressor has to search two sets of - * tables. 
However, this model incurs no start-up cost (as long as the - * working context's tables can be reused). For small inputs, this can be - * faster than copying the CDict's tables. - * - * - The CDict's tables are not used at all, and instead we use the working - * context alone to reload the dictionary and use params based on the source - * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). - * This method is effective when the dictionary sizes are very small relative - * to the input size, and the input size is fairly large to begin with. - * - * Zstd has a simple internal heuristic that selects which strategy to use - * at the beginning of a compression. However, if experimentation shows that - * Zstd is making poor choices, it is possible to override that choice with - * this enum. - */ - ZSTD_dictDefaultAttach = 0, </b>/* Use the default heuristic. */<b> - ZSTD_dictForceAttach = 1, </b>/* Never copy the dictionary. */<b> - ZSTD_dictForceCopy = 2, </b>/* Always copy the dictionary. */<b> - ZSTD_dictForceLoad = 3 </b>/* Always reload the dictionary */<b> -} ZSTD_dictAttachPref_e; -</b></pre><BR> -<pre><b>typedef enum { - ZSTD_lcm_auto = 0, </b>/**< Automatically determine the compression mode based on the compression level.<b> - * Negative compression levels will be uncompressed, and positive compression - * levels will be compressed. */ - ZSTD_lcm_huffman = 1, </b>/**< Always attempt Huffman compression. Uncompressed literals will still be<b> - * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2 </b>/**< Always emit uncompressed literals. */<b> -} ZSTD_literalCompressionMode_e; -</b></pre><BR> -<pre><b>typedef enum { - </b>/* Note: This enum controls features which are conditionally beneficial.<b> - * Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto), - * but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature. 
- */ - ZSTD_ps_auto = 0, </b>/* Let the library automatically determine whether the feature shall be enabled */<b> - ZSTD_ps_enable = 1, </b>/* Force-enable the feature */<b> - ZSTD_ps_disable = 2 </b>/* Do not use the feature */<b> -} ZSTD_ParamSwitch_e; -</b></pre><BR> -<a name="Chapter15"></a><h2>Frame header and size functions</h2><pre></pre> - -<pre><b>ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); -</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames - `srcSize` must be the _exact_ size of this series - (i.e. there should be a frame boundary at `src + srcSize`) - @return : - decompressed size of all data in all successive frames - - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN - - if an error occurred: ZSTD_CONTENTSIZE_ERROR - - note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. - When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - In which case, it's necessary to use streaming mode to decompress data. - note 2 : decompressed size is always present when compression is done with ZSTD_compress() - note 3 : decompressed size can be very large (64-bits value), - potentially larger than what local system can handle as a single memory segment. - In which case, it's necessary to use streaming mode to decompress data. - note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. - Always ensure result fits within application's authorized limits. - Each application can set its own limits. - note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to - read each contained frame header. This is fast as most of the data is skipped, - however it does mean that all frame data must be present and valid. 
-</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); -</b><p> `src` should point to the start of a series of ZSTD encoded and/or skippable frames - `srcSize` must be the _exact_ size of this series - (i.e. there should be a frame boundary at `src + srcSize`) - @return : - upper-bound for the decompressed size of all data in all successive frames - - if an error occurred: ZSTD_CONTENTSIZE_ERROR - - note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. - note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. - in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. - note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: - upper-bound = # blocks * min(128 KB, Window_Size) - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -</b><p> srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. - @return : size of the Frame Header, - or an error code (if srcSize is too small) -</p></pre><BR> - -<pre><b>typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e; -</b></pre><BR> -<pre><b>typedef struct { - unsigned long long frameContentSize; </b>/* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 
0 means "empty" */<b> - unsigned long long windowSize; </b>/* can be very large, up to <= frameContentSize */<b> - unsigned blockSizeMax; - ZSTD_FrameType_e frameType; </b>/* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */<b> - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; - unsigned _reserved1; - unsigned _reserved2; -} ZSTD_frameHeader; -</b></pre><BR> -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize); </b>/**< doesn't consume input */<b> -</b>/*! ZSTD_getFrameHeader_advanced() :<b> - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -</b><p> decode Frame Header, or requires larger `srcSize`. - @return : 0, `zfhPtr` is correctly filled, - >0, `srcSize` is too small, value is wanted `srcSize` amount, - or an error code, which can be tested using ZSTD_isError() -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize); -</b><p> Zstd supports in-place decompression, where the input and output buffers overlap. - In this case, the output buffer must be at least (Margin + Output_Size) bytes large, - and the input buffer must be at the end of the output buffer. - - _______________________ Output Buffer ________________________ - | | - | ____ Input Buffer ____| - | | | - v v v - |---------------------------------------|-----------|----------| - ^ ^ ^ - |___________________ Output_Size ___________________|_ Margin _| - - NOTE: See also ZSTD_DECOMPRESSION_MARGIN(). - NOTE: This applies only to single-pass decompression through ZSTD_decompress() or - ZSTD_decompressDCtx(). - NOTE: This function supports multi-frame input. 
- - @param src The compressed frame(s) - @param srcSize The size of the compressed frame(s) - @returns The decompression margin or an error that can be checked with ZSTD_isError(). - -</p></pre><BR> - -<pre><b>#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \ - ZSTD_FRAMEHEADERSIZE_MAX </b>/* Frame header */ + \<b> - 4 </b>/* checksum */ + \<b> - ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) </b>/* 3 bytes per block */ + \<b> - (blockSize) </b>/* One block of margin */ \<b> - )) -</b><p> Similar to ZSTD_decompressionMargin(), but instead of computing the margin from - the compressed frame, compute it from the original size and the blockSizeLog. - See ZSTD_decompressionMargin() for details. - - WARNING: This macro does not support multi-frame input, the input must be a single - zstd frame. If you need that support use the function, or implement it yourself. - - @param originalSize The original uncompressed size of the data. - @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX). - Unless you explicitly set the windowLog smaller than - ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX. - -</p></pre><BR> - -<pre><b>typedef enum { - ZSTD_sf_noBlockDelimiters = 0, </b>/* ZSTD_Sequence[] has no block delimiters, just sequences */<b> - ZSTD_sf_explicitBlockDelimiters = 1 </b>/* ZSTD_Sequence[] contains explicit block delimiters */<b> -} ZSTD_SequenceFormat_e; -</b></pre><BR> -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); -</b><p> `srcSize` : size of the input buffer - @return : upper-bound for the number of sequences that can be generated - from a buffer of srcSize bytes - - note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). 
- -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") -ZSTDLIB_STATIC_API size_t -ZSTD_generateSequences(ZSTD_CCtx* zc, - ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, - const void* src, size_t srcSize); -</b><p> WARNING: This function is meant for debugging and informational purposes ONLY! - Its implementation is flawed, and it will be deleted in a future version. - It is not guaranteed to succeed, as there are several cases where it will give - up and fail. You should NOT use this function in production code. - - This function is deprecated, and will be removed in a future version. - - Generate sequences using ZSTD_compress2(), given a source buffer. - - @param zc The compression context to be used for ZSTD_compress2(). Set any - compression parameters you need on this context. - @param outSeqs The output sequences buffer of size @p outSeqsCapacity - @param outSeqsCapacity The size of the output sequences buffer. - ZSTD_sequenceBound(srcSize) is an upper bound on the number - of sequences that can be generated. - @param src The source buffer to generate sequences from of size @p srcSize. - @param srcSize The size of the source buffer. - - Each block will end with a dummy sequence - with offset == 0, matchLength == 0, and litLength == length of last literals. - litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) - simply acts as a block delimiter. - - @returns The number of sequences generated, necessarily less than - ZSTD_sequenceBound(srcSize), or an error code that can be checked - with ZSTD_isError(). - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); -</b><p> Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals - by merging them into the literals of the next sequence. 
- - As such, the final generated result has no explicit representation of block boundaries, - and the final last literals segment is not represented in the sequences. - - The output of this function can be fed into ZSTD_compressSequences() with CCtx - setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters - @return : number of sequences left after merging - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t -ZSTD_compressSequences(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize); -</b><p> Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. - @src contains the entire input (not just the literals). - If @srcSize > sum(sequence.length), the remaining bytes are considered all literals - If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.). - The entire source is compressed into a single frame. - - The compression behavior changes based on cctx params. In particular: - If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain - no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on - the block size derived from the cctx, and sequences may be split. This is the default setting. - - If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain - valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. - - When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes - using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit - can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation. 
- By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10). - ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction. - - If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined - behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for - specifics regarding offset/matchlength requirements) and then bail out and return an error. - - In addition to the two adjustable experimental params, there are other important cctx params. - - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. - - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. - - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset - is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md - - Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused. - Dev Note: Once ability to ingest repcodes become available, the explicit block delims mode must respect those repcodes exactly, - and cannot emit an RLE block that disagrees with the repcode history. - @return : final compressed size, or a ZSTD error code. 
- -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t -ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t nbSequences, - const void* literals, size_t litSize, size_t litCapacity, - size_t decompressedSize); -</b><p> This is a variant of ZSTD_compressSequences() which, - instead of receiving (src,srcSize) as input parameter, receives (literals,litSize), - aka all the literals, already extracted and laid out into a single continuous buffer. - This can be useful if the process generating the sequences also happens to generate the buffer of literals, - thus skipping an extraction + caching stage. - It's a speed optimization, useful when the right conditions are met, - but it also features the following limitations: - - Only supports explicit delimiter mode - - Currently does not support Sequences validation (so input Sequences are trusted) - - Not compatible with frame checksum, which must be disabled - - If any block is incompressible, will fail and return an error - - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. - - the buffer @literals must have a size @litCapacity which is larger than @litSize by at least 8 bytes. - - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error. - @return : final compressed size, or a ZSTD error code. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, - const void* src, size_t srcSize, unsigned magicVariant); -</b><p> Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. - - Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, - ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. 
- As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so - the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. - - Returns an error if destination buffer is not large enough, if the source size is not representable - with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). - - @return : number of bytes written or a ZSTD error. - -</p></pre><BR> - -<pre><b>size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, - const void* src, size_t srcSize); -</b><p> Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. - - The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, - i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested - in the magicVariant. - - Returns an error if destination buffer is not large enough, or if the frame is not skippable. - - @return : number of bytes written or a ZSTD error. - -</p></pre><BR> - -<pre><b>unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); -</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. - -</p></pre><BR> - -<a name="Chapter16"></a><h2>Memory management</h2><pre></pre> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); -ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); -</b><p> These functions make it possible to estimate memory usage - of a future {D,C}Ctx, before its creation. - This is useful in combination with ZSTD_initStatic(), - which makes it possible to employ a static buffer for ZSTD_CCtx* state. 
- - ZSTD_estimateCCtxSize() will provide a memory budget large enough - to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() - associated with any compression level up to max specified one. - The estimate will assume the input may be arbitrarily large, - which is the worst case. - - Note that the size estimation is specific for one-shot compression, - it is not valid for streaming (see ZSTD_estimateCStreamSize*()) - nor other potential ways of using a ZSTD_CCtx* state. - - When srcSize can be bound by a known and rather "small" value, - this knowledge can be used to provide a tighter budget estimation - because the ZSTD_CCtx* state will need less memory for small inputs. - This tighter estimation can be provided by employing more advanced functions - ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), - and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). - Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. - - Note : only single-threaded compression is supported. - ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); -ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); -</b><p> ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression - using any compression level up to the max specified one. - It will also consider src size to be arbitrarily "large", which is a worst case scenario. 
- If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - Note : CStream size estimation is only correct for single-threaded compression. - ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. - Size estimates assume that no external sequence producer is registered. - - ZSTD_DStream memory budget depends on frame's window Size. - This information can be passed manually, using ZSTD_estimateDStreamSize, - or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); - Any frame requesting a window size larger than max specified one will be rejected. - Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - an internal ?Dict will be created, which additional size is not estimated here. - In this case, get total size by adding ZSTD_estimate?DictSize - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); -</b><p> ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). 
- ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). - Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); </b>/**< same as ZSTD_initStaticCCtx() */<b> -</b><p> Initialize an object using a pre-allocated fixed-size buffer. - workspace: The memory area to emplace the object into. - Provided pointer *must be 8-bytes aligned*. - Buffer must outlive object. - workspaceSize: Use ZSTD_estimate*Size() to determine - how large workspace must be to support target scenario. - @return : pointer to object (same address as workspace, just different type), - or NULL if error (size too small, incorrect alignment, etc.) - Note : zstd will never resize nor malloc() when using a static buffer. - If the object requires more memory than available, - zstd will just error out (typically ZSTD_error_memory_allocation). - Note 2 : there is no corresponding "free" function. - Since workspace is allocated externally, it must be freed externally too. - Note 3 : cParams : use ZSTD_getCParams() to convert a compression level - into its associated cParams. - Limitation 1 : currently not compatible with internal dictionary creation, triggered by - ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). - Limitation 2 : static cctx currently not compatible with multi-threading. - Limitation 3 : static dctx is incompatible with legacy support. 
- -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); </b>/**< same as ZSTD_initStaticDCtx() */<b> -</b></pre><BR> -<pre><b>typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -static -#ifdef __GNUC__ -__attribute__((__unused__)) -#endif -</b><p> These prototypes make it possible to pass your own allocation/free functions. - ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. - All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones. - -</p></pre><BR> - -<pre><b>ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< this constant defers to stdlib's functions */<b> -</b></pre><BR> -<pre><b>typedef struct POOL_ctx_s ZSTD_threadPool; -ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); -ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); </b>/* accept NULL pointer */<b> -ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); -</b><p> These prototypes make it possible to share a thread pool among multiple compression contexts. - This can limit resources for applications with multiple threads where each one uses - a threaded compression mode (via ZSTD_c_nbWorkers parameter). - ZSTD_createThreadPool creates a new thread pool with a given number of threads. - Note that the lifetime of such pool must exist while being used. - ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value - to use an internal thread pool). - ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. 
- -</p></pre><BR> - -<a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); -</b><p> Create a digested dictionary for compression - Dictionary content is just referenced, not duplicated. - As a consequence, `dictBuffer` **must** outlive CDict, - and its content must remain unmodified throughout the lifetime of CDict. - note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -</b><p> @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - `estimatedSrcSize` value is optional, select 0 if not known -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -</b><p> same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. - All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); -</b><p> Ensure param values remain within authorized range. - @return 0 on success, or an error code (can be checked with ZSTD_isError()) -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); -</b><p> optimize params for a given `srcSize` and `dictSize`. - `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. - `dictSize` must be `0` when there is no dictionary. - cPar can be invalid : all parameters will be clamped within valid range in the @return struct. 
- This function never fails (wide contract) -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); -</b><p> Set all parameters provided within @p cparams into the working @p cctx. - Note : if modifying parameters during compression (MT mode only), - note that changes to the .windowLog parameter will be ignored. - @return 0 on success, or an error code (can be checked with ZSTD_isError()). - On failure, no parameters are updated. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams); -</b><p> Set all parameters provided within @p fparams into the working @p cctx. - @return 0 on success, or an error code (can be checked with ZSTD_isError()). - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params); -</b><p> Set all parameters provided within @p params into the working @p cctx. - @return 0 on success, or an error code (can be checked with ZSTD_isError()). - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_compress2") -ZSTDLIB_STATIC_API -size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); -</b><p> Note : this function is now DEPRECATED. - It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - This prototype will generate compilation warnings. -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") -ZSTDLIB_STATIC_API -size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams); -</b><p> Note : this function is now DEPRECATED. 
- It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - This prototype will generate compilation warnings. -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -</b><p> Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. - It saves some memory, but also requires that `dict` outlives its usage within `cctx` -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); -</b><p> Same as ZSTD_CCtx_loadDictionary(), but gives finer control over - how to load the dictionary (by copy ? by reference ?) - and how to interpret it (automatic ? force raw mode ? full mode only ?) -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); -</b><p> Same as ZSTD_CCtx_refPrefix(), but gives finer control over - how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); -</b><p> Get the requested compression parameter value, selected by enum ZSTD_cParameter, - and store it into int* value. - @return : 0, or an error code (which can be tested with ZSTD_isError()). 
- -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); </b>/* accept NULL pointer */<b> -</b><p> Quick howto : - - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - - ZSTD_CCtxParams_setParameter() : Push parameters one by one into - an existing ZSTD_CCtx_params structure. - This is similar to - ZSTD_CCtx_setParameter(). - - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to - an existing CCtx. - These parameters will be applied to - all subsequent frames. - - ZSTD_compressStream2() : Do compression using the CCtx. - - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. - - This can be used with ZSTD_estimateCCtxSize_usingCCtxParams() - for static allocation of CCtx for single-threaded compression. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); -</b><p> Reset params to default values. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); -</b><p> Initializes the compression parameters of cctxParams according to - compression level. All other parameters are reset to their default values. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -</b><p> Initializes the compression and frame parameters of cctxParams according to - params. All other parameters are reset to their default values. - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -</b><p> Similar to ZSTD_CCtx_setParameter. - Set one compression parameter, selected by enum ZSTD_cParameter. - Parameters must be applied to a ZSTD_CCtx using - ZSTD_CCtx_setParametersUsingCCtxParams(). 
- @result : a code representing success or failure (which can be tested with - ZSTD_isError()). - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); -</b><p> Similar to ZSTD_CCtx_getParameter. - Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. - @result : 0, or an error code (which can be tested with ZSTD_isError()). - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -</b><p> Apply a set of ZSTD_CCtx_params to the compression context. - This can be done even after compression is started, - if nbWorkers==0, this will have no impact until a new compression is started. - if nbWorkers>=1, new parameters will be picked up at next job, - with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp); -</b><p> Same as ZSTD_compressStream2(), - but using only integral types as arguments. - This variant might be helpful for binders from dynamic languages - which have troubles handling structures containing memory pointers. - -</p></pre><BR> - -<a name="Chapter18"></a><h2>Advanced decompression functions</h2><pre></pre> - -<pre><b>ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size); -</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier. - Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - Note 3 : Skippable Frame Identifiers are considered valid. 
-</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); -</b><p> Create a digested dictionary, ready to start decompression operation without startup delay. - Dictionary content is referenced, and therefore stays in dictBuffer. - It is important that dictBuffer outlives DDict, - it must remain read accessible throughout the lifetime of DDict -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -</b><p> Same as ZSTD_DCtx_loadDictionary(), - but references `dict` content instead of copying it into `dctx`. - This saves memory if `dict` remains around. - However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); -</b><p> Same as ZSTD_DCtx_loadDictionary(), - but gives direct control over - how to load the dictionary (by copy ? by reference ?) - and how to interpret it (automatic ? force raw mode ? full mode only ?). -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); -</b><p> Same as ZSTD_DCtx_refPrefix(), but gives finer control over - how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); -</b><p> Refuses allocating internal buffers for frames requiring a window size larger than provided limit. - This protects a decoder context from reserving too much memory for itself (potential attack scenario). 
- This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) - @return : 0, or an error code (which can be tested using ZSTD_isError()). - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); -</b><p> Get the requested decompression parameter value, selected by enum ZSTD_dParameter, - and store it into int* value. - @return : 0, or an error code (which can be tested with ZSTD_isError()). - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") -ZSTDLIB_STATIC_API -size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); -</b><p> This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). - Instruct the decoder context about what kind of data to decode next. - This instruction is mandatory to decode data without a fully-formed header, - such ZSTD_f_zstd1_magicless for example. - @return : 0, or an error code (which can be tested using ZSTD_isError()). -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos); -</b><p> Same as ZSTD_decompressStream(), - but using only integral types as arguments. - This can be helpful for binders from dynamic languages - which have troubles handling structures containing memory pointers. - -</p></pre><BR> - -<a name="Chapter19"></a><h2>Advanced streaming functions</h2><pre> Warning : most of these functions are now redundant with the Advanced API. - Once Advanced API reaches "stable" status, - redundant functions will be deprecated, and then at some point removed. 
-<BR></pre> - -<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre></pre></b><BR> -<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); -</b><p> This function is DEPRECATED, and equivalent to: - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - - pledgedSrcSize must be correct. If it is not known at init time, use - ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, - "0" also disables frame content size field. It may be enabled in the future. - This prototype will generate compilation warnings. - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API -size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); -</b><p> This function is DEPRECATED, and is equivalent to: - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - - Creates of an internal CDict (incompatible with static CCtx), except if - dict == NULL or dictSize < 8, in which case no dict is used. - Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if - it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. - This prototype will generate compilation warnings. 
- -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API -size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize); -</b><p> This function is DEPRECATED, and is equivalent to: - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - ZSTD_CCtx_setParams(zcs, params); - ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - - dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. - pledgedSrcSize must be correct. - If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - This prototype will generate compilation warnings. - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API -size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); -</b><p> This function is DEPRECATED, and equivalent to: - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - ZSTD_CCtx_refCDict(zcs, cdict); - - note : cdict will just be referenced, and must outlive compression session - This prototype will generate compilation warnings. - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API -size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize); -</b><p> This function is DEPRECATED, and is equivalent to: - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - ZSTD_CCtx_setFParams(zcs, fParams); - ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - ZSTD_CCtx_refCDict(zcs, cdict); - - same as ZSTD_initCStream_usingCDict(), with control over frame parameters. - pledgedSrcSize must be correct. If srcSize is not known at init time, use - value ZSTD_CONTENTSIZE_UNKNOWN. 
- This prototype will generate compilation warnings. - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); -</b><p> This function is DEPRECATED, and is equivalent to: - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but - ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be - explicitly specified. - - start a new frame, using same parameters from previous frame. - This is typically useful to skip dictionary loading stage, since it will reuse it in-place. - Note that zcs must be init at least once before using ZSTD_resetCStream(). - If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. - @return : 0, or an error code (which can be tested using ZSTD_isError()) - This prototype will generate compilation warnings. 
- -</p></pre><BR> - -<pre><b>typedef struct { - unsigned long long ingested; </b>/* nb input bytes read and buffered */<b> - unsigned long long consumed; </b>/* nb input bytes actually compressed */<b> - unsigned long long produced; </b>/* nb of compressed bytes generated and buffered */<b> - unsigned long long flushed; </b>/* nb of compressed bytes flushed : not provided; can be tracked from caller side */<b> - unsigned currentJobID; </b>/* MT only : latest started job nb */<b> - unsigned nbActiveWorkers; </b>/* MT only : nb of workers actively compressing at probe time */<b> -} ZSTD_frameProgression; -</b></pre><BR> -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); -</b><p> Tell how many bytes are ready to be flushed immediately. - Useful for multithreading scenarios (nbWorkers >= 1). - Probe the oldest active job, defined as oldest job not yet entirely flushed, - and check its output buffer. - @return : amount of data stored in oldest job and ready to be flushed immediately. - if @return == 0, it means either : - + there is no active job (could be checked with ZSTD_frameProgression()), or - + oldest job is still actively compressing data, - but everything it has produced has also been flushed so far, - therefore flush speed is limited by production speed of oldest job - irrespective of the speed of concurrent (and newer) jobs. 
- -</p></pre><BR> - -<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR> -<pre><b>ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); -</b><p> - ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - - note: no dictionary will be used if dict == NULL or dictSize < 8 - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); -</b><p> - ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - ZSTD_DCtx_refDDict(zds, ddict); - - note : ddict is referenced, it must outlive decompression session - -</p></pre><BR> - -<pre><b>ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") -ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); -</b><p> - ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - - reuse decompression parameters from previous init; saves dictionary loading - -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API void -ZSTD_registerSequenceProducer( - ZSTD_CCtx* cctx, - void* sequenceProducerState, - ZSTD_sequenceProducer_F sequenceProducer -); -</b><p> Instruct zstd to use a block-level external sequence producer function. - - The sequenceProducerState must be initialized by the caller, and the caller is - responsible for managing its lifetime. This parameter is sticky across - compressions. It will remain set until the user explicitly resets compression - parameters. - - Sequence producer registration is considered to be an "advanced parameter", - part of the "advanced API". This means it will only have an effect on compression - APIs which respect advanced parameters, such as compress2() and compressStream2(). 
- Older compression APIs such as compressCCtx(), which predate the introduction of - "advanced parameters", will ignore any external sequence producer setting. - - The sequence producer can be "cleared" by registering a NULL function pointer. This - removes all limitations described above in the "LIMITATIONS" section of the API docs. - - The user is strongly encouraged to read the full API documentation (above) before - calling this function. -</p></pre><BR> - -<pre><b>ZSTDLIB_STATIC_API void -ZSTD_CCtxParams_registerSequenceProducer( - ZSTD_CCtx_params* params, - void* sequenceProducerState, - ZSTD_sequenceProducer_F sequenceProducer -); -</b><p> Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. - This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), - which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). - - If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() - is required, then this function is for you. Otherwise, you probably don't need it. - - See tests/zstreamtest.c for example usage. -</p></pre><BR> - -<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions (DEPRECATED)</h2><pre> - This API is deprecated, and will be removed in a future version. - It allows streaming (de)compression with user allocated buffers. - However, it is hard to use, and not as well tested as the rest of - our API. - - Please use the normal streaming API instead: ZSTD_compressStream2, - and ZSTD_decompressStream. - If there is functionality that you need, but it doesn't provide, - please open an issue on our GitHub. - -<BR></pre> - -<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre> - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be reused multiple times within successive compression operations. 
- - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. - - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. - - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - - `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again. -<BR></pre> - -<h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. 
See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b> -</pre></b><BR> -<pre><b>size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b> -</b></pre><BR> -<pre><b>size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */<b> -</b></pre><BR> -<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre> - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be reused multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - - It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). 
- Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. - For example, do not allocate memory blindly, check that `windowSize` is within expectation. - Each application can set its own limits, depending on local restrictions. - For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. - - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference distance. - There are multiple ways to guarantee this condition. - - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can return an error code if required value is too large for current system (in 32-bits mode). - In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. - At which point, decoding can resume from the beginning of the buffer. - Note that already decoded data stored in the buffer should be flushed before being overwritten. - - There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. 
- - Finally, if you control the compression process, you can also ignore all buffer size rules, - as long as the encoder and decoder progress in "lock-step", - aka use exactly the same buffer sizes, break contiguity at the same place, etc. - - Once buffers are setup, start decompression, with ZSTD_decompressBegin(). - If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). - - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - - result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. - - == Special case : skippable frames - - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by decompressor. - The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. - For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. 
-<BR></pre> - -<h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR> -<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b> -</b></pre><BR> -<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -</b></pre><BR> -<a name="Chapter23"></a><h2>Block level API (DEPRECATED)</h2><pre></pre> - -<pre><b></b><p> You can get the frame header down to 2 bytes by setting: - - ZSTD_c_format = ZSTD_f_zstd1_magicless - - ZSTD_c_contentSizeFlag = 0 - - ZSTD_c_checksumFlag = 0 - - ZSTD_c_dictIDFlag = 0 - - This API is not as well tested as our normal API, so we recommend not using it. - We will be removing it in a future version. If the normal API doesn't provide - the functionality you need, please open a GitHub issue. - - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. - - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
- Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! - ===> In which case, nothing is produced into `dst` ! - + User __must__ test for such outcome and deal directly with uncompressed data - + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. - Doing so would mess up with statistics history, leading to potential data corruption. - + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! - + In case of multiple successive blocks, should some of them be uncompressed, - decoder must be informed of their existence in order to follow proper history. - Use ZSTD_insertBlock() for such a case. -</p></pre><BR> - -<h3>Raw zstd block functions</h3><pre></pre><b><pre>ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") -ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); </b>/**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. 
*/<b> -</pre></b><BR> -</html> -</body> diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index 31f52d35763f..000000000000 --- a/examples/Makefile +++ /dev/null @@ -1,93 +0,0 @@ -# ################################################################ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# You may select, at your option, one of the above-listed licenses. -# ################################################################ - -LIBDIR =../lib -CPPFLAGS += -I$(LIBDIR) -LIB = $(LIBDIR)/libzstd.a - - -.PHONY: default -default: all - -.PHONY: all -all: simple_compression simple_decompression \ - multiple_simple_compression\ - dictionary_compression dictionary_decompression \ - streaming_compression streaming_decompression \ - multiple_streaming_compression streaming_memory_usage - -$(LIB) : - $(MAKE) -C $(LIBDIR) libzstd.a - -simple_compression.o: common.h -simple_compression : $(LIB) - -simple_decompression.o: common.h -simple_decompression : $(LIB) - -multiple_simple_compression.o: common.h -multiple_simple_compression : $(LIB) - -dictionary_compression.o: common.h -dictionary_compression : $(LIB) - -dictionary_decompression.o: common.h -dictionary_decompression : $(LIB) - -streaming_compression.o: common.h -streaming_compression : $(LIB) - -multiple_streaming_compression.o: common.h -multiple_streaming_compression : $(LIB) - -streaming_decompression.o: common.h -streaming_decompression : $(LIB) - -streaming_memory_usage.o: common.h -streaming_memory_usage : $(LIB) - - -.PHONY:clean -clean: - @$(RM) core *.o tmp* result* *.zst \ - simple_compression simple_decompression \ - multiple_simple_compression \ - dictionary_compression dictionary_decompression \ - streaming_compression 
streaming_decompression \ - multiple_streaming_compression streaming_memory_usage - @echo Cleaning completed - -.PHONY:test -test: all - cp README.md tmp - cp Makefile tmp2 - @echo -- Simple compression tests - ./simple_compression tmp - ./simple_decompression tmp.zst - ./multiple_simple_compression *.c - ./streaming_decompression tmp.zst > /dev/null - @echo -- Streaming memory usage - ./streaming_memory_usage - @echo -- Streaming compression tests - ./streaming_compression tmp - ./streaming_decompression tmp.zst > /dev/null - @echo -- Edge cases detection - ! ./streaming_decompression tmp # invalid input, must fail - ! ./simple_decompression tmp # invalid input, must fail - touch tmpNull # create 0-size file - ./simple_compression tmpNull - ./simple_decompression tmpNull.zst # 0-size frame : must work - @echo -- Multiple streaming tests - ./multiple_streaming_compression *.c - @echo -- Dictionary compression tests - ./dictionary_compression tmp2 tmp README.md - ./dictionary_decompression tmp2.zst tmp.zst README.md - $(RM) tmp* *.zst - @echo tests completed diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 0bff7ac19a29..000000000000 --- a/examples/README.md +++ /dev/null @@ -1,46 +0,0 @@ -Zstandard library : usage examples -================================== - -- [Simple compression](simple_compression.c) : - Compress a single file. - Introduces usage of : `ZSTD_compress()` - -- [Simple decompression](simple_decompression.c) : - Decompress a single file. - Only compatible with simple compression. - Result remains in memory. - Introduces usage of : `ZSTD_decompress()` - -- [Multiple simple compression](multiple_simple_compression.c) : - Compress multiple files (in simple mode) in a single command line. - Demonstrates memory preservation technique that - minimizes malloc()/free() calls by re-using existing resources. 
- Introduces usage of : `ZSTD_compressCCtx()` - -- [Streaming memory usage](streaming_memory_usage.c) : - Provides amount of memory used by streaming context. - Introduces usage of : `ZSTD_sizeof_CStream()` - -- [Streaming compression](streaming_compression.c) : - Compress a single file. - Introduces usage of : `ZSTD_compressStream()` - -- [Multiple Streaming compression](multiple_streaming_compression.c) : - Compress multiple files (in streaming mode) in a single command line. - Introduces memory usage preservation technique, - reducing impact of malloc()/free() and memset() by re-using existing resources. - -- [Streaming decompression](streaming_decompression.c) : - Decompress a single file compressed by zstd. - Compatible with both simple and streaming compression. - Result is sent to stdout. - Introduces usage of : `ZSTD_decompressStream()` - -- [Dictionary compression](dictionary_compression.c) : - Compress multiple files using the same dictionary. - Introduces usage of : `ZSTD_createCDict()` and `ZSTD_compress_usingCDict()` - -- [Dictionary decompression](dictionary_decompression.c) : - Decompress multiple files using the same dictionary. - Result remains in memory. - Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()` diff --git a/examples/common.h b/examples/common.h deleted file mode 100644 index 4873e877a7af..000000000000 --- a/examples/common.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* - * This header file has common utility functions used in examples. 
- */ -#ifndef COMMON_H -#define COMMON_H - -#include <stdlib.h> // malloc, free, exit -#include <stdio.h> // fprintf, perror, fopen, etc. -#include <string.h> // strerror -#include <errno.h> // errno -#include <sys/stat.h> // stat -#include <zstd.h> - - -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR -#endif - -#define HEADER_FUNCTION static UNUSED_ATTR - - -/* - * Define the returned error code from utility functions. - */ -typedef enum { - ERROR_fsize = 1, - ERROR_fopen = 2, - ERROR_fclose = 3, - ERROR_fread = 4, - ERROR_fwrite = 5, - ERROR_loadFile = 6, - ERROR_saveFile = 7, - ERROR_malloc = 8, - ERROR_largeFile = 9, -} COMMON_ErrorCode; - -/*! CHECK - * Check that the condition holds. If it doesn't print a message and die. - */ -#define CHECK(cond, ...) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, \ - "%s:%d CHECK(%s) failed: ", \ - __FILE__, \ - __LINE__, \ - #cond); \ - fprintf(stderr, "" __VA_ARGS__); \ - fprintf(stderr, "\n"); \ - exit(1); \ - } \ - } while (0) - -/*! CHECK_ZSTD - * Check the zstd error code and die if an error occurred after printing a - * message. - */ -#define CHECK_ZSTD(fn) \ - do { \ - size_t const err = (fn); \ - CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err)); \ - } while (0) - -/*! fsize_orDie() : - * Get the size of a given file path. - * - * @return The size of a given file path. - */ -HEADER_FUNCTION size_t fsize_orDie(const char *filename) -{ - struct stat st; - if (stat(filename, &st) != 0) { - /* error */ - perror(filename); - exit(ERROR_fsize); - } - - off_t const fileSize = st.st_size; - size_t const size = (size_t)fileSize; - /* 1. fileSize should be non-negative, - * 2. if off_t -> size_t type conversion results in discrepancy, - * the file size is too large for type size_t. 
- */ - if ((fileSize < 0) || (fileSize != (off_t)size)) { - fprintf(stderr, "%s : filesize too large \n", filename); - exit(ERROR_largeFile); - } - return size; -} - -/*! fopen_orDie() : - * Open a file using given file path and open option. - * - * @return If successful this function will return a FILE pointer to an - * opened file otherwise it sends an error to stderr and exits. - */ -HEADER_FUNCTION FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(ERROR_fopen); -} - -/*! fclose_orDie() : - * Close an opened file using given FILE pointer. - */ -HEADER_FUNCTION void fclose_orDie(FILE* file) -{ - if (!fclose(file)) { return; }; - /* error */ - perror("fclose"); - exit(ERROR_fclose); -} - -/*! fread_orDie() : - * - * Read sizeToRead bytes from a given file, storing them at the - * location given by buffer. - * - * @return The number of bytes read. - */ -HEADER_FUNCTION size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(ERROR_fread); -} - -/*! fwrite_orDie() : - * - * Write sizeToWrite bytes to a file pointed to by file, obtaining - * them from a location given by buffer. - * - * Note: This function will send an error to stderr and exit if it - * cannot write data to the given file pointer. - * - * @return The number of bytes written. - */ -HEADER_FUNCTION size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(ERROR_fwrite); -} - -/*! malloc_orDie() : - * Allocate memory. 
- * - * @return If successful this function returns a pointer to allo- - * cated memory. If there is an error, this function will send that - * error to stderr and exit. - */ -HEADER_FUNCTION void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc"); - exit(ERROR_malloc); -} - -/*! loadFile_orDie() : - * load file into buffer (memory). - * - * Note: This function will send an error to stderr and exit if it - * cannot read data from the given file path. - * - * @return If successful this function will load file into buffer and - * return file size, otherwise it will printout an error to stderr and exit. - */ -HEADER_FUNCTION size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSize) -{ - size_t const fileSize = fsize_orDie(fileName); - CHECK(fileSize <= bufferSize, "File too large!"); - - FILE* const inFile = fopen_orDie(fileName, "rb"); - size_t const readSize = fread(buffer, 1, fileSize, inFile); - if (readSize != (size_t)fileSize) { - fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno)); - exit(ERROR_fread); - } - fclose(inFile); /* can't fail, read only */ - return fileSize; -} - -/*! mallocAndLoadFile_orDie() : - * allocate memory buffer and then load file into it. - * - * Note: This function will send an error to stderr and exit if memory allocation - * fails or it cannot read data from the given file path. - * - * @return If successful this function will return buffer and bufferSize(=fileSize), - * otherwise it will printout an error to stderr and exit. - */ -HEADER_FUNCTION void* mallocAndLoadFile_orDie(const char* fileName, size_t* bufferSize) -{ - size_t const fileSize = fsize_orDie(fileName); - *bufferSize = fileSize; - void* const buffer = malloc_orDie(*bufferSize); - loadFile_orDie(fileName, buffer, *bufferSize); - return buffer; -} - -/*! 
saveFile_orDie() : - * - * Save buffSize bytes to a given file path, obtaining them from a location pointed - * to by buff. - * - * Note: This function will send an error to stderr and exit if it - * cannot write to a given file. - */ -HEADER_FUNCTION void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize) -{ - FILE* const oFile = fopen_orDie(fileName, "wb"); - size_t const wSize = fwrite(buff, 1, buffSize, oFile); - if (wSize != (size_t)buffSize) { - fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno)); - exit(ERROR_fwrite); - } - if (fclose(oFile)) { - perror(fileName); - exit(ERROR_fclose); - } -} - -#endif diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c deleted file mode 100644 index 83edc1cad9b7..000000000000 --- a/examples/dictionary_compression.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -**/ - -/* This example deals with Dictionary compression, - * its counterpart is `examples/dictionary_decompression.c` . - * These examples presume that a dictionary already exists. - * The main method to create a dictionary is `zstd --train`, - * look at the CLI documentation for details. - * Another possible method is to employ dictionary training API, - * published in `lib/zdict.h` . 
-**/ - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <string.h> // memset, strcat -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -/* createDict() : -** `dictFileName` is supposed already created using `zstd --train` */ -static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel) -{ - size_t dictSize; - printf("loading dictionary %s \n", dictFileName); - void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize); - ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, cLevel); - CHECK(cdict != NULL, "ZSTD_createCDict() failed!"); - free(dictBuffer); - return cdict; -} - - -static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdict) -{ - size_t fSize; - void* const fBuff = mallocAndLoadFile_orDie(fname, &fSize); - size_t const cBuffSize = ZSTD_compressBound(fSize); - void* const cBuff = malloc_orDie(cBuffSize); - - /* Compress using the dictionary. - * This function writes the dictionary id, and content size into the header. - * But, it doesn't use a checksum. You can control these options using the - * advanced API: ZSTD_CCtx_setParameter(), ZSTD_CCtx_refCDict(), - * and ZSTD_compress2(). 
- */ - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); - size_t const cSize = ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict); - CHECK_ZSTD(cSize); - - saveFile_orDie(oname, cBuff, cSize); - - /* success */ - printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname); - - ZSTD_freeCCtx(cctx); /* never fails */ - free(fBuff); - free(cBuff); -} - - -static char* createOutFilename_orDie(const char* filename) -{ - size_t const inL = strlen(filename); - size_t const outL = inL + 5; - void* outSpace = malloc_orDie(outL); - memset(outSpace, 0, outL); - strcat(outSpace, filename); - strcat(outSpace, ".zst"); - return (char*)outSpace; -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - int const cLevel = 3; - - if (argc<3) { - fprintf(stderr, "wrong arguments\n"); - fprintf(stderr, "usage:\n"); - fprintf(stderr, "%s [FILES] dictionary\n", exeName); - return 1; - } - - /* load dictionary only once */ - const char* const dictName = argv[argc-1]; - ZSTD_CDict* const dictPtr = createCDict_orDie(dictName, cLevel); - - int u; - for (u=1; u<argc-1; u++) { - const char* inFilename = argv[u]; - char* const outFilename = createOutFilename_orDie(inFilename); - compress(inFilename, outFilename, dictPtr); - free(outFilename); - } - - ZSTD_freeCDict(dictPtr); - printf("All %u files compressed. \n", argc-2); - return 0; -} diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c deleted file mode 100644 index e6c999964a24..000000000000 --- a/examples/dictionary_decompression.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- * You may select, at your option, one of the above-listed licenses. - */ - - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -/* createDict() : - `dictFileName` is supposed to have been created using `zstd --train` */ -static ZSTD_DDict* createDict_orDie(const char* dictFileName) -{ - size_t dictSize; - printf("loading dictionary %s \n", dictFileName); - void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize); - ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize); - CHECK(ddict != NULL, "ZSTD_createDDict() failed!"); - free(dictBuffer); - return ddict; -} - -static void decompress(const char* fname, const ZSTD_DDict* ddict) -{ - size_t cSize; - void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize); - /* Read the content size from the frame header. For simplicity we require - * that it is always present. By default, zstd will write the content size - * in the header when it is known. If you can't guarantee that the frame - * content size is always written into the header, either use streaming - * decompression, or ZSTD_decompressBound(). - */ - unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize); - CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname); - CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname); - void* const rBuff = malloc_orDie((size_t)rSize); - - /* Check that the dictionary ID matches. - * If a non-zstd dictionary is used, then both will be zero. - * By default zstd always writes the dictionary ID into the frame. - * Zstd will check if there is a dictionary ID mismatch as well. 
- */ - unsigned const expectedDictID = ZSTD_getDictID_fromDDict(ddict); - unsigned const actualDictID = ZSTD_getDictID_fromFrame(cBuff, cSize); - CHECK(actualDictID == expectedDictID, - "DictID mismatch: expected %u got %u", - expectedDictID, - actualDictID); - - /* Decompress using the dictionary. - * If you need to control the decompression parameters, then use the - * advanced API: ZSTD_DCtx_setParameter(), ZSTD_DCtx_refDDict(), and - * ZSTD_decompressDCtx(). - */ - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - CHECK(dctx != NULL, "ZSTD_createDCtx() failed!"); - size_t const dSize = ZSTD_decompress_usingDDict(dctx, rBuff, rSize, cBuff, cSize, ddict); - CHECK_ZSTD(dSize); - /* When zstd knows the content size, it will error if it doesn't match. */ - CHECK(dSize == rSize, "Impossible because zstd will check this condition!"); - - /* success */ - printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize); - - ZSTD_freeDCtx(dctx); - free(rBuff); - free(cBuff); -} - - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc<3) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s [FILES] dictionary\n", exeName); - return 1; - } - - /* load dictionary only once */ - const char* const dictName = argv[argc-1]; - ZSTD_DDict* const dictPtr = createDict_orDie(dictName); - - int u; - for (u=1; u<argc-1; u++) decompress(argv[u], dictPtr); - - ZSTD_freeDDict(dictPtr); - printf("All %u files correctly decoded (in memory) \n", argc-2); - return 0; -} diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c deleted file mode 100644 index bf77ca13317f..000000000000 --- a/examples/multiple_simple_compression.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <string.h> // memcpy, strlen -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -typedef struct { - void* fBuffer; - void* cBuffer; - size_t fBufferSize; - size_t cBufferSize; - ZSTD_CCtx* cctx; -} resources; - -/* - * allocate memory for buffers big enough to compress all files - * as well as memory for output file name (ofn) - */ -static resources createResources_orDie(int argc, const char** argv, char **ofn, size_t* ofnBufferLen) -{ - size_t maxFilenameLength=0; - size_t maxFileSize = 0; - - int argNb; - for (argNb = 1; argNb < argc; argNb++) { - const char* const filename = argv[argNb]; - size_t const filenameLength = strlen(filename); - size_t const fileSize = fsize_orDie(filename); - - if (filenameLength > maxFilenameLength) maxFilenameLength = filenameLength; - if (fileSize > maxFileSize) maxFileSize = fileSize; - } - - resources ress; - ress.fBufferSize = maxFileSize; - ress.cBufferSize = ZSTD_compressBound(maxFileSize); - - *ofnBufferLen = maxFilenameLength + 5; - *ofn = (char*)malloc_orDie(*ofnBufferLen); - ress.fBuffer = malloc_orDie(ress.fBufferSize); - ress.cBuffer = malloc_orDie(ress.cBufferSize); - ress.cctx = ZSTD_createCCtx(); - CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!"); - return ress; -} - -static void freeResources(resources ress, char *outFilename) -{ - free(ress.fBuffer); - free(ress.cBuffer); - ZSTD_freeCCtx(ress.cctx); /* never fails */ - free(outFilename); -} - -/* compress with pre-allocated context (ZSTD_CCtx) and input/output buffers*/ -static void compressFile_orDie(resources 
ress, const char* fname, const char* oname) -{ - size_t fSize = loadFile_orDie(fname, ress.fBuffer, ress.fBufferSize); - - /* Compress using the context. - * If you need more control over parameters, use the advanced API: - * ZSTD_CCtx_setParameter(), and ZSTD_compress2(). - */ - size_t const cSize = ZSTD_compressCCtx(ress.cctx, ress.cBuffer, ress.cBufferSize, ress.fBuffer, fSize, 1); - CHECK_ZSTD(cSize); - - saveFile_orDie(oname, ress.cBuffer, cSize); - - /* success */ - printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname); -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc<2) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE(s)\n", exeName); - return 1; - } - - /* memory allocation for outFilename and resources */ - char* outFilename; - size_t outFilenameBufferLen; - resources const ress = createResources_orDie(argc, argv, &outFilename, &outFilenameBufferLen); - - /* compress files with shared context, input and output buffers */ - int argNb; - for (argNb = 1; argNb < argc; argNb++) { - const char* const inFilename = argv[argNb]; - size_t const inFilenameLen = strlen(inFilename); - CHECK(inFilenameLen + 5 <= outFilenameBufferLen, "File name too long!"); - memcpy(outFilename, inFilename, inFilenameLen); - memcpy(outFilename+inFilenameLen, ".zst", 5); - compressFile_orDie(ress, inFilename, outFilename); - } - - /* free memory */ - freeResources(ress,outFilename); - - printf("compressed %i files \n", argc-1); - - return 0; -} diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c deleted file mode 100644 index b12ad03dce14..000000000000 --- a/examples/multiple_streaming_compression.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/* The objective of this example is to show of to compress multiple successive files -* while preserving memory management. -* All structures and buffers will be created only once, -* and shared across all compression operations */ - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <string.h> // memset, strcat -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -typedef struct { - void* buffIn; - void* buffOut; - size_t buffInSize; - size_t buffOutSize; - ZSTD_CCtx* cctx; -} resources; - -static resources createResources_orDie(int cLevel) -{ - resources ress; - ress.buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */ - ress.buffOutSize= ZSTD_CStreamOutSize(); /* can always flush a full block */ - ress.buffIn = malloc_orDie(ress.buffInSize); - ress.buffOut= malloc_orDie(ress.buffOutSize); - ress.cctx = ZSTD_createCCtx(); - CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!"); - - /* Set any compression parameters you want here. - * They will persist for every compression operation. - * Here we set the compression level, and enable the checksum. - */ - CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); - CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, 1) ); - return ress; -} - -static void freeResources(resources ress) -{ - ZSTD_freeCCtx(ress.cctx); - free(ress.buffIn); - free(ress.buffOut); -} - -static void compressFile_orDie(resources ress, const char* fname, const char* outName) -{ - // Open the input and output files. 
- FILE* const fin = fopen_orDie(fname, "rb"); - FILE* const fout = fopen_orDie(outName, "wb"); - - /* Reset the context to a clean state to start a new compression operation. - * The parameters are sticky, so we keep the compression level and extra - * parameters that we set in createResources_orDie(). - */ - CHECK_ZSTD( ZSTD_CCtx_reset(ress.cctx, ZSTD_reset_session_only) ); - - size_t const toRead = ress.buffInSize; - size_t read; - while ( (read = fread_orDie(ress.buffIn, toRead, fin)) ) { - /* This loop is the same as streaming_compression.c. - * See that file for detailed comments. - */ - int const lastChunk = (read < toRead); - ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; - - ZSTD_inBuffer input = { ress.buffIn, read, 0 }; - int finished; - do { - ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 }; - size_t const remaining = ZSTD_compressStream2(ress.cctx, &output, &input, mode); - CHECK_ZSTD(remaining); - fwrite_orDie(ress.buffOut, output.pos, fout); - finished = lastChunk ? 
(remaining == 0) : (input.pos == input.size); - } while (!finished); - CHECK(input.pos == input.size, - "Impossible: zstd only returns 0 when the input is completely consumed!"); - } - - fclose_orDie(fout); - fclose_orDie(fin); -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc<2) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE(s)\n", exeName); - return 1; - } - - int const cLevel = 7; - resources const ress = createResources_orDie(cLevel); - void* ofnBuffer = NULL; - size_t ofnbSize = 0; - - int argNb; - for (argNb = 1; argNb < argc; argNb++) { - const char* const ifn = argv[argNb]; - size_t const ifnSize = strlen(ifn); - size_t const ofnSize = ifnSize + 5; - if (ofnbSize <= ofnSize) { - ofnbSize = ofnSize + 16; - free(ofnBuffer); - ofnBuffer = malloc_orDie(ofnbSize); - } - memset(ofnBuffer, 0, ofnSize); - strcat(ofnBuffer, ifn); - strcat(ofnBuffer, ".zst"); - compressFile_orDie(ress, ifn, ofnBuffer); - } - - freeResources(ress); - free(ofnBuffer); - - printf("compressed %i files \n", argc-1); - - return 0; -} diff --git a/examples/simple_compression.c b/examples/simple_compression.c deleted file mode 100644 index 7c880725fc76..000000000000 --- a/examples/simple_compression.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <string.h> // strlen, strcat, memset -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -static void compress_orDie(const char* fname, const char* oname) -{ - size_t fSize; - void* const fBuff = mallocAndLoadFile_orDie(fname, &fSize); - size_t const cBuffSize = ZSTD_compressBound(fSize); - void* const cBuff = malloc_orDie(cBuffSize); - - /* Compress. - * If you are doing many compressions, you may want to reuse the context. - * See the multiple_simple_compression.c example. - */ - size_t const cSize = ZSTD_compress(cBuff, cBuffSize, fBuff, fSize, 1); - CHECK_ZSTD(cSize); - - saveFile_orDie(oname, cBuff, cSize); - - /* success */ - printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname); - - free(fBuff); - free(cBuff); -} - -static char* createOutFilename_orDie(const char* filename) -{ - size_t const inL = strlen(filename); - size_t const outL = inL + 5; - void* const outSpace = malloc_orDie(outL); - memset(outSpace, 0, outL); - strcat(outSpace, filename); - strcat(outSpace, ".zst"); - return (char*)outSpace; -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc!=2) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE\n", exeName); - return 1; - } - - const char* const inFilename = argv[1]; - - char* const outFilename = createOutFilename_orDie(inFilename); - compress_orDie(inFilename, outFilename); - free(outFilename); - return 0; -} diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c deleted file mode 100644 index f499156f64ee..000000000000 --- a/examples/simple_decompression.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -static void decompress(const char* fname) -{ - size_t cSize; - void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize); - /* Read the content size from the frame header. For simplicity we require - * that it is always present. By default, zstd will write the content size - * in the header when it is known. If you can't guarantee that the frame - * content size is always written into the header, either use streaming - * decompression, or ZSTD_decompressBound(). - */ - unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize); - CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname); - CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname); - - void* const rBuff = malloc_orDie((size_t)rSize); - - /* Decompress. - * If you are doing many decompressions, you may want to reuse the context - * and use ZSTD_decompressDCtx(). If you want to set advanced parameters, - * use ZSTD_DCtx_setParameter(). - */ - size_t const dSize = ZSTD_decompress(rBuff, rSize, cBuff, cSize); - CHECK_ZSTD(dSize); - /* When zstd knows the content size, it will error if it doesn't match. 
*/ - CHECK(dSize == rSize, "Impossible because zstd will check this condition!"); - - /* success */ - printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize); - - free(rBuff); - free(cBuff); -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc!=2) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE\n", exeName); - return 1; - } - - decompress(argv[1]); - - printf("%s correctly decoded (in memory). \n", argv[1]); - - return 0; -} diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c deleted file mode 100644 index 063aa82a2946..000000000000 --- a/examples/streaming_compression.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <string.h> // memset, strcat, strlen -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -static void compressFile_orDie(const char* fname, const char* outName, int cLevel, - int nbThreads) -{ - fprintf (stderr, "Starting compression of %s with level %d, using %d threads\n", - fname, cLevel, nbThreads); - - /* Open the input and output files. */ - FILE* const fin = fopen_orDie(fname, "rb"); - FILE* const fout = fopen_orDie(outName, "wb"); - /* Create the input and output buffers. - * They may be any size, but we recommend using these functions to size them. - * Performance will only suffer significantly for very tiny buffers. 
- */ - size_t const buffInSize = ZSTD_CStreamInSize(); - void* const buffIn = malloc_orDie(buffInSize); - size_t const buffOutSize = ZSTD_CStreamOutSize(); - void* const buffOut = malloc_orDie(buffOutSize); - - /* Create the context. */ - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); - - /* Set any parameters you want. - * Here we set the compression level, and enable the checksum. - */ - CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) ); - CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); - if (nbThreads > 1) { - size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads); - if (ZSTD_isError(r)) { - fprintf (stderr, "Note: the linked libzstd library doesn't support multithreading. " - "Reverting to single-thread mode. \n"); - } - } - - /* This loop read from the input file, compresses that entire chunk, - * and writes all output produced to the output file. - */ - size_t const toRead = buffInSize; - for (;;) { - size_t read = fread_orDie(buffIn, toRead, fin); - /* Select the flush mode. - * If the read may not be finished (read == toRead) we use - * ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end. - * Zstd optimizes the case where the first flush mode is ZSTD_e_end, - * since it knows it is compressing the entire source in one pass. - */ - int const lastChunk = (read < toRead); - ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; - /* Set the input buffer to what we just read. - * We compress until the input buffer is empty, each time flushing the - * output. - */ - ZSTD_inBuffer input = { buffIn, read, 0 }; - int finished; - do { - /* Compress into the output buffer and write all of the output to - * the file so we can reuse the buffer next iteration. 
- */ - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode); - CHECK_ZSTD(remaining); - fwrite_orDie(buffOut, output.pos, fout); - /* If we're on the last chunk we're finished when zstd returns 0, - * which means its consumed all the input AND finished the frame. - * Otherwise, we're finished when we've consumed all the input. - */ - finished = lastChunk ? (remaining == 0) : (input.pos == input.size); - } while (!finished); - CHECK(input.pos == input.size, - "Impossible: zstd only returns 0 when the input is completely consumed!"); - - if (lastChunk) { - break; - } - } - - ZSTD_freeCCtx(cctx); - fclose_orDie(fout); - fclose_orDie(fin); - free(buffIn); - free(buffOut); -} - - -static char* createOutFilename_orDie(const char* filename) -{ - size_t const inL = strlen(filename); - size_t const outL = inL + 5; - void* const outSpace = malloc_orDie(outL); - memset(outSpace, 0, outL); - strcat(outSpace, filename); - strcat(outSpace, ".zst"); - return (char*)outSpace; -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc < 2) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE [LEVEL] [THREADS]\n", exeName); - return 1; - } - - int cLevel = 1; - int nbThreads = 1; - - if (argc >= 3) { - cLevel = atoi (argv[2]); - CHECK(cLevel != 0, "can't parse LEVEL!"); - } - - if (argc >= 4) { - nbThreads = atoi (argv[3]); - CHECK(nbThreads != 0, "can't parse THREADS!"); - } - - const char* const inFilename = argv[1]; - - char* const outFilename = createOutFilename_orDie(inFilename); - compressFile_orDie(inFilename, outFilename, cLevel, nbThreads); - - free(outFilename); /* not strictly required, since program execution stops there, - * but some static analyzer may complain otherwise */ - return 0; -} diff --git a/examples/streaming_compression_thread_pool.c b/examples/streaming_compression_thread_pool.c deleted file mode 100644 index 
a1a024129f2b..000000000000 --- a/examples/streaming_compression_thread_pool.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) Martin Liska, SUSE, Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#include <stdio.h> // printf -#include <stdlib.h> // free -#include <string.h> // memset, strcat, strlen -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() -#include <pthread.h> - -typedef struct compress_args -{ - const char *fname; - char *outName; - int cLevel; -#if defined(ZSTD_STATIC_LINKING_ONLY) - ZSTD_threadPool *pool; -#endif -} compress_args_t; - -static void *compressFile_orDie(void *data) -{ - const int nbThreads = 16; - - compress_args_t *args = (compress_args_t *)data; - fprintf (stderr, "Starting compression of %s with level %d, using %d threads\n", args->fname, args->cLevel, nbThreads); - /* Open the input and output files. */ - FILE* const fin = fopen_orDie(args->fname, "rb"); - FILE* const fout = fopen_orDie(args->outName, "wb"); - /* Create the input and output buffers. - * They may be any size, but we recommend using these functions to size them. - * Performance will only suffer significantly for very tiny buffers. - */ - size_t const buffInSize = ZSTD_CStreamInSize(); - void* const buffIn = malloc_orDie(buffInSize); - size_t const buffOutSize = ZSTD_CStreamOutSize(); - void* const buffOut = malloc_orDie(buffOutSize); - - /* Create the context. 
*/ - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); - -#if defined(ZSTD_STATIC_LINKING_ONLY) - size_t r = ZSTD_CCtx_refThreadPool(cctx, args->pool); - CHECK(r == 0, "ZSTD_CCtx_refThreadPool failed!"); -#endif - - /* Set any parameters you want. - * Here we set the compression level, and enable the checksum. - */ - CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, args->cLevel) ); - CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); - ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads); - - /* This loop reads from the input file, compresses that entire chunk, - * and writes all output produced to the output file. - */ - size_t const toRead = buffInSize; - for (;;) { - size_t read = fread_orDie(buffIn, toRead, fin); - /* Select the flush mode. - * If the read may not be finished (read == toRead) we use - * ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end. - * Zstd optimizes the case where the first flush mode is ZSTD_e_end, - * since it knows it is compressing the entire source in one pass. - */ - int const lastChunk = (read < toRead); - ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; - /* Set the input buffer to what we just read. - * We compress until the input buffer is empty, each time flushing the - * output. - */ - ZSTD_inBuffer input = { buffIn, read, 0 }; - int finished; - do { - /* Compress into the output buffer and write all of the output to - * the file so we can reuse the buffer next iteration. - */ - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode); - CHECK_ZSTD(remaining); - fwrite_orDie(buffOut, output.pos, fout); - /* If we're on the last chunk we're finished when zstd returns 0, - * which means its consumed all the input AND finished the frame. - * Otherwise, we're finished when we've consumed all the input. - */ - finished = lastChunk ? 
(remaining == 0) : (input.pos == input.size); - } while (!finished); - CHECK(input.pos == input.size, - "Impossible: zstd only returns 0 when the input is completely consumed!"); - - if (lastChunk) { - break; - } - } - - fprintf (stderr, "Finishing compression of %s\n", args->outName); - - ZSTD_freeCCtx(cctx); - fclose_orDie(fout); - fclose_orDie(fin); - free(buffIn); - free(buffOut); - free(args->outName); - - return NULL; -} - - -static char* createOutFilename_orDie(const char* filename) -{ - size_t const inL = strlen(filename); - size_t const outL = inL + 5; - void* const outSpace = malloc_orDie(outL); - memset(outSpace, 0, outL); - strcat(outSpace, filename); - strcat(outSpace, ".zst"); - return (char*)outSpace; -} - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc<=3) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s POOL_SIZE LEVEL FILES\n", exeName); - return 1; - } - - int pool_size = atoi (argv[1]); - CHECK(pool_size != 0, "can't parse POOL_SIZE!"); - - int level = atoi (argv[2]); - CHECK(level != 0, "can't parse LEVEL!"); - - argc -= 3; - argv += 3; - -#if defined(ZSTD_STATIC_LINKING_ONLY) - ZSTD_threadPool *pool = ZSTD_createThreadPool (pool_size); - CHECK(pool != NULL, "ZSTD_createThreadPool() failed!"); - fprintf (stderr, "Using shared thread pool of size %d\n", pool_size); -#else - fprintf (stderr, "All threads use its own thread pool\n"); -#endif - - pthread_t *threads = malloc_orDie(argc * sizeof(pthread_t)); - compress_args_t *args = malloc_orDie(argc * sizeof(compress_args_t)); - - for (unsigned i = 0; i < argc; i++) - { - args[i].fname = argv[i]; - args[i].outName = createOutFilename_orDie(args[i].fname); - args[i].cLevel = level; -#if defined(ZSTD_STATIC_LINKING_ONLY) - args[i].pool = pool; -#endif - - pthread_create (&threads[i], NULL, compressFile_orDie, &args[i]); - } - - for (unsigned i = 0; i < argc; i++) - pthread_join (threads[i], NULL); - -#if 
defined(ZSTD_STATIC_LINKING_ONLY) - ZSTD_freeThreadPool (pool); -#endif - - return 0; -} diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c deleted file mode 100644 index 95fa11227739..000000000000 --- a/examples/streaming_decompression.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#include <stdio.h> // fprintf -#include <stdlib.h> // free -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - -static void decompressFile_orDie(const char* fname) -{ - FILE* const fin = fopen_orDie(fname, "rb"); - size_t const buffInSize = ZSTD_DStreamInSize(); - void* const buffIn = malloc_orDie(buffInSize); - FILE* const fout = stdout; - size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */ - void* const buffOut = malloc_orDie(buffOutSize); - - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - CHECK(dctx != NULL, "ZSTD_createDCtx() failed!"); - - /* This loop assumes that the input file is one or more concatenated zstd - * streams. This example won't work if there is trailing non-zstd data at - * the end, but streaming decompression in general handles this case. - * ZSTD_decompressStream() returns 0 exactly when the frame is completed, - * and doesn't consume input after the frame. 
- */ - size_t const toRead = buffInSize; - size_t read; - size_t lastRet = 0; - int isEmpty = 1; - while ( (read = fread_orDie(buffIn, toRead, fin)) ) { - isEmpty = 0; - ZSTD_inBuffer input = { buffIn, read, 0 }; - /* Given a valid frame, zstd won't consume the last byte of the frame - * until it has flushed all of the decompressed data of the frame. - * Therefore, instead of checking if the return code is 0, we can - * decompress just check if input.pos < input.size. - */ - while (input.pos < input.size) { - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - /* The return code is zero if the frame is complete, but there may - * be multiple frames concatenated together. Zstd will automatically - * reset the context when a frame is complete. Still, calling - * ZSTD_DCtx_reset() can be useful to reset the context to a clean - * state, for instance if the last decompression call returned an - * error. - */ - size_t const ret = ZSTD_decompressStream(dctx, &output , &input); - CHECK_ZSTD(ret); - fwrite_orDie(buffOut, output.pos, fout); - lastRet = ret; - } - } - - if (isEmpty) { - fprintf(stderr, "input is empty\n"); - exit(1); - } - - if (lastRet != 0) { - /* The last return value from ZSTD_decompressStream did not end on a - * frame, but we reached the end of the file! We assume this is an - * error, and the input was truncated. 
- */ - fprintf(stderr, "EOF before end of stream: %zu\n", lastRet); - exit(1); - } - - ZSTD_freeDCtx(dctx); - fclose_orDie(fin); - fclose_orDie(fout); - free(buffIn); - free(buffOut); -} - - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc!=2) { - fprintf(stderr, "wrong arguments\n"); - fprintf(stderr, "usage:\n"); - fprintf(stderr, "%s FILE\n", exeName); - return 1; - } - - const char* const inFilename = argv[1]; - - decompressFile_orDie(inFilename); - return 0; -} diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c deleted file mode 100644 index 957acb61a391..000000000000 --- a/examples/streaming_memory_usage.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/*=== Tuning parameter ===*/ -#ifndef MAX_TESTED_LEVEL -#define MAX_TESTED_LEVEL 12 -#endif - - -/*=== Dependencies ===*/ -#include <stdio.h> // printf -#define ZSTD_STATIC_LINKING_ONLY -#include <zstd.h> // presumes zstd library is installed -#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() - - -/*=== functions ===*/ - -/*! readU32FromChar() : - @return : unsigned integer value read from input in `char` format - allows and interprets K, KB, KiB, M, MB and MiB suffix. - Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
- Note : function result can overflow if digit string > MAX_UINT */ -static unsigned readU32FromChar(const char** stringPtr) -{ - unsigned result = 0; - while ((**stringPtr >='0') && (**stringPtr <='9')) - result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; - if ((**stringPtr=='K') || (**stringPtr=='M')) { - result <<= 10; - if (**stringPtr=='M') result <<= 10; - (*stringPtr)++ ; - if (**stringPtr=='i') (*stringPtr)++; - if (**stringPtr=='B') (*stringPtr)++; - } - return result; -} - - -int main(int argc, char const *argv[]) { - - printf("\n Zstandard (v%s) memory usage for streaming : \n\n", ZSTD_versionString()); - - unsigned wLog = 0; - if (argc > 1) { - const char* valStr = argv[1]; - wLog = readU32FromChar(&valStr); - } - - int compressionLevel; - for (compressionLevel = 1; compressionLevel <= MAX_TESTED_LEVEL; compressionLevel++) { -#define INPUT_SIZE 5 -#define COMPRESSED_SIZE 128 - char const dataToCompress[INPUT_SIZE] = "abcde"; - char compressedData[COMPRESSED_SIZE]; - char decompressedData[INPUT_SIZE]; - /* the ZSTD_CCtx_params structure is a way to save parameters and use - * them across multiple contexts. We use them here so we can call the - * function ZSTD_estimateCStreamSize_usingCCtxParams(). - */ - ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams(); - CHECK(cctxParams != NULL, "ZSTD_createCCtxParams() failed!"); - - /* Set the compression level. */ - CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, compressionLevel) ); - /* Set the window log. - * The value 0 means use the default window log, which is equivalent to - * not setting it. - */ - CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, wLog) ); - - /* Force the compressor to allocate the maximum memory size for a given - * level by not providing the pledged source size, or calling - * ZSTD_compressStream2() with ZSTD_e_end. 
- */ - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); - CHECK_ZSTD( ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams) ); - size_t compressedSize; - { - ZSTD_inBuffer inBuff = { dataToCompress, sizeof(dataToCompress), 0 }; - ZSTD_outBuffer outBuff = { compressedData, sizeof(compressedData), 0 }; - CHECK_ZSTD( ZSTD_compressStream(cctx, &outBuff, &inBuff) ); - size_t const remaining = ZSTD_endStream(cctx, &outBuff); - CHECK_ZSTD(remaining); - CHECK(remaining == 0, "Frame not flushed!"); - compressedSize = outBuff.pos; - } - - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - CHECK(dctx != NULL, "ZSTD_createDCtx() failed!"); - /* Set the maximum allowed window log. - * The value 0 means use the default window log, which is equivalent to - * not setting it. - */ - CHECK_ZSTD( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, wLog) ); - /* forces decompressor to use maximum memory size, since the - * decompressed size is not stored in the frame header. 
- */ - { ZSTD_inBuffer inBuff = { compressedData, compressedSize, 0 }; - ZSTD_outBuffer outBuff = { decompressedData, sizeof(decompressedData), 0 }; - size_t const remaining = ZSTD_decompressStream(dctx, &outBuff, &inBuff); - CHECK_ZSTD(remaining); - CHECK(remaining == 0, "Frame not complete!"); - CHECK(outBuff.pos == sizeof(dataToCompress), "Bad decompression!"); - } - - size_t const cstreamSize = ZSTD_sizeof_CStream(cctx); - size_t const cstreamEstimatedSize = ZSTD_estimateCStreamSize_usingCCtxParams(cctxParams); - size_t const dstreamSize = ZSTD_sizeof_DStream(dctx); - size_t const dstreamEstimatedSize = ZSTD_estimateDStreamSize_fromFrame(compressedData, compressedSize); - - CHECK(cstreamSize <= cstreamEstimatedSize, "Compression mem (%u) > estimated (%u)", - (unsigned)cstreamSize, (unsigned)cstreamEstimatedSize); - CHECK(dstreamSize <= dstreamEstimatedSize, "Decompression mem (%u) > estimated (%u)", - (unsigned)dstreamSize, (unsigned)dstreamEstimatedSize); - - printf("Level %2i : Compression Mem = %5u KB (estimated : %5u KB) ; Decompression Mem = %4u KB (estimated : %5u KB)\n", - compressionLevel, - (unsigned)(cstreamSize>>10), (unsigned)(cstreamEstimatedSize>>10), - (unsigned)(dstreamSize>>10), (unsigned)(dstreamEstimatedSize>>10)); - - ZSTD_freeDCtx(dctx); - ZSTD_freeCCtx(cctx); - ZSTD_freeCCtxParams(cctxParams); - if (wLog) break; /* single test */ - } - return 0; -} diff --git a/lib/BUCK b/lib/BUCK deleted file mode 100644 index 60c6bbb54d91..000000000000 --- a/lib/BUCK +++ /dev/null @@ -1,232 +0,0 @@ -cxx_library( - name='zstd', - header_namespace='', - exported_headers=['zstd.h'], - visibility=['PUBLIC'], - deps=[ - ':common', - ':compress', - ':decompress', - ':deprecated', - ], -) - -cxx_library( - name='compress', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('compress', 'zstd*.h'), - ]), - srcs=glob(['compress/zstd*.c', 'compress/hist.c']), - deps=[':common'], -) - -cxx_library( - name='decompress', - 
header_namespace='', - visibility=['PUBLIC'], - headers=subdir_glob([ - ('decompress', '*_impl.h'), - ]), - srcs=glob(['decompress/zstd*.c']), - deps=[ - ':common', - ':legacy', - ], -) - -cxx_library( - name='deprecated', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('deprecated', '*.h'), - ]), - srcs=glob(['deprecated/*.c']), - deps=[':common'], -) - -cxx_library( - name='legacy', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('legacy', '*.h'), - ]), - srcs=glob(['legacy/*.c']), - deps=[':common'], - exported_preprocessor_flags=[ - '-DZSTD_LEGACY_SUPPORT=4', - ], -) - -cxx_library( - name='zdict', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=['zdict.h'], - headers=subdir_glob([ - ('dictBuilder', 'divsufsort.h'), - ('dictBuilder', 'cover.h'), - ]), - srcs=glob(['dictBuilder/*.c']), - deps=[':common'], -) - -cxx_library( - name='compiler', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'compiler.h'), - ]), -) - -cxx_library( - name='cpu', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'cpu.h'), - ]), -) - -cxx_library( - name='bitstream', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'bitstream.h'), - ]), -) - -cxx_library( - name='entropy', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'fse.h'), - ('common', 'huf.h'), - ]), - srcs=[ - 'common/entropy_common.c', - 'common/fse_decompress.c', - 'compress/fse_compress.c', - 'compress/huf_compress.c', - 'decompress/huf_decompress.c', - ], - deps=[ - ':debug', - ':bitstream', - ':compiler', - ':errors', - ':mem', - ], -) - -cxx_library( - name='errors', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=[ - 'zstd_errors.h', - 'common/error_private.h', - ] - srcs=['common/error_private.c'], -) - -cxx_library( - name='mem', 
- header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'mem.h'), - ]), -) - -cxx_library( - name='pool', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'pool.h'), - ]), - srcs=['common/pool.c'], - deps=[ - ':threading', - ':zstd_common', - ], -) - -cxx_library( - name='threading', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'threading.h'), - ]), - srcs=['common/threading.c'], - exported_preprocessor_flags=[ - '-DZSTD_MULTITHREAD', - ], - exported_linker_flags=[ - '-pthread', - ], -) - -cxx_library( - name='xxhash', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'xxhash.h'), - ]), - srcs=['common/xxhash.c'], - exported_preprocessor_flags=[ - '-DXXH_NAMESPACE=ZSTD_', - ], -) - -cxx_library( - name='zstd_common', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('', 'zstd.h'), - ('common', 'zstd_internal.h'), - ]), - srcs=['common/zstd_common.c'], - deps=[ - ':compiler', - ':errors', - ':mem', - ], -) - -cxx_library( - name='debug', - header_namespace='', - visibility=['PUBLIC'], - exported_headers=subdir_glob([ - ('common', 'debug.h'), - ]), - srcs=['common/debug.c'], -) - -cxx_library( - name='common', - deps=[ - ':debug', - ':bitstream', - ':compiler', - ':cpu', - ':entropy', - ':errors', - ':mem', - ':pool', - ':threading', - ':xxhash', - ':zstd_common', - ] -) diff --git a/lib/Makefile b/lib/Makefile deleted file mode 100644 index a6a0eb09d821..000000000000 --- a/lib/Makefile +++ /dev/null @@ -1,389 +0,0 @@ -# ################################################################ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# You may select, at your option, one of the above-listed licenses. -# ################################################################ - -# default target (when running `make` with no argument) -lib-release: - -# Modules -ZSTD_LIB_COMPRESSION ?= 1 -ZSTD_LIB_DECOMPRESSION ?= 1 -ZSTD_LIB_DICTBUILDER ?= 1 -ZSTD_LIB_DEPRECATED ?= 0 - -# Input variables for libzstd.mk -ifeq ($(ZSTD_LIB_COMPRESSION), 0) - ZSTD_LIB_DICTBUILDER = 0 - ZSTD_LIB_DEPRECATED = 0 -endif - -ifeq ($(ZSTD_LIB_DECOMPRESSION), 0) - ZSTD_LEGACY_SUPPORT = 0 - ZSTD_LIB_DEPRECATED = 0 -endif - -include libzstd.mk - -ZSTD_FILES := $(ZSTD_COMMON_FILES) $(ZSTD_LEGACY_FILES) - -ifneq ($(ZSTD_LIB_COMPRESSION), 0) - ZSTD_FILES += $(ZSTD_COMPRESS_FILES) -endif - -ifneq ($(ZSTD_LIB_DECOMPRESSION), 0) - ZSTD_FILES += $(ZSTD_DECOMPRESS_FILES) -endif - -ifneq ($(ZSTD_LIB_DEPRECATED), 0) - ZSTD_FILES += $(ZSTD_DEPRECATED_FILES) -endif - -ifneq ($(ZSTD_LIB_DICTBUILDER), 0) - ZSTD_FILES += $(ZSTD_DICTBUILDER_FILES) -endif - -ZSTD_LOCAL_SRC := $(notdir $(ZSTD_FILES)) -ZSTD_LOCAL_OBJ0 := $(ZSTD_LOCAL_SRC:.c=.o) -ZSTD_LOCAL_OBJ := $(ZSTD_LOCAL_OBJ0:.S=.o) - -VERSION := $(ZSTD_VERSION) - -# Note: by default, the static library is built single-threaded and dynamic library is built -# multi-threaded. It is possible to force multi or single threaded builds by appending -# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded). 
- - -CPPFLAGS_DYNLIB += -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded -LDFLAGS_DYNLIB += -pthread -CPPFLAGS_STATICLIB += # static library build defaults to single-threaded - -# pkg-config Libs.private points to LDFLAGS_DYNLIB -PCLIB := $(LDFLAGS_DYNLIB) - -ifeq ($(findstring GCC,$(CCVER)),GCC) -decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize -endif - - -# macOS linker doesn't support -soname, and use different extension -# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html -UNAME_TARGET_SYSTEM ?= $(UNAME) - -ifeq ($(UNAME_TARGET_SYSTEM), Darwin) - SHARED_EXT = dylib - SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) - SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) - SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER) -else - ifeq ($(UNAME_TARGET_SYSTEM), AIX) - SONAME_FLAGS = - else - SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR) - endif - SHARED_EXT = so - SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR) - SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER) -endif - - -.PHONY: all -all: lib - - -.PHONY: libzstd.a # must be run every time -libzstd.a: CPPFLAGS += $(CPPFLAGS_STATICLIB) - -SET_CACHE_DIRECTORY = \ - +$(MAKE) --no-print-directory $@ \ - BUILD_DIR=obj/$(HASH_DIR) \ - CPPFLAGS="$(CPPFLAGS)" \ - CFLAGS="$(CFLAGS)" \ - LDFLAGS="$(LDFLAGS)" - -ifndef BUILD_DIR -# determine BUILD_DIR from compilation flags - -libzstd.a: - $(SET_CACHE_DIRECTORY) - -else -# BUILD_DIR is defined - -ZSTD_STATICLIB_DIR := $(BUILD_DIR)/static -ZSTD_STATICLIB := $(ZSTD_STATICLIB_DIR)/libzstd.a -ZSTD_STATICLIB_OBJ := $(addprefix $(ZSTD_STATICLIB_DIR)/,$(ZSTD_LOCAL_OBJ)) -$(ZSTD_STATICLIB): ARFLAGS = rcs -$(ZSTD_STATICLIB): | $(ZSTD_STATICLIB_DIR) -$(ZSTD_STATICLIB): $(ZSTD_STATICLIB_OBJ) - # Check for multithread flag at target execution time - $(if $(filter 
-DZSTD_MULTITHREAD,$(CPPFLAGS)),\ - @echo compiling multi-threaded static library $(LIBVER),\ - @echo compiling single-threaded static library $(LIBVER)) - $(AR) $(ARFLAGS) $@ $^ - -libzstd.a: $(ZSTD_STATICLIB) - cp -f $< $@ - -endif - -ifneq (,$(filter Windows%,$(TARGET_SYSTEM))) - -LIBZSTD = dll/libzstd.dll -$(LIBZSTD): $(ZSTD_FILES) - @echo compiling dynamic library $(LIBVER) - $(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll/libzstd.dll.a -shared $^ -o $@ - -else # not Windows - -LIBZSTD = libzstd.$(SHARED_EXT_VER) -.PHONY: $(LIBZSTD) # must be run every time -$(LIBZSTD): CPPFLAGS += $(CPPFLAGS_DYNLIB) -$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden -$(LIBZSTD): LDFLAGS += -shared $(LDFLAGS_DYNLIB) - -ifndef BUILD_DIR -# determine BUILD_DIR from compilation flags - -$(LIBZSTD): - $(SET_CACHE_DIRECTORY) - -else -# BUILD_DIR is defined - -ZSTD_DYNLIB_DIR := $(BUILD_DIR)/dynamic -ZSTD_DYNLIB := $(ZSTD_DYNLIB_DIR)/$(LIBZSTD) -ZSTD_DYNLIB_OBJ := $(addprefix $(ZSTD_DYNLIB_DIR)/,$(ZSTD_LOCAL_OBJ)) - -$(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR) -$(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ) -# Check for multithread flag at target execution time - $(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\ - @echo compiling multi-threaded dynamic library $(LIBVER),\ - @echo compiling single-threaded dynamic library $(LIBVER)) - $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ - @echo creating versioned links - ln -sf $@ libzstd.$(SHARED_EXT_MAJOR) - ln -sf $@ libzstd.$(SHARED_EXT) - -$(LIBZSTD): $(ZSTD_DYNLIB) - cp -f $< $@ - -endif # ifndef BUILD_DIR -endif # if windows - -.PHONY: libzstd -libzstd : $(LIBZSTD) - -.PHONY: lib -lib : libzstd.a libzstd - - -# note : do not define lib-mt or lib-release as .PHONY -# make does not consider implicit pattern rule for .PHONY target - -%-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD -%-mt : CPPFLAGS_STATICLIB := -DZSTD_MULTITHREAD -%-mt : LDFLAGS_DYNLIB := -pthread -%-mt : PCLIB := -%-mt : PCMTLIB := $(LDFLAGS_DYNLIB) -%-mt : % - @echo multi-threaded 
build completed - -%-nomt : CPPFLAGS_DYNLIB := -%-nomt : LDFLAGS_DYNLIB := -%-nomt : CPPFLAGS_STATICLIB := -%-nomt : PCLIB := -%-nomt : % - @echo single-threaded build completed - -%-release : DEBUGFLAGS := -%-release : % - @echo release build completed - - -# Generate .h dependencies automatically - -# -MMD: compiler generates dependency information as a side-effect of compilation, without system headers -# -MP: adds phony target for each dependency other than main file. -DEPFLAGS = -MMD -MP - -# ensure that ZSTD_DYNLIB_DIR exists prior to generating %.o -$(ZSTD_DYNLIB_DIR)/%.o : %.c | $(ZSTD_DYNLIB_DIR) - @echo CC $@ - $(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $< - -$(ZSTD_STATICLIB_DIR)/%.o : %.c | $(ZSTD_STATICLIB_DIR) - @echo CC $@ - $(COMPILE.c) $(DEPFLAGS) $(OUTPUT_OPTION) $< - -$(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR) - @echo AS $@ - $(COMPILE.S) $(OUTPUT_OPTION) $< - -$(ZSTD_STATICLIB_DIR)/%.o : %.S | $(ZSTD_STATICLIB_DIR) - @echo AS $@ - $(COMPILE.S) $(OUTPUT_OPTION) $< - -MKDIR ?= mkdir -p -$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATICLIB_DIR): - $(MKDIR) $@ - -DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATICLIB_OBJ:.o=.d) -$(DEPFILES): - -# The leading '-' means: do not fail is include fails (ex: directory does not exist yet) --include $(wildcard $(DEPFILES)) - - -# Special case : build library in single-thread mode _and_ without zstdmt_compress.c -# Note : we still need threading.c and pool.c for the dictionary builder, -# but they will correctly behave single-threaded. 
-ZSTDMT_FILES = zstdmt_compress.c -ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(notdir $(ZSTD_FILES))) -libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden -libzstd-nomt: LDFLAGS += -shared -libzstd-nomt: $(ZSTD_NOMT_FILES) - @echo compiling single-thread dynamic library $(LIBVER) - @echo files : $(ZSTD_NOMT_FILES) - @if echo "$(ZSTD_NOMT_FILES)" | tr ' ' '\n' | $(GREP) -q zstdmt; then \ - echo "Error: Found zstdmt in list."; \ - exit 1; \ - fi - $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ - -.PHONY: clean -clean: - $(RM) -r *.dSYM # macOS-specific - $(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc - $(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt* - $(RM) -r obj/* - @echo Cleaning library completed - -#----------------------------------------------------------------------------- -# make install is validated only for below listed environments -#----------------------------------------------------------------------------- -ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT% CYGWIN_NT%,$(UNAME))) - -lib: libzstd.pc - -HAS_EXPLICIT_EXEC_PREFIX := $(if $(or $(EXEC_PREFIX),$(exec_prefix)),1,) - -DESTDIR ?= -# directory variables : GNU conventions prefer lowercase -# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html -# support both lower and uppercase (BSD), use uppercase in script -prefix ?= /usr/local -PREFIX ?= $(prefix) -exec_prefix ?= $(PREFIX) -EXEC_PREFIX ?= $(exec_prefix) -libdir ?= $(EXEC_PREFIX)/lib -LIBDIR ?= $(libdir) -includedir ?= $(PREFIX)/include -INCLUDEDIR ?= $(includedir) - -PCINCDIR := $(patsubst $(PREFIX)%,%,$(INCLUDEDIR)) -PCLIBDIR := $(patsubst $(EXEC_PREFIX)%,%,$(LIBDIR)) - -# If we successfully stripped off a prefix, we'll add a reference to the -# relevant pc variable. 
-PCINCPREFIX := $(if $(findstring $(INCLUDEDIR),$(PCINCDIR)),,$${prefix}) -PCLIBPREFIX := $(if $(findstring $(LIBDIR),$(PCLIBDIR)),,$${exec_prefix}) - -# If no explicit EXEC_PREFIX was set by the caller, write it out as a reference -# to PREFIX, rather than as a resolved value. -PCEXEC_PREFIX := $(if $(HAS_EXPLICIT_EXEC_PREFIX),$(EXEC_PREFIX),$${prefix}) - - -ifneq ($(MT),) - PCLIB := - PCMTLIB := $(LDFLAGS_DYNLIB) -else - PCLIB := $(LDFLAGS_DYNLIB) -endif - -ifneq (,$(filter FreeBSD NetBSD DragonFly,$(UNAME))) - PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig -else - PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig -endif - -ifneq (,$(filter SunOS,$(UNAME))) - INSTALL ?= ginstall -else - INSTALL ?= install -endif - -INSTALL_PROGRAM ?= $(INSTALL) -INSTALL_DATA ?= $(INSTALL) -m 644 - - -# pkg-config library define. -# For static single-threaded library declare -pthread in Libs.private -# For static multi-threaded library declare -pthread in Libs and Cflags -.PHONY: libzstd.pc -libzstd.pc: libzstd.pc.in - @echo creating pkgconfig - @sed \ - -e 's|@PREFIX@|$(PREFIX)|' \ - -e 's|@EXEC_PREFIX@|$(PCEXEC_PREFIX)|' \ - -e 's|@INCLUDEDIR@|$(PCINCPREFIX)$(PCINCDIR)|' \ - -e 's|@LIBDIR@|$(PCLIBPREFIX)$(PCLIBDIR)|' \ - -e 's|@VERSION@|$(VERSION)|' \ - -e 's|@LIBS_MT@|$(PCMTLIB)|' \ - -e 's|@LIBS_PRIVATE@|$(PCLIB)|' \ - $< >$@ - -.PHONY: install -install: install-pc install-static install-shared install-includes - @echo zstd static and shared library installed - -.PHONY: install-pc -install-pc: libzstd.pc - [ -e $(DESTDIR)$(PKGCONFIGDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ - $(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/ - -.PHONY: install-static -install-static: - # only generate libzstd.a if it's not already present - [ -e libzstd.a ] || $(MAKE) libzstd.a-release - [ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/ - @echo Installing static library - $(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR) - -.PHONY: install-shared -install-shared: - # only 
generate libzstd.so if it's not already present - [ -e $(LIBZSTD) ] || $(MAKE) libzstd-release - [ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/ - @echo Installing shared library - $(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR) - ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) - ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) - -.PHONY: install-includes -install-includes: - [ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/ - @echo Installing includes - $(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR) - $(INSTALL_DATA) zstd_errors.h $(DESTDIR)$(INCLUDEDIR) - $(INSTALL_DATA) zdict.h $(DESTDIR)$(INCLUDEDIR) - -.PHONY: uninstall -uninstall: - $(RM) $(DESTDIR)$(LIBDIR)/libzstd.a - $(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) - $(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) - $(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD) - $(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc - $(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h - $(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h - $(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h - @echo zstd libraries successfully uninstalled - -endif diff --git a/programs/BUCK b/programs/BUCK deleted file mode 100644 index d2aa6373ff47..000000000000 --- a/programs/BUCK +++ /dev/null @@ -1,44 +0,0 @@ -cxx_binary( - name='zstd', - headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), - srcs=glob(['*.c'], excludes=['datagen.c']), - deps=[ - ':datagen', - ':util', - '//lib:zstd', - '//lib:zdict', - '//lib:mem', - '//lib:xxhash', - ], - preprocessor_flags=[ - '-DZSTD_GZCOMPRESS', - '-DZSTD_GZDECOMPRESS', - '-DZSTD_LZMACOMPRESS', - '-DZSTD_LZMADECOMPRES', - '-DZSTD_LZ4COMPRESS', - '-DZSTD_LZ4DECOMPRES', - ], - linker_flags=[ - '-lz', - '-llzma', - '-llz4', - ], -) - -cxx_library( - name='datagen', - visibility=['PUBLIC'], - header_namespace='', - exported_headers=['datagen.h'], - srcs=['datagen.c'], - deps=['//lib:mem'], -) - - -cxx_library( - name='util', - visibility=['PUBLIC'], - 
header_namespace='', - exported_headers=['util.h', 'platform.h'], - deps=['//lib:mem'], -) diff --git a/programs/Makefile b/programs/Makefile deleted file mode 100644 index f4af5e98134e..000000000000 --- a/programs/Makefile +++ /dev/null @@ -1,445 +0,0 @@ -# ################################################################ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# You may select, at your option, one of the above-listed licenses. -# ########################################################################## -# zstd : Command Line Utility, supporting gzip-like arguments -# zstd32 : Same as zstd, but forced to compile in 32-bits mode -# zstd-nolegacy : zstd without support of decompression of legacy versions -# zstd-small : minimal zstd without dictionary builder and benchmark -# zstd-compress : compressor-only version of zstd -# zstd-decompress : decompressor-only version of zstd -# ########################################################################## - -# default target (when running `make` with no argument) -zstd-release: - -LIBZSTD_MK_DIR = ../lib -include $(LIBZSTD_MK_DIR)/libzstd.mk - -ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1) - ALIGN_LOOP = -falign-loops=32 -else - ALIGN_LOOP = -endif - -ZSTDLIB_COMMON_SRC := $(sort $(ZSTD_COMMON_FILES)) -ZSTDLIB_COMPRESS_SRC := $(sort $(ZSTD_COMPRESS_FILES)) -ZSTDLIB_DECOMPRESS_SRC := $(sort $(ZSTD_DECOMPRESS_FILES)) -ZSTDLIB_CORE_SRC := $(sort $(ZSTD_DECOMPRESS_FILES) $(ZSTD_COMMON_FILES) $(ZSTD_COMPRESS_FILES)) -ZDICT_SRC := $(sort $(ZSTD_DICTBUILDER_FILES)) -ZSTDLEGACY_SRC := $(sort $(ZSTD_LEGACY_FILES)) - -# Sort files in alphabetical order for reproducible builds -ZSTDLIB_FULL_SRC = $(sort $(ZSTDLIB_CORE_SRC) $(ZSTDLEGACY_SRC) 
$(ZDICT_SRC)) -ZSTDLIB_LOCAL_SRC = $(notdir $(ZSTDLIB_FULL_SRC)) -ZSTDLIB_LOCAL_OBJ0 := $(ZSTDLIB_LOCAL_SRC:.c=.o) -ZSTDLIB_LOCAL_OBJ := $(ZSTDLIB_LOCAL_OBJ0:.S=.o) - -ZSTD_CLI_SRC := $(sort $(wildcard *.c)) -ZSTD_CLI_OBJ := $(ZSTD_CLI_SRC:.c=.o) - -ZSTD_ALL_SRC = $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC) -ZSTD_ALL_OBJ0 := $(ZSTD_ALL_SRC:.c=.o) -ZSTD_ALL_OBJ := $(ZSTD_ALL_OBJ0:.S=.o) - -# Define *.exe as extension for Windows systems -ifneq (,$(filter Windows%,$(OS))) - EXT =.exe - RES64_FILE = windres/zstd64.res - RES32_FILE = windres/zstd32.res -ifneq (,$(filter x86_64%,$(shell $(CC) -dumpmachine))) - RES_FILE = $(RES64_FILE) -else - RES_FILE = $(RES32_FILE) -endif -else - EXT = -endif - -# thread detection -NO_THREAD_MSG := ==> no threads, building without multithreading support -HAVE_PTHREAD := $(shell printf '$(NUM_SYMBOL)include <pthread.h>\nint main(void) { return 0; }' > have_pthread.c && $(CC) $(FLAGS) -o have_pthread$(EXT) have_pthread.c -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0; rm have_pthread.c) -HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0) -ifeq ($(HAVE_THREAD), 1) - THREAD_MSG := ==> building with threading support - THREAD_CPP := -DZSTD_MULTITHREAD - THREAD_LD := -pthread -else - THREAD_MSG := $(NO_THREAD_MSG) -endif - -# zlib detection -NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support -HAVE_ZLIB ?= $(shell printf '$(NUM_SYMBOL)include <zlib.h>\nint main(void) { return 0; }' > have_zlib.c && $(CC) $(FLAGS) -o have_zlib$(EXT) have_zlib.c -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0; rm have_zlib.c) -ifeq ($(HAVE_ZLIB), 1) - ZLIB_MSG := ==> building zstd with .gz compression support - ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS - ZLIBLD = -lz -else - ZLIB_MSG := $(NO_ZLIB_MSG) -endif - -# lzma detection -NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support -HAVE_LZMA ?= $(shell printf '$(NUM_SYMBOL)include <lzma.h>\nint 
main(void) { return 0; }' > have_lzma.c && $(CC) $(FLAGS) -o have_lzma$(EXT) have_lzma.c -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0; rm have_lzma.c) -ifeq ($(HAVE_LZMA), 1) - LZMA_MSG := ==> building zstd with .xz/.lzma compression support - LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS - LZMALD = -llzma -else - LZMA_MSG := $(NO_LZMA_MSG) -endif - -# lz4 detection -NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support -HAVE_LZ4 ?= $(shell printf '$(NUM_SYMBOL)include <lz4frame.h>\n$(NUM_SYMBOL)include <lz4.h>\nint main(void) { return 0; }' > have_lz4.c && $(CC) $(FLAGS) -o have_lz4$(EXT) have_lz4.c -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0; rm have_lz4.c) -ifeq ($(HAVE_LZ4), 1) - LZ4_MSG := ==> building zstd with .lz4 compression support - LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS - LZ4LD = -llz4 -else - LZ4_MSG := $(NO_LZ4_MSG) -endif - -# explicit backtrace enable/disable for Linux & Darwin -ifeq ($(BACKTRACE), 0) - DEBUGFLAGS += -DBACKTRACE_ENABLE=0 -endif -ifeq (,$(filter Windows%, $(OS))) -ifeq ($(BACKTRACE), 1) - DEBUGFLAGS += -DBACKTRACE_ENABLE=1 - DEBUGFLAGS_LD += -rdynamic -endif -endif - -SET_CACHE_DIRECTORY = \ - +$(MAKE) --no-print-directory $@ \ - BUILD_DIR=obj/$(HASH_DIR) \ - CPPFLAGS="$(CPPFLAGS)" \ - CFLAGS="$(CFLAGS)" \ - LDFLAGS="$(LDFLAGS)" \ - LDLIBS="$(LDLIBS)" \ - ZSTD_ALL_SRC="$(ZSTD_ALL_SRC)" - - -.PHONY: all -all: zstd zstd-compress zstd-decompress zstd-small - -.PHONY: allVariants -allVariants: all zstd-frugal zstd-nolegacy zstd-dictBuilder - -.PHONY: zstd # must always be run -zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) -zstd : LDFLAGS += $(THREAD_LD) $(DEBUGFLAGS_LD) -zstd : LDLIBS += $(ZLIBLD) $(LZMALD) $(LZ4LD) -zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -ifneq (,$(filter Windows%,$(OS))) -zstd : $(RES_FILE) -endif - -ifndef BUILD_DIR -# generate BUILD_DIR from flags - -zstd: - $(SET_CACHE_DIRECTORY) - -else -# BUILD_DIR is defined - 
-ZSTD_OBJ := $(addprefix $(BUILD_DIR)/, $(ZSTD_ALL_OBJ)) -$(BUILD_DIR)/zstd : $(ZSTD_OBJ) - @echo "$(THREAD_MSG)" - @echo "$(ZLIB_MSG)" - @echo "$(LZMA_MSG)" - @echo "$(LZ4_MSG)" - @echo LINK $@ - $(CC) $(FLAGS) $^ $(LDLIBS) -o $@$(EXT) - -ifeq ($(HAVE_HASH),1) -SRCBIN_HASH = $(shell cat $(BUILD_DIR)/zstd$(EXT) 2> $(VOID) | $(HASH) | cut -f 1 -d " ") -DSTBIN_HASH = $(shell cat zstd$(EXT) 2> $(VOID) | $(HASH) | cut -f 1 -d " ") -BIN_ISDIFFERENT = $(if $(filter $(SRCBIN_HASH),$(DSTBIN_HASH)),0,1) -else -BIN_ISDIFFERENT = 1 -endif - -zstd : $(BUILD_DIR)/zstd - if [ $(BIN_ISDIFFERENT) -eq 1 ]; then \ - cp -f $<$(EXT) $@$(EXT); \ - echo zstd build completed; \ - else \ - echo zstd already built; \ - fi - -endif # BUILD_DIR - - -CLEAN += zstd -.PHONY: zstd-release -zstd-release: DEBUGFLAGS := -DBACKTRACE_ENABLE=0 -zstd-release: DEBUGFLAGS_LD := -zstd-release: zstd - -CLEAN += zstd32 -zstd32 : CPPFLAGS += $(THREAD_CPP) -zstd32 : LDFLAGS += $(THREAD_LD) -zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -ifneq (,$(filter Windows%,$(OS))) -zstd32 : $(RES32_FILE) -endif -zstd32 : $(ZSTDLIB_FULL_SRC) $(ZSTD_CLI_SRC) - $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) - -## zstd-nolegacy: same scope as zstd, with removed support of legacy formats -CLEAN += zstd-nolegacy -zstd-nolegacy : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) -zstd-nolegacy : CPPFLAGS += -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=0 -zstd-nolegacy : $(ZSTDLIB_CORE_SRC) $(ZDICT_SRC) $(ZSTD_CLI_OBJ) - $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) - -.PHONY: zstd-nomt -zstd-nomt : THREAD_CPP := -zstd-nomt : THREAD_LD := -zstd-nomt : THREAD_MSG := - multi-threading disabled -zstd-nomt : zstd - -.PHONY: zstd-nogz -zstd-nogz : ZLIBCPP := -zstd-nogz : ZLIBLD := -zstd-nogz : ZLIB_MSG := - gzip support is disabled -zstd-nogz : zstd - -.PHONY: zstd-noxz -zstd-noxz : LZMACPP := -zstd-noxz : LZMALD := -zstd-noxz : LZMA_MSG := - xz/lzma support is disabled -zstd-noxz : zstd - -## zstd-dll: zstd 
executable linked to dynamic library libzstd (must have same version) -.PHONY: zstd-dll -zstd-dll : LDFLAGS+= -L$(LIB_BINDIR) -zstd-dll : LDLIBS += -lzstd -zstd-dll : ZSTDLIB_LOCAL_SRC = xxhash.c pool.c threading.c -zstd-dll : zstd - - -## zstd-pgo: zstd executable optimized with PGO. -.PHONY: zstd-pgo -zstd-pgo : LLVM_PROFDATA?=llvm-profdata -zstd-pgo : PROF_GENERATE_FLAGS=-fprofile-generate $(if $(findstring gcc,$(CC)),-fprofile-dir=.) -zstd-pgo : PROF_USE_FLAGS=-fprofile-use $(if $(findstring gcc,$(CC)),-fprofile-dir=. -Wno-error=missing-profile -Wno-error=coverage-mismatch) -zstd-pgo : - $(MAKE) clean HASH_DIR=$(HASH_DIR) - $(MAKE) zstd HASH_DIR=$(HASH_DIR) MOREFLAGS="$(PROF_GENERATE_FLAGS)" - ./zstd -b19i1 $(PROFILE_WITH) - ./zstd -b16i1 $(PROFILE_WITH) - ./zstd -b9i2 $(PROFILE_WITH) - ./zstd -b $(PROFILE_WITH) - ./zstd -b7i2 $(PROFILE_WITH) - ./zstd -b5 $(PROFILE_WITH) -ifndef BUILD_DIR - $(RM) zstd obj/$(HASH_DIR)/zstd obj/$(HASH_DIR)/*.o -else - $(RM) zstd $(BUILD_DIR)/zstd $(BUILD_DIR)/*.o -endif - case $(CC) in *clang*) if ! [ -e default.profdata ]; then $(LLVM_PROFDATA) merge -output=default.profdata default*.profraw; fi ;; esac - $(MAKE) zstd HASH_DIR=$(HASH_DIR) MOREFLAGS="$(PROF_USE_FLAGS)" - -## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format. 
-CLEAN += zstd-small zstd-frugal -zstd-small: CFLAGS = -Os -Wl,-s -zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c fileio_asyncio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOTRACE -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=0 $^ -o $@$(EXT) - -CLEAN += zstd-decompress -zstd-decompress: $(ZSTDLIB_COMMON_SRC) $(ZSTDLIB_DECOMPRESS_SRC) zstdcli.c util.c timefn.c fileio.c fileio_asyncio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS -DZSTD_NOTRACE -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=0 $^ -o $@$(EXT) - -CLEAN += zstd-compress -zstd-compress: $(ZSTDLIB_COMMON_SRC) $(ZSTDLIB_COMPRESS_SRC) zstdcli.c util.c timefn.c fileio.c fileio_asyncio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS -DZSTD_NOTRACE -UZSTD_LEGACY_SUPPORT -DZSTD_LEGACY_SUPPORT=0 $^ -o $@$(EXT) - -## zstd-dictBuilder: executable supporting dictionary creation and compression (only) -CLEAN += zstd-dictBuilder -zstd-dictBuilder: $(ZSTDLIB_COMMON_SRC) $(ZSTDLIB_COMPRESS_SRC) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c fileio_asyncio.c dibio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODECOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT) - -CLEAN += zstdmt -zstdmt: zstd - ln -sf zstd zstdmt - -.PHONY: generate_res -generate_res: $(RES64_FILE) $(RES32_FILE) - -ifneq (,$(filter Windows%,$(OS))) -RC ?= windres -# https://stackoverflow.com/questions/708238/how-do-i-add-an-icon-to-a-mingw-gcc-compiled-executable -$(RES64_FILE): windres/zstd.rc - $(RC) -o $@ -I ../lib -I windres -i $< -O coff -F pe-x86-64 -$(RES32_FILE): windres/zstd.rc - $(RC) -o $@ -I ../lib -I windres -i $< -O coff -F pe-i386 -endif - -.PHONY: clean -clean: - $(RM) $(CLEAN) core *.o tmp* result* dictionary *.zst \ - *.gcda default*.profraw default.profdata have_zlib - $(RM) -r obj/* - @echo Cleaning completed - -MD2ROFF = ronn -MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)" - -zstd.1: zstd.1.md ../lib/zstd.h - cat $< | 
$(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ - -zstdgrep.1: zstdgrep.1.md ../lib/zstd.h - cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ - -zstdless.1: zstdless.1.md ../lib/zstd.h - cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ - -.PHONY: man -man: zstd.1 zstdgrep.1 zstdless.1 - -.PHONY: clean-man -clean-man: - $(RM) zstd.1 - $(RM) zstdgrep.1 - $(RM) zstdless.1 - -.PHONY: preview-man -preview-man: clean-man man - man ./zstd.1 - man ./zstdgrep.1 - man ./zstdless.1 - - -# Generate .h dependencies automatically - -DEPFLAGS = -MT $@ -MMD -MP -MF - -$(BUILD_DIR)/%.o : %.c $(BUILD_DIR)/%.d | $(BUILD_DIR) - @echo CC $@ - $(COMPILE.c) $(DEPFLAGS) $(BUILD_DIR)/$*.d $(OUTPUT_OPTION) $< - -$(BUILD_DIR)/%.o : %.S | $(BUILD_DIR) - @echo AS $@ - $(COMPILE.S) $(OUTPUT_OPTION) $< - -MKDIR ?= mkdir -$(BUILD_DIR): ; $(MKDIR) -p $@ - -DEPFILES := $(ZSTD_OBJ:.o=.d) -$(DEPFILES): - -include $(wildcard $(DEPFILES)) - - - -#----------------------------------------------------------------------------- -# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets -#----------------------------------------------------------------------------- -ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT% CYGWIN_NT%,$(UNAME))) - -HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) -EGREP_OPTIONS ?= -ifeq ($(HAVE_COLORNEVER), 1) - EGREP_OPTIONS += --color=never -endif -EGREP = egrep $(EGREP_OPTIONS) -AWK = awk - -# Print a two column output of targets and their description. To add a target description, put a -# comment in the Makefile with the format "## <TARGET>: <DESCRIPTION>". 
For example: -# -## list: Print all targets and their descriptions (if provided) -.PHONY: list -list: - TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \ - | $(AWK) -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \ - | $(EGREP) -v -e '^[^[:alnum:]]' | sort); \ - { \ - printf "Target Name\tDescription\n"; \ - printf "%0.s-" {1..16}; printf "\t"; printf "%0.s-" {1..40}; printf "\n"; \ - for target in $$TARGETS; do \ - line=$$($(EGREP) "^##[[:space:]]+$$target:" $(lastword $(MAKEFILE_LIST))); \ - description=$$(echo $$line | $(AWK) '{i=index($$0,":"); print substr($$0,i+1)}' | xargs); \ - printf "$$target\t$$description\n"; \ - done \ - } | column -t -s $$'\t' - - -DESTDIR ?= -# directory variables : GNU conventions prefer lowercase -# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html -# support both lower and uppercase (BSD), use uppercase in script -prefix ?= /usr/local -PREFIX ?= $(prefix) -exec_prefix ?= $(PREFIX) -bindir ?= $(exec_prefix)/bin -BINDIR ?= $(bindir) -datarootdir ?= $(PREFIX)/share -mandir ?= $(datarootdir)/man -man1dir ?= $(mandir)/man1 - -ifneq (,$(filter OpenBSD NetBSD DragonFly SunOS,$(UNAME))) - MANDIR ?= $(PREFIX)/man - MAN1DIR ?= $(MANDIR)/man1 -else - MAN1DIR ?= $(man1dir) -endif - -ifneq (,$(filter SunOS,$(UNAME))) - INSTALL ?= ginstall -else - INSTALL ?= install -endif - -INSTALL_PROGRAM ?= $(INSTALL) -INSTALL_SCRIPT ?= $(INSTALL_PROGRAM) -INSTALL_DATA ?= $(INSTALL) -m 644 -INSTALL_MAN ?= $(INSTALL_DATA) - -.PHONY: install -install: - # generate zstd only if not already present - [ -e zstd ] || $(MAKE) zstd-release - [ -e $(DESTDIR)$(BINDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ - [ -e $(DESTDIR)$(MAN1DIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)/ - @echo Installing binaries - $(INSTALL_PROGRAM) zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT) - ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat$(EXT) - ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd$(EXT) 
- ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdmt$(EXT) - $(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless - $(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep - @echo Installing man pages - $(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1 - ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1 - ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1 - $(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1 - $(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1 - @echo zstd installation completed - -.PHONY: uninstall -uninstall: - $(RM) $(DESTDIR)$(BINDIR)/zstdgrep - $(RM) $(DESTDIR)$(BINDIR)/zstdless - $(RM) $(DESTDIR)$(BINDIR)/zstdcat - $(RM) $(DESTDIR)$(BINDIR)/unzstd - $(RM) $(DESTDIR)$(BINDIR)/zstdmt - $(RM) $(DESTDIR)$(BINDIR)/zstd - $(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1 - $(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1 - $(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1 - $(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1 - $(RM) $(DESTDIR)$(MAN1DIR)/zstd.1 - @echo zstd programs successfully uninstalled - -endif diff --git a/zlibWrapper/BUCK b/zlibWrapper/BUCK deleted file mode 100644 index a0710e1cf4cd..000000000000 --- a/zlibWrapper/BUCK +++ /dev/null @@ -1,22 +0,0 @@ -cxx_library( - name='zlib_wrapper', - visibility=['PUBLIC'], - exported_linker_flags=['-lz'], - header_namespace='', - exported_headers=['zstd_zlibwrapper.h'], - headers=[ - 'gzcompatibility.h', - 'gzguts.h', - ], - srcs=glob(['*.c']), - deps=[ - '//lib:zstd', - '//lib:zstd_common', - ], -) - -cxx_binary( - name='minigzip', - srcs=['examples/minigzip.c'], - deps=[':zlib_wrapper'], -) diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile deleted file mode 100644 index 830b294bb6f4..000000000000 --- a/zlibWrapper/Makefile +++ /dev/null @@ -1,120 +0,0 @@ -# Makefile for example of using zstd wrapper for zlib -# -# make - compiles examples -# make MOREFLAGS=-DZWRAP_USE_ZSTD=1 - compiles examples with zstd compression turned on -# make test - runs examples - - -# Paths to static and dynamic zlib and zstd libraries -# Use "make 
ZLIB_PATH=path/to/zlib ZLIB_LIBRARY=path/to/libz.so" to select a path to library -ZLIB_LIBRARY ?= -lz -ZLIB_PATH ?= . - -ZSTDLIBDIR = ../lib -ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.a -ZLIBWRAPPER_PATH = . -GZFILES = gzclose.o gzlib.o gzread.o gzwrite.o -EXAMPLE_PATH = examples -PROGRAMS_PATH = ../programs -TEST_FILE = ../doc/zstd_compression_format.md - -vpath %.c $(PROGRAMS_PATH) $(EXAMPLE_PATH) $(ZLIBWRAPPER_PATH) - - -CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH) \ - -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH) -STDFLAGS = -std=c89 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \ - -DNO_snprintf -DNO_vsnprintf # strict ANSI C89 is missing these prototypes -DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \ - -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \ - -Wstrict-aliasing=1 -CFLAGS ?= -O3 -CFLAGS += $(STDFLAGS) $(DEBUGFLAGS) -CPPFLAGS += $(MOREFLAGS) -LDLIBS += $(ZLIB_LIBRARY) - -# Define *.exe as extension for Windows systems -ifneq (,$(filter Windows%,$(OS))) -EXT =.exe -else -EXT = -endif - -default : release - -release : STDFLAGS = -release : DEBUGFLAGS = -release : all - -all: fitblk example zwrapbench minigzip - -test: example fitblk example_zstd fitblk_zstd zwrapbench minigzip minigzip_zstd - ./example - ./example_zstd - ./fitblk 10240 <$(TEST_FILE) - ./fitblk 40960 <$(TEST_FILE) - ./fitblk_zstd 10240 <$(TEST_FILE) - ./fitblk_zstd 40960 <$(TEST_FILE) - @echo ---- minigzip start ---- - ./minigzip_zstd example$(EXT) - #cp example$(EXT).gz example$(EXT)_zstd.gz - ./minigzip_zstd -d example$(EXT).gz - ./minigzip example$(EXT) - #cp example$(EXT).gz example$(EXT)_gz.gz - ./minigzip_zstd -d example$(EXT).gz - @echo ---- minigzip end ---- - ./zwrapbench -qi1b3B1K $(TEST_FILE) - ./zwrapbench -rqi1b1e3 ../lib - -.PHONY: test-valgrind -#test-valgrind: ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.so -test-valgrind: VALGRIND = LD_LIBRARY_PATH=$(ZSTDLIBDIR) valgrind --track-origins=yes 
--leak-check=full --error-exitcode=1 -test-valgrind: clean example fitblk example_zstd fitblk_zstd zwrapbench - @echo "\n ---- valgrind tests ----" - $(VALGRIND) ./example - $(VALGRIND) ./example_zstd - $(VALGRIND) ./fitblk 10240 <$(TEST_FILE) - $(VALGRIND) ./fitblk 40960 <$(TEST_FILE) - $(VALGRIND) ./fitblk_zstd 10240 <$(TEST_FILE) - $(VALGRIND) ./fitblk_zstd 40960 <$(TEST_FILE) - $(VALGRIND) ./zwrapbench -qi1b3B1K $(TEST_FILE) - $(VALGRIND) ./zwrapbench -rqi1b1e5 ../lib ../programs ../tests - -#.c.o: -# $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ - -minigzip: minigzip.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - -minigzip_zstd: minigzip.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - $(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION) - -example: example.o zstd_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - -example_zstd: example.o zstdTurnedOn_zlibwrapper.o $(GZFILES) $(ZSTDLIBRARY) - $(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION) - -fitblk: fitblk.o zstd_zlibwrapper.o $(ZSTDLIBRARY) - -fitblk_zstd: fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY) - $(LINK.o) $^ $(LDLIBS) $(OUTPUT_OPTION) - -zwrapbench: zwrapbench.o zstd_zlibwrapper.o util.o timefn.o datagen.o $(ZSTDLIBRARY) - - -zstd_zlibwrapper.o: zstd_zlibwrapper.h - -zstdTurnedOn_zlibwrapper.o: CPPFLAGS += -DZWRAP_USE_ZSTD=1 -zstdTurnedOn_zlibwrapper.o: zstd_zlibwrapper.c zstd_zlibwrapper.h - $(COMPILE.c) $< $(OUTPUT_OPTION) - - -$(ZSTDLIBRARY): - $(MAKE) -C $(ZSTDLIBDIR) libzstd.a - -$(ZSTDLIBDIR)/libzstd.so: - $(MAKE) -C $(ZSTDLIBDIR) libzstd - - -clean: - -$(RM) $(ZLIBWRAPPER_PATH)/*.o $(EXAMPLE_PATH)/*.o *.o foo.gz example$(EXT) example_zstd$(EXT) fitblk$(EXT) fitblk_zstd$(EXT) zwrapbench$(EXT) minigzip$(EXT) minigzip_zstd$(EXT) - @echo Cleaning completed diff --git a/zlibWrapper/examples/example.c b/zlibWrapper/examples/example.c deleted file mode 100644 index eaae69747b76..000000000000 --- a/zlibWrapper/examples/example.c +++ /dev/null @@ -1,598 +0,0 @@ -/* example.c contains minimal changes required to be compiled 
with zlibWrapper: - * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h" - * - test_flush() and test_sync() use functions not supported by zlibWrapper - therefore they are disabled while zstd compression is turned on */ - -/* example.c -- usage example of the zlib compression library - */ -/* - Copyright (c) 1995-2006, 2011 Jean-loup Gailly - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgement in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - */ - -/* @(#) $Id$ */ - -#include "zstd_zlibwrapper.h" -#include <stdio.h> - -#ifdef STDC -# include <string.h> -# include <stdlib.h> -#endif - -#if defined(VMS) || defined(RISCOS) -# define TESTFILE "foo-gz" -#else -# define TESTFILE "foo.gz" -#endif - -#define CHECK_ERR(err, msg) { \ - if (err != Z_OK) { \ - fprintf(stderr, "%s error: %d\n", msg, err); \ - exit(1); \ - } \ -} - -z_const char hello[] = "hello, hello! I said hello, hello!"; -/* "hello world" would be more standard, but the repeated "hello" - * stresses the compression code better, sorry... 
- */ - -const char dictionary[] = "hello, hello!"; -uLong dictId; /* Adler32 value of the dictionary */ - -void test_deflate _Z_OF((Byte *compr, uLong comprLen)); -void test_inflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_large_deflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_large_inflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_flush _Z_OF((Byte *compr, uLong *comprLen)); -void test_sync _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_dict_deflate _Z_OF((Byte *compr, uLong comprLen)); -void test_dict_inflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -int main _Z_OF((int argc, char *argv[])); - - -#ifdef Z_SOLO - -void *myalloc _Z_OF((void *, unsigned, unsigned)); -void myfree _Z_OF((void *, void *)); - -void *myalloc(void *q, unsigned n, unsigned m) -{ - void *buf = calloc(n, m); - q = Z_NULL; - /* printf("myalloc %p n=%d m=%d\n", buf, n, m); */ - return buf; -} - -void myfree(void *q, void *p) -{ - /* printf("myfree %p\n", p); */ - q = Z_NULL; - free(p); -} - -static alloc_func zalloc = myalloc; -static free_func zfree = myfree; - -#else /* !Z_SOLO */ - -static alloc_func zalloc = (alloc_func)0; -static free_func zfree = (free_func)0; - -void test_compress _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_gzio _Z_OF((const char *fname, - Byte *uncompr, uLong uncomprLen)); - -/* =========================================================================== - * Test compress() and uncompress() - */ -void test_compress(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) { - int err; - uLong len = (uLong)strlen(hello)+1; - - err = compress(compr, &comprLen, (const Bytef*)hello, len); - CHECK_ERR(err, "compress"); - - strcpy((char*)uncompr, "garbage"); - - err = uncompress(uncompr, &uncomprLen, compr, comprLen); - CHECK_ERR(err, 
"uncompress"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad uncompress\n"); - exit(1); - } else { - printf("uncompress(): %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Test read/write of .gz files - */ -void test_gzio(const char *fname, Byte *uncompr, uLong uncomprLen) { -#ifdef NO_GZCOMPRESS - fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n"); -#else - int err; - int len = (int)strlen(hello)+1; - gzFile file; - z_off_t pos; - - file = gzopen(fname, "wb"); - if (file == NULL) { - fprintf(stderr, "gzopen error\n"); - exit(1); - } - gzputc(file, 'h'); - if (gzputs(file, "ello") != 4) { - fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err)); - exit(1); - } - if (gzprintf(file, ", %s! I said hello, hello!", "hello") != 8+21) { - fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err)); - exit(1); - } - gzseek(file, 1L, SEEK_CUR); /* add one zero byte */ - gzclose(file); - - file = gzopen(fname, "rb"); - if (file == NULL) { - fprintf(stderr, "gzopen error\n"); - exit(1); - } - strcpy((char*)uncompr, "garbage"); - - if (gzread(file, uncompr, (unsigned)uncomprLen) != len) { - fprintf(stderr, "gzread err: %s\n", gzerror(file, &err)); - exit(1); - } - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad gzread: %s\n", (char*)uncompr); - exit(1); - } else { - printf("gzread(): %s\n", (char*)uncompr); - } - - pos = gzseek(file, -8L, SEEK_CUR); - if (pos != 6+21 || gztell(file) != pos) { - fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n", - (long)pos, (long)gztell(file)); - exit(1); - } - - if (gzgetc(file) != ' ') { - fprintf(stderr, "gzgetc error\n"); - exit(1); - } - - if (gzungetc(' ', file) != ' ') { - fprintf(stderr, "gzungetc error\n"); - exit(1); - } - - gzgets(file, (char*)uncompr, (int)uncomprLen); - if (strlen((char*)uncompr) != 7) { /* " hello!" 
*/ - fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err)); - exit(1); - } - if (strcmp((char*)uncompr, hello + 6+21)) { - fprintf(stderr, "bad gzgets after gzseek\n"); - exit(1); - } else { - printf("gzgets() after gzseek: %s\n", (char*)uncompr); - } - - gzclose(file); -#endif -} - -#endif /* Z_SOLO */ - -/* =========================================================================== - * Test deflate() with small buffers - */ -void test_deflate(Byte *compr, uLong comprLen) { - z_stream c_stream; /* compression stream */ - int err; - uLong len = (uLong)strlen(hello)+1; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_in = (z_const unsigned char *)hello; - c_stream.next_out = compr; - - while (c_stream.total_in != len && c_stream.total_out < comprLen) { - c_stream.avail_in = c_stream.avail_out = 1; /* force small buffers */ - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - } - /* Finish the stream, still forcing small buffers: */ - for (;;) { - c_stream.avail_out = 1; - err = deflate(&c_stream, Z_FINISH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "deflate"); - } - - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with small buffers - */ -void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) { - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = 0; - d_stream.next_out = uncompr; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { - d_stream.avail_in = 
d_stream.avail_out = 1; /* force small buffers */ - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "inflate"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad inflate\n"); - exit(1); - } else { - printf("inflate(): %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Test deflate() with large buffers and dynamic change of compression level - */ -void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) { - z_stream c_stream; /* compression stream */ - int err; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_BEST_SPEED); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_out = compr; - c_stream.avail_out = (uInt)comprLen; - - /* At this point, uncompr is still mostly zeroes, so it should compress - * very well: - */ - c_stream.next_in = uncompr; - c_stream.avail_in = (uInt)uncomprLen; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - if (c_stream.avail_in != 0) { - fprintf(stderr, "deflate not greedy\n"); - exit(1); - } - - /* Feed in already compressed data and switch to no compression: */ - deflateParams(&c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); - c_stream.next_in = compr; - c_stream.avail_in = (uInt)comprLen/2; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - - /* Switch back to compressing mode: */ - deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED); - c_stream.next_in = uncompr; - c_stream.avail_in = (uInt)uncomprLen; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - fprintf(stderr, "deflate should report Z_STREAM_END\n"); - exit(1); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* 
=========================================================================== - * Test inflate() with large buffers - */ -void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) { - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = (uInt)comprLen; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - for (;;) { - d_stream.next_out = uncompr; /* discard the output */ - d_stream.avail_out = (uInt)uncomprLen; - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "large inflate"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (d_stream.total_out != 2*uncomprLen + comprLen/2) { - fprintf(stderr, "bad large inflate: %ld\n", d_stream.total_out); - exit(1); - } else { - printf("large_inflate(): OK\n"); - } -} - -/* =========================================================================== - * Test deflate() with full flush - */ -void test_flush(Byte *compr, uLong *comprLen) { - z_stream c_stream; /* compression stream */ - int err; - uInt len = (uInt)strlen(hello)+1; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_in = (z_const unsigned char *)hello; - c_stream.next_out = compr; - c_stream.avail_in = 3; - c_stream.avail_out = (uInt)*comprLen; - err = deflate(&c_stream, Z_FULL_FLUSH); - CHECK_ERR(err, "deflate"); - - compr[3]++; /* force an error in first compressed block */ - c_stream.avail_in = len - 3; - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - CHECK_ERR(err, "deflate"); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); - - *comprLen = c_stream.total_out; -} - -/* 
=========================================================================== - * Test inflateSync() - */ -void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen) { - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = 2; /* just read the zlib header */ - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - d_stream.next_out = uncompr; - d_stream.avail_out = (uInt)uncomprLen; - - inflate(&d_stream, Z_NO_FLUSH); - CHECK_ERR(err, "inflate"); - - d_stream.avail_in = (uInt)comprLen-2; /* read all compressed data */ - err = inflateSync(&d_stream); /* but skip the damaged part */ - CHECK_ERR(err, "inflateSync"); - - err = inflate(&d_stream, Z_FINISH); - if (err != Z_STREAM_END) { - fprintf(stderr, "inflate reported %i != %i (Z_STREAM_END)\n", err, Z_STREAM_END); - /* Because of incorrect adler32 */ - exit(1); - } - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - printf("after inflateSync(): hel%s\n", (char *)uncompr); -} - -/* =========================================================================== - * Test deflate() with preset dictionary - */ -void test_dict_deflate(Byte *compr, uLong comprLen) { - z_stream c_stream; /* compression stream */ - int err; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_BEST_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - err = deflateSetDictionary(&c_stream, - (const Bytef*)dictionary, (int)sizeof(dictionary)); - CHECK_ERR(err, "deflateSetDictionary"); - - dictId = c_stream.adler; - c_stream.next_out = compr; - c_stream.avail_out = (uInt)comprLen; - - c_stream.next_in = (z_const unsigned char *)hello; - c_stream.avail_in = (uInt)strlen(hello)+1; - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - 
fprintf(stderr, "deflate should report Z_STREAM_END\n"); - exit(1); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with a preset dictionary - */ -void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) { - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = (uInt)comprLen; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - d_stream.next_out = uncompr; - d_stream.avail_out = (uInt)uncomprLen; - - for (;;) { - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - if (err == Z_NEED_DICT) { - if (d_stream.adler != dictId) { - fprintf(stderr, "unexpected dictionary"); - exit(1); - } - err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary, - (int)sizeof(dictionary)); - } - CHECK_ERR(err, "inflate with dict"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad inflate with dict\n"); - exit(1); - } else { - printf("inflate with dictionary: %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Usage: example [output.gz [input.gz]] - */ - -int main(int argc, char *argv[]) { - Byte *compr, *uncompr; - uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */ - uLong uncomprLen = comprLen; - static const char* myVersion = ZLIB_VERSION; - - if (zlibVersion()[0] != myVersion[0]) { - fprintf(stderr, "incompatible zlib version\n"); - exit(1); - - } else if (strcmp(zlibVersion(), ZLIB_VERSION) != 0) { - fprintf(stderr, "warning: different zlib version\n"); - } - - printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", - ZLIB_VERSION, ZLIB_VERNUM, 
zlibCompileFlags()); - if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion()); - - compr = (Byte*)calloc((uInt)comprLen, 1); - uncompr = (Byte*)calloc((uInt)uncomprLen, 1); - /* compr and uncompr are cleared to avoid reading uninitialized - * data and to ensure that uncompr compresses well. - */ - if (compr == Z_NULL || uncompr == Z_NULL) { - printf("out of memory\n"); - exit(1); - } - -#ifdef Z_SOLO - argc = strlen(argv[0]); -#else - test_compress(compr, comprLen, uncompr, uncomprLen); - - test_gzio((argc > 1 ? argv[1] : TESTFILE), - uncompr, uncomprLen); -#endif - - test_deflate(compr, comprLen); - test_inflate(compr, comprLen, uncompr, uncomprLen); - - test_large_deflate(compr, comprLen, uncompr, uncomprLen); - test_large_inflate(compr, comprLen, uncompr, uncomprLen); - - if (!ZWRAP_isUsingZSTDcompression()) { - test_flush(compr, &comprLen); - test_sync(compr, comprLen, uncompr, uncomprLen); - } - comprLen = uncomprLen; - - test_dict_deflate(compr, comprLen); - test_dict_inflate(compr, comprLen, uncompr, uncomprLen); - - free(compr); - free(uncompr); - - return 0; -} diff --git a/zlibWrapper/examples/example_original.c b/zlibWrapper/examples/example_original.c deleted file mode 100644 index 828b06c881ca..000000000000 --- a/zlibWrapper/examples/example_original.c +++ /dev/null @@ -1,599 +0,0 @@ -/* example.c -- usage example of the zlib compression library - */ -/* - Copyright (c) 1995-2006, 2011 Jean-loup Gailly - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. 
If you use this software - in a product, an acknowledgement in the product documentation would be - appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* @(#) $Id$ */ - -#include "zlib.h" -#include <stdio.h> - -#ifdef STDC -# include <string.h> -# include <stdlib.h> -#endif - -#if defined(VMS) || defined(RISCOS) -# define TESTFILE "foo-gz" -#else -# define TESTFILE "foo.gz" -#endif - -#define CHECK_ERR(err, msg) { \ - if (err != Z_OK) { \ - fprintf(stderr, "%s error: %d\n", msg, err); \ - exit(1); \ - } \ -} - -z_const char hello[] = "hello, hello!"; -/* "hello world" would be more standard, but the repeated "hello" - * stresses the compression code better, sorry... - */ - -const char dictionary[] = "hello"; -uLong dictId; /* Adler32 value of the dictionary */ - -void test_deflate _Z_OF((Byte *compr, uLong comprLen)); -void test_inflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_large_deflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_large_inflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_flush _Z_OF((Byte *compr, uLong *comprLen)); -void test_sync _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_dict_deflate _Z_OF((Byte *compr, uLong comprLen)); -void test_dict_inflate _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -int main _Z_OF((int argc, char *argv[])); - - -#ifdef Z_SOLO - -void *myalloc _Z_OF((void *, unsigned, unsigned)); -void myfree _Z_OF((void *, void *)); - -void *myalloc(q, n, m) - void *q; - unsigned n, m; -{ - q = Z_NULL; - return calloc(n, m); -} - -void myfree(void *q, void *p) -{ - q = Z_NULL; - free(p); -} - -static alloc_func zalloc = myalloc; -static free_func zfree = myfree; 
- -#else /* !Z_SOLO */ - -static alloc_func zalloc = (alloc_func)0; -static free_func zfree = (free_func)0; - -void test_compress _Z_OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_gzio _Z_OF((const char *fname, - Byte *uncompr, uLong uncomprLen)); - -/* =========================================================================== - * Test compress() and uncompress() - */ -void test_compress(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen) -{ - int err; - uLong len = (uLong)strlen(hello)+1; - - err = compress(compr, &comprLen, (const Bytef*)hello, len); - CHECK_ERR(err, "compress"); - - strcpy((char*)uncompr, "garbage"); - - err = uncompress(uncompr, &uncomprLen, compr, comprLen); - CHECK_ERR(err, "uncompress"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad uncompress\n"); - exit(1); - } else { - printf("uncompress(): %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Test read/write of .gz files - */ -void test_gzio(const char *fname /* compressed file name */, Byte *uncompr, - uLong uncomprLen) -{ -#ifdef NO_GZCOMPRESS - fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n"); -#else - int err; - int len = (int)strlen(hello)+1; - gzFile file; - z_off_t pos; - - file = gzopen(fname, "wb"); - if (file == NULL) { - fprintf(stderr, "gzopen error\n"); - exit(1); - } - gzputc(file, 'h'); - if (gzputs(file, "ello") != 4) { - fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err)); - exit(1); - } - if (gzprintf(file, ", %s!", "hello") != 8) { - fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err)); - exit(1); - } - gzseek(file, 1L, SEEK_CUR); /* add one zero byte */ - gzclose(file); - - file = gzopen(fname, "rb"); - if (file == NULL) { - fprintf(stderr, "gzopen error\n"); - exit(1); - } - strcpy((char*)uncompr, "garbage"); - - if (gzread(file, uncompr, (unsigned)uncomprLen) != len) { - fprintf(stderr, "gzread err: %s\n", 
gzerror(file, &err)); - exit(1); - } - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad gzread: %s\n", (char*)uncompr); - exit(1); - } else { - printf("gzread(): %s\n", (char*)uncompr); - } - - pos = gzseek(file, -8L, SEEK_CUR); - if (pos != 6 || gztell(file) != pos) { - fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n", - (long)pos, (long)gztell(file)); - exit(1); - } - - if (gzgetc(file) != ' ') { - fprintf(stderr, "gzgetc error\n"); - exit(1); - } - - if (gzungetc(' ', file) != ' ') { - fprintf(stderr, "gzungetc error\n"); - exit(1); - } - - gzgets(file, (char*)uncompr, (int)uncomprLen); - if (strlen((char*)uncompr) != 7) { /* " hello!" */ - fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err)); - exit(1); - } - if (strcmp((char*)uncompr, hello + 6)) { - fprintf(stderr, "bad gzgets after gzseek\n"); - exit(1); - } else { - printf("gzgets() after gzseek: %s\n", (char*)uncompr); - } - - gzclose(file); -#endif -} - -#endif /* Z_SOLO */ - -/* =========================================================================== - * Test deflate() with small buffers - */ -void test_deflate(Byte *compr, uLong comprLen) -{ - z_stream c_stream; /* compression stream */ - int err; - uLong len = (uLong)strlen(hello)+1; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_in = (z_const unsigned char *)hello; - c_stream.next_out = compr; - - while (c_stream.total_in != len && c_stream.total_out < comprLen) { - c_stream.avail_in = c_stream.avail_out = 1; /* force small buffers */ - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - } - /* Finish the stream, still forcing small buffers: */ - for (;;) { - c_stream.avail_out = 1; - err = deflate(&c_stream, Z_FINISH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "deflate"); - } - - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - 
-/* =========================================================================== - * Test inflate() with small buffers - */ -void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen) -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = 0; - d_stream.next_out = uncompr; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { - d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "inflate"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad inflate\n"); - exit(1); - } else { - printf("inflate(): %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Test deflate() with large buffers and dynamic change of compression level - */ -void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) -{ - z_stream c_stream; /* compression stream */ - int err; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_BEST_SPEED); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_out = compr; - c_stream.avail_out = (uInt)comprLen; - - /* At this point, uncompr is still mostly zeroes, so it should compress - * very well: - */ - c_stream.next_in = uncompr; - c_stream.avail_in = (uInt)uncomprLen; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - if (c_stream.avail_in != 0) { - fprintf(stderr, "deflate not greedy\n"); - exit(1); - } - - /* Feed in already compressed data and switch to no compression: */ - deflateParams(&c_stream, 
Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); - c_stream.next_in = compr; - c_stream.avail_in = (uInt)comprLen/2; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - - /* Switch back to compressing mode: */ - deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED); - c_stream.next_in = uncompr; - c_stream.avail_in = (uInt)uncomprLen; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - fprintf(stderr, "deflate should report Z_STREAM_END\n"); - exit(1); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with large buffers - */ -void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = (uInt)comprLen; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - for (;;) { - d_stream.next_out = uncompr; /* discard the output */ - d_stream.avail_out = (uInt)uncomprLen; - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "large inflate"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (d_stream.total_out != 2*uncomprLen + comprLen/2) { - fprintf(stderr, "bad large inflate: %ld\n", d_stream.total_out); - exit(1); - } else { - printf("large_inflate(): OK\n"); - } -} - -/* =========================================================================== - * Test deflate() with full flush - */ -void test_flush(Byte *compr, uLong comprLen) -{ - z_stream c_stream; /* compression stream */ - int err; - uInt len = (uInt)strlen(hello)+1; - - c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - 
err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_in = (z_const unsigned char *)hello; - c_stream.next_out = compr; - c_stream.avail_in = 3; - c_stream.avail_out = (uInt)*comprLen; - err = deflate(&c_stream, Z_FULL_FLUSH); - CHECK_ERR(err, "deflate"); - - compr[3]++; /* force an error in first compressed block */ - c_stream.avail_in = len - 3; - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - CHECK_ERR(err, "deflate"); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); - - *comprLen = c_stream.total_out; -} - -/* =========================================================================== - * Test inflateSync() - */ -void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, uLong uncomprLen) -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = 2; /* just read the zlib header */ - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - d_stream.next_out = uncompr; - d_stream.avail_out = (uInt)uncomprLen; - - inflate(&d_stream, Z_NO_FLUSH); - CHECK_ERR(err, "inflate"); - - d_stream.avail_in = (uInt)comprLen-2; /* read all compressed data */ - err = inflateSync(&d_stream); /* but skip the damaged part */ - CHECK_ERR(err, "inflateSync"); - - err = inflate(&d_stream, Z_FINISH); - if (err != Z_DATA_ERROR) { - fprintf(stderr, "inflate should report DATA_ERROR\n"); - /* Because of incorrect adler32 */ - exit(1); - } - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - printf("after inflateSync(): hel%s\n", (char *)uncompr); -} - -/* =========================================================================== - * Test deflate() with preset dictionary - */ -void test_dict_deflate(Byte *compr, uLong comprLen) -{ - z_stream c_stream; /* compression stream */ - int err; - - 
c_stream.zalloc = zalloc; - c_stream.zfree = zfree; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_BEST_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - err = deflateSetDictionary(&c_stream, - (const Bytef*)dictionary, (int)sizeof(dictionary)); - CHECK_ERR(err, "deflateSetDictionary"); - - dictId = c_stream.adler; - c_stream.next_out = compr; - c_stream.avail_out = (uInt)comprLen; - - c_stream.next_in = (z_const unsigned char *)hello; - c_stream.avail_in = (uInt)strlen(hello)+1; - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - fprintf(stderr, "deflate should report Z_STREAM_END\n"); - exit(1); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with a preset dictionary - */ -void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr, - uLong uncomprLen) -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = zalloc; - d_stream.zfree = zfree; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = (uInt)comprLen; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - d_stream.next_out = uncompr; - d_stream.avail_out = (uInt)uncomprLen; - - for (;;) { - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - if (err == Z_NEED_DICT) { - if (d_stream.adler != dictId) { - fprintf(stderr, "unexpected dictionary"); - exit(1); - } - err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary, - (int)sizeof(dictionary)); - } - CHECK_ERR(err, "inflate with dict"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad inflate with dict\n"); - exit(1); - } else { - printf("inflate with dictionary: %s\n", (char *)uncompr); - } -} - -/* 
=========================================================================== - * Usage: example [output.gz [input.gz]] - */ - -int main(int argc, char *argv[]) -{ - Byte *compr, *uncompr; - uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */ - uLong uncomprLen = comprLen; - static const char* myVersion = ZLIB_VERSION; - - if (zlibVersion()[0] != myVersion[0]) { - fprintf(stderr, "incompatible zlib version\n"); - exit(1); - - } else if (strcmp(zlibVersion(), ZLIB_VERSION) != 0) { - fprintf(stderr, "warning: different zlib version\n"); - } - - printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", - ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags()); - - compr = (Byte*)calloc((uInt)comprLen, 1); - uncompr = (Byte*)calloc((uInt)uncomprLen, 1); - /* compr and uncompr are cleared to avoid reading uninitialized - * data and to ensure that uncompr compresses well. - */ - if (compr == Z_NULL || uncompr == Z_NULL) { - printf("out of memory\n"); - exit(1); - } - -#ifdef Z_SOLO - argc = strlen(argv[0]); -#else - test_compress(compr, comprLen, uncompr, uncomprLen); - - test_gzio((argc > 1 ? 
argv[1] : TESTFILE), - uncompr, uncomprLen); -#endif - - test_deflate(compr, comprLen); - test_inflate(compr, comprLen, uncompr, uncomprLen); - - test_large_deflate(compr, comprLen, uncompr, uncomprLen); - test_large_inflate(compr, comprLen, uncompr, uncomprLen); - - test_flush(compr, &comprLen); - test_sync(compr, comprLen, uncompr, uncomprLen); - comprLen = uncomprLen; - - test_dict_deflate(compr, comprLen); - test_dict_inflate(compr, comprLen, uncompr, uncomprLen); - - free(compr); - free(uncompr); - - return 0; -} diff --git a/zlibWrapper/examples/fitblk.c b/zlibWrapper/examples/fitblk.c deleted file mode 100644 index 8dc7071ee96c..000000000000 --- a/zlibWrapper/examples/fitblk.c +++ /dev/null @@ -1,254 +0,0 @@ -/* fitblk.c contains minimal changes required to be compiled with zlibWrapper: - * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h" - * - writing block to stdout was disabled */ - -/* fitblk.c: example of fitting compressed output to a specified size - Not copyrighted -- provided to the public domain - Version 1.1 25 November 2004 Mark Adler */ - -/* Version history: - 1.0 24 Nov 2004 First version - 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() - Use fixed-size, stack-allocated raw buffers - Simplify code moving compression to subroutines - Use assert() for internal errors - Add detailed description of approach - */ - -/* Approach to just fitting a requested compressed size: - - fitblk performs three compression passes on a portion of the input - data in order to determine how much of that input will compress to - nearly the requested output block size. The first pass generates - enough deflate blocks to produce output to fill the requested - output size plus a specified excess amount (see the EXCESS define - below). The last deflate block may go quite a bit past that, but - is discarded. The second pass decompresses and recompresses just - the compressed data that fit in the requested plus excess sized - buffer. 
The deflate process is terminated after that amount of - input, which is less than the amount consumed on the first pass. - The last deflate block of the result will be of a comparable size - to the final product, so that the header for that deflate block and - the compression ratio for that block will be about the same as in - the final product. The third compression pass decompresses the - result of the second step, but only the compressed data up to the - requested size minus an amount to allow the compressed stream to - complete (see the MARGIN define below). That will result in a - final compressed stream whose length is less than or equal to the - requested size. Assuming sufficient input and a requested size - greater than a few hundred bytes, the shortfall will typically be - less than ten bytes. - - If the input is short enough that the first compression completes - before filling the requested output size, then that compressed - stream is return with no recompression. - - EXCESS is chosen to be just greater than the shortfall seen in a - two pass approach similar to the above. That shortfall is due to - the last deflate block compressing more efficiently with a smaller - header on the second pass. EXCESS is set to be large enough so - that there is enough uncompressed data for the second pass to fill - out the requested size, and small enough so that the final deflate - block of the second pass will be close in size to the final deflate - block of the third and final pass. MARGIN is chosen to be just - large enough to assure that the final compression has enough room - to complete in all cases. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include "zstd_zlibwrapper.h" - -#define LOG_FITBLK(...) 
/*printf(__VA_ARGS__)*/ -#define local static - -/* print nastygram and leave */ -local void quit(char *why) -{ - fprintf(stderr, "fitblk abort: %s\n", why); - exit(1); -} - -#define RAWLEN 4096 /* intermediate uncompressed buffer size */ - -/* compress from file to def until provided buffer is full or end of - input reached; return last deflate() return value, or Z_ERRNO if - there was read error on the file */ -local int partcompress(FILE *in, z_streamp def) -{ - int ret, flush; - unsigned char raw[RAWLEN]; - - flush = Z_SYNC_FLUSH; - do { - def->avail_in = (uInt)fread(raw, 1, RAWLEN, in); - if (ferror(in)) - return Z_ERRNO; - def->next_in = raw; - if (feof(in)) - flush = Z_FINISH; - LOG_FITBLK("partcompress1 avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); - ret = deflate(def, flush); - LOG_FITBLK("partcompress2 ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); - assert(ret != Z_STREAM_ERROR); - } while (def->avail_out != 0 && flush == Z_SYNC_FLUSH); - return ret; -} - -/* recompress from inf's input to def's output; the input for inf and - the output for def are set in those structures before calling; - return last deflate() return value, or Z_MEM_ERROR if inflate() - was not able to allocate enough memory when it needed to */ -local int recompress(z_streamp inf, z_streamp def) -{ - int ret, flush; - unsigned char raw[RAWLEN]; - - flush = Z_NO_FLUSH; - LOG_FITBLK("recompress start\n"); - do { - /* decompress */ - inf->avail_out = RAWLEN; - inf->next_out = raw; - LOG_FITBLK("recompress1inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out); - ret = inflate(inf, Z_NO_FLUSH); - LOG_FITBLK("recompress2inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, 
(int)inf->total_in, (int)inf->avail_out, (int)inf->total_out); - assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && - ret != Z_NEED_DICT); - if (ret == Z_MEM_ERROR) - return ret; - - /* compress what was decompressed until done or no room */ - def->avail_in = RAWLEN - inf->avail_out; - def->next_in = raw; - if (inf->avail_out != 0) - flush = Z_FINISH; - LOG_FITBLK("recompress1deflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); - ret = deflate(def, flush); - LOG_FITBLK("recompress2deflate ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out); - assert(ret != Z_STREAM_ERROR); - } while (ret != Z_STREAM_END && def->avail_out != 0); - return ret; -} - -#define EXCESS 256 /* empirically determined stream overage */ -#define MARGIN 8 /* amount to back off for completion */ - -/* compress from stdin to fixed-size block on stdout */ -int main(int argc, char **argv) -{ - int ret; /* return code */ - unsigned size; /* requested fixed output block size */ - unsigned have; /* bytes written by deflate() call */ - unsigned char *blk; /* intermediate and final stream */ - unsigned char *tmp; /* close to desired size stream */ - z_stream def, inf; /* zlib deflate and inflate states */ - - /* get requested output size */ - if (argc != 2) - quit("need one argument: size of output block"); - ret = (int)strtol(argv[1], argv + 1, 10); - if (argv[1][0] != 0) - quit("argument must be a number"); - if (ret < 8) /* 8 is minimum zlib stream size */ - quit("need positive size of 8 or greater"); - size = (unsigned)ret; - - printf("zlib version %s\n", ZLIB_VERSION); - if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion()); - - /* allocate memory for buffers and compression engine */ - blk = (unsigned char*)malloc(size + EXCESS); - def.zalloc = Z_NULL; - def.zfree = Z_NULL; - 
def.opaque = Z_NULL; - ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); - if (ret != Z_OK || blk == NULL) - quit("out of memory"); - - /* compress from stdin until output full, or no more input */ - def.avail_out = size + EXCESS; - def.next_out = blk; - LOG_FITBLK("partcompress1 total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out); - ret = partcompress(stdin, &def); - printf("partcompress total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out); - if (ret == Z_ERRNO) - quit("error reading input"); - - /* if it all fit, then size was undersubscribed -- done! */ - if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { - /* write block to stdout */ - have = size + EXCESS - def.avail_out; - /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) - * quit("error writing output"); */ - - /* clean up and print results to stderr */ - ret = deflateEnd(&def); - assert(ret != Z_STREAM_ERROR); - free(blk); - fprintf(stderr, - "%u bytes unused out of %u requested (all input)\n", - size - have, size); - return 0; - } - - /* it didn't all fit -- set up for recompression */ - inf.zalloc = Z_NULL; - inf.zfree = Z_NULL; - inf.opaque = Z_NULL; - inf.avail_in = 0; - inf.next_in = Z_NULL; - ret = inflateInit(&inf); - tmp = (unsigned char*)malloc(size + EXCESS); - if (ret != Z_OK || tmp == NULL) - quit("out of memory"); - ret = deflateReset(&def); - assert(ret != Z_STREAM_ERROR); - - /* do first recompression close to the right amount */ - inf.avail_in = size + EXCESS; - inf.next_in = blk; - def.avail_out = size + EXCESS; - def.next_out = tmp; - LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); - ret = recompress(&inf, &def); - LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); - if (ret == Z_MEM_ERROR) - quit("out of memory"); - - /* set up for next recompression */ - ret = inflateReset(&inf); - assert(ret != Z_STREAM_ERROR); - ret = deflateReset(&def); - 
assert(ret != Z_STREAM_ERROR); - - /* do second and final recompression (third compression) */ - inf.avail_in = size - MARGIN; /* assure stream will complete */ - inf.next_in = tmp; - def.avail_out = size; - def.next_out = blk; - LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); - ret = recompress(&inf, &def); - LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out); - if (ret == Z_MEM_ERROR) - quit("out of memory"); - assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ - - /* done -- write block to stdout */ - have = size - def.avail_out; - /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) - * quit("error writing output"); */ - - /* clean up and print results to stderr */ - free(tmp); - ret = inflateEnd(&inf); - assert(ret != Z_STREAM_ERROR); - ret = deflateEnd(&def); - assert(ret != Z_STREAM_ERROR); - free(blk); - fprintf(stderr, - "%u bytes unused out of %u requested (%lu input)\n", - size - have, size, def.total_in); - return 0; -} diff --git a/zlibWrapper/examples/fitblk_original.c b/zlibWrapper/examples/fitblk_original.c deleted file mode 100644 index 723dc0028127..000000000000 --- a/zlibWrapper/examples/fitblk_original.c +++ /dev/null @@ -1,233 +0,0 @@ -/* fitblk.c: example of fitting compressed output to a specified size - Not copyrighted -- provided to the public domain - Version 1.1 25 November 2004 Mark Adler */ - -/* Version history: - 1.0 24 Nov 2004 First version - 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() - Use fixed-size, stack-allocated raw buffers - Simplify code moving compression to subroutines - Use assert() for internal errors - Add detailed description of approach - */ - -/* Approach to just fitting a requested compressed size: - - fitblk performs three compression passes on a portion of the input - data in order to determine how much of that input will compress to - nearly the requested output block size. 
The first pass generates - enough deflate blocks to produce output to fill the requested - output size plus a specified excess amount (see the EXCESS define - below). The last deflate block may go quite a bit past that, but - is discarded. The second pass decompresses and recompresses just - the compressed data that fit in the requested plus excess sized - buffer. The deflate process is terminated after that amount of - input, which is less than the amount consumed on the first pass. - The last deflate block of the result will be of a comparable size - to the final product, so that the header for that deflate block and - the compression ratio for that block will be about the same as in - the final product. The third compression pass decompresses the - result of the second step, but only the compressed data up to the - requested size minus an amount to allow the compressed stream to - complete (see the MARGIN define below). That will result in a - final compressed stream whose length is less than or equal to the - requested size. Assuming sufficient input and a requested size - greater than a few hundred bytes, the shortfall will typically be - less than ten bytes. - - If the input is short enough that the first compression completes - before filling the requested output size, then that compressed - stream is return with no recompression. - - EXCESS is chosen to be just greater than the shortfall seen in a - two pass approach similar to the above. That shortfall is due to - the last deflate block compressing more efficiently with a smaller - header on the second pass. EXCESS is set to be large enough so - that there is enough uncompressed data for the second pass to fill - out the requested size, and small enough so that the final deflate - block of the second pass will be close in size to the final deflate - block of the third and final pass. MARGIN is chosen to be just - large enough to assure that the final compression has enough room - to complete in all cases. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include "zlib.h" - -#define local static - -/* print nastygram and leave */ -local void quit(char *why) -{ - fprintf(stderr, "fitblk abort: %s\n", why); - exit(1); -} - -#define RAWLEN 4096 /* intermediate uncompressed buffer size */ - -/* compress from file to def until provided buffer is full or end of - input reached; return last deflate() return value, or Z_ERRNO if - there was read error on the file */ -local int partcompress(FILE *in, z_streamp def) -{ - int ret, flush; - unsigned char raw[RAWLEN]; - - flush = Z_NO_FLUSH; - do { - def->avail_in = fread(raw, 1, RAWLEN, in); - if (ferror(in)) - return Z_ERRNO; - def->next_in = raw; - if (feof(in)) - flush = Z_FINISH; - ret = deflate(def, flush); - assert(ret != Z_STREAM_ERROR); - } while (def->avail_out != 0 && flush == Z_NO_FLUSH); - return ret; -} - -/* recompress from inf's input to def's output; the input for inf and - the output for def are set in those structures before calling; - return last deflate() return value, or Z_MEM_ERROR if inflate() - was not able to allocate enough memory when it needed to */ -local int recompress(z_streamp inf, z_streamp def) -{ - int ret, flush; - unsigned char raw[RAWLEN]; - - flush = Z_NO_FLUSH; - do { - /* decompress */ - inf->avail_out = RAWLEN; - inf->next_out = raw; - ret = inflate(inf, Z_NO_FLUSH); - assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && - ret != Z_NEED_DICT); - if (ret == Z_MEM_ERROR) - return ret; - - /* compress what was decompressed until done or no room */ - def->avail_in = RAWLEN - inf->avail_out; - def->next_in = raw; - if (inf->avail_out != 0) - flush = Z_FINISH; - ret = deflate(def, flush); - assert(ret != Z_STREAM_ERROR); - } while (ret != Z_STREAM_END && def->avail_out != 0); - return ret; -} - -#define EXCESS 256 /* empirically determined stream overage */ -#define MARGIN 8 /* amount to back off for completion */ - -/* compress from stdin to fixed-size block on stdout 
*/ -int main(int argc, char **argv) -{ - int ret; /* return code */ - unsigned size; /* requested fixed output block size */ - unsigned have; /* bytes written by deflate() call */ - unsigned char *blk; /* intermediate and final stream */ - unsigned char *tmp; /* close to desired size stream */ - z_stream def, inf; /* zlib deflate and inflate states */ - - /* get requested output size */ - if (argc != 2) - quit("need one argument: size of output block"); - ret = strtol(argv[1], argv + 1, 10); - if (argv[1][0] != 0) - quit("argument must be a number"); - if (ret < 8) /* 8 is minimum zlib stream size */ - quit("need positive size of 8 or greater"); - size = (unsigned)ret; - - /* allocate memory for buffers and compression engine */ - blk = malloc(size + EXCESS); - def.zalloc = Z_NULL; - def.zfree = Z_NULL; - def.opaque = Z_NULL; - ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); - if (ret != Z_OK || blk == NULL) - quit("out of memory"); - - /* compress from stdin until output full, or no more input */ - def.avail_out = size + EXCESS; - def.next_out = blk; - ret = partcompress(stdin, &def); - if (ret == Z_ERRNO) - quit("error reading input"); - - /* if it all fit, then size was undersubscribed -- done! 
*/ - if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { - /* write block to stdout */ - have = size + EXCESS - def.avail_out; - if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) - quit("error writing output"); - - /* clean up and print results to stderr */ - ret = deflateEnd(&def); - assert(ret != Z_STREAM_ERROR); - free(blk); - fprintf(stderr, - "%u bytes unused out of %u requested (all input)\n", - size - have, size); - return 0; - } - - /* it didn't all fit -- set up for recompression */ - inf.zalloc = Z_NULL; - inf.zfree = Z_NULL; - inf.opaque = Z_NULL; - inf.avail_in = 0; - inf.next_in = Z_NULL; - ret = inflateInit(&inf); - tmp = malloc(size + EXCESS); - if (ret != Z_OK || tmp == NULL) - quit("out of memory"); - ret = deflateReset(&def); - assert(ret != Z_STREAM_ERROR); - - /* do first recompression close to the right amount */ - inf.avail_in = size + EXCESS; - inf.next_in = blk; - def.avail_out = size + EXCESS; - def.next_out = tmp; - ret = recompress(&inf, &def); - if (ret == Z_MEM_ERROR) - quit("out of memory"); - - /* set up for next recompression */ - ret = inflateReset(&inf); - assert(ret != Z_STREAM_ERROR); - ret = deflateReset(&def); - assert(ret != Z_STREAM_ERROR); - - /* do second and final recompression (third compression) */ - inf.avail_in = size - MARGIN; /* assure stream will complete */ - inf.next_in = tmp; - def.avail_out = size; - def.next_out = blk; - ret = recompress(&inf, &def); - if (ret == Z_MEM_ERROR) - quit("out of memory"); - assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ - - /* done -- write block to stdout */ - have = size - def.avail_out; - if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) - quit("error writing output"); - - /* clean up and print results to stderr */ - free(tmp); - ret = inflateEnd(&inf); - assert(ret != Z_STREAM_ERROR); - ret = deflateEnd(&def); - assert(ret != Z_STREAM_ERROR); - free(blk); - fprintf(stderr, - "%u bytes unused out of %u requested (%lu input)\n", - size - have, 
size, def.total_in); - return 0; -} diff --git a/zlibWrapper/examples/minigzip.c b/zlibWrapper/examples/minigzip.c deleted file mode 100644 index ef48d74d2906..000000000000 --- a/zlibWrapper/examples/minigzip.c +++ /dev/null @@ -1,605 +0,0 @@ -/* minigzip.c contains minimal changes required to be compiled with zlibWrapper: - * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h" */ - -/* minigzip.c -- simulate gzip using the zlib compression library - * Copyright (C) 1995-2006, 2010, 2011 Jean-loup Gailly. - * For conditions of distribution and use, see https://www.zlib.net/zlib_license.html - */ - -/* - * minigzip is a minimal implementation of the gzip utility. This is - * only an example of using zlib and isn't meant to replace the - * full-featured gzip. No attempt is made to deal with file systems - * limiting names to 14 or 8+3 characters, etc... Error checking is - * very limited. So use minigzip only for testing; use gzip for the - * real thing. On MSDOS, use only on file names without extension - * or in pipe mode. 
- */ - -/* @(#) $Id$ */ - -#define _POSIX_SOURCE /* fileno */ - -#include "zstd_zlibwrapper.h" -#include <stdio.h> - -#ifdef STDC -# include <string.h> -# include <stdlib.h> -#endif - -#ifdef USE_MMAP -# include <sys/types.h> -# include <sys/mman.h> -# include <sys/stat.h> -#endif - -#if defined(MSDOS) || defined(OS2) || defined(_WIN32) || defined(__CYGWIN__) -# include <fcntl.h> -# include <io.h> -# ifdef UNDER_CE -# include <stdlib.h> -# endif -# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) -#else -# define SET_BINARY_MODE(file) -#endif - -#ifdef _MSC_VER -# define snprintf _snprintf -#endif - -#ifdef VMS -# define unlink delete -# define GZ_SUFFIX "-gz" -#endif -#ifdef RISCOS -# define unlink remove -# define GZ_SUFFIX "-gz" -# define fileno(file) file->__file -#endif -#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os -# include <unix.h> /* for fileno */ -#endif - -#if !defined(Z_HAVE_UNISTD_H) && !defined(_LARGEFILE64_SOURCE) -#ifndef _WIN32 /* unlink already in stdio.h for WIN32 */ - extern int unlink _Z_OF((const char *)); -#endif -#endif - -#if defined(UNDER_CE) -# include <windows.h> -# define perror(s) pwinerror(s) - -/* Map the Windows error number in ERROR to a locale-dependent error - message string and return a pointer to it. Typically, the values - for ERROR come from GetLastError. - - The string pointed to shall not be modified by the application, - but may be overwritten by a subsequent call to strwinerror - - The strwinerror function does not change the current setting - of GetLastError. */ - -static char *strwinerror(DWORD error) -{ - static char buf[1024]; - - wchar_t *msgbuf; - DWORD lasterr = GetLastError(); - DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM - | FORMAT_MESSAGE_ALLOCATE_BUFFER, - NULL, - error, - 0, /* Default language */ - (LPVOID)&msgbuf, - 0, - NULL); - if (chars != 0) { - /* If there is an \r\n appended, zap it. 
*/ - if (chars >= 2 - && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { - chars -= 2; - msgbuf[chars] = 0; - } - - if (chars > sizeof (buf) - 1) { - chars = sizeof (buf) - 1; - msgbuf[chars] = 0; - } - - wcstombs(buf, msgbuf, chars + 1); - LocalFree(msgbuf); - } - else { - sprintf(buf, "unknown win32 error (%ld)", error); - } - - SetLastError(lasterr); - return buf; -} - -static void pwinerror (const char *s) -{ - if (s && *s) - fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ())); - else - fprintf(stderr, "%s\n", strwinerror(GetLastError ())); -} - -#endif /* UNDER_CE */ - -#ifndef GZ_SUFFIX -# define GZ_SUFFIX ".gz" -#endif -#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) - -#define BUFLEN 16384 -#define MAX_NAME_LEN 1024 - -#ifdef MAXSEG_64K -# define local static - /* Needed for systems with limitation on stack size. */ -#else -# define local -#endif - -#ifdef Z_SOLO -/* for Z_SOLO, create simplified gz* functions using deflate and inflate */ - -#if defined(Z_HAVE_UNISTD_H) || defined(Z_LARGE) -# include <unistd.h> /* for unlink() */ -#endif - -void *myalloc _Z_OF((void *, unsigned, unsigned)); -void myfree _Z_OF((void *, void *)); - -void *myalloc(q, n, m) - void *q; - unsigned n, m; -{ - q = Z_NULL; - return calloc(n, m); -} - -void myfree(q, p) - void *q, *p; -{ - q = Z_NULL; - free(p); -} - -typedef struct gzFile_s { - FILE *file; - int write; - int err; - char *msg; - z_stream strm; -} *gzFile; - -gzFile gzopen _Z_OF((const char *, const char *)); -gzFile gzdopen _Z_OF((int, const char *)); -gzFile gz_open _Z_OF((const char *, int, const char *)); - -gzFile gzopen(path, mode) -const char *path; -const char *mode; -{ - return gz_open(path, -1, mode); -} - -gzFile gzdopen(fd, mode) -int fd; -const char *mode; -{ - return gz_open(NULL, fd, mode); -} - -gzFile gz_open(const char *path, int fd, const char *mode) { - gzFile gz; - int ret; - - gz = malloc(sizeof(struct gzFile_s)); - if (gz == NULL) - return NULL; - gz->write = strchr(mode, 'w') != 
NULL; - gz->strm.zalloc = myalloc; - gz->strm.zfree = myfree; - gz->strm.opaque = Z_NULL; - if (gz->write) - ret = deflateInit2(&(gz->strm), -1, 8, 15 + 16, 8, 0); - else { - gz->strm.next_in = 0; - gz->strm.avail_in = Z_NULL; - ret = inflateInit2(&(gz->strm), 15 + 16); - } - if (ret != Z_OK) { - free(gz); - return NULL; - } - gz->file = path == NULL ? fdopen(fd, gz->write ? "wb" : "rb") : - fopen(path, gz->write ? "wb" : "rb"); - if (gz->file == NULL) { - gz->write ? deflateEnd(&(gz->strm)) : inflateEnd(&(gz->strm)); - free(gz); - return NULL; - } - gz->err = 0; - gz->msg = ""; - return gz; -} - -int gzwrite _Z_OF((gzFile, const void *, unsigned)); - -int gzwrite(gzFile gz, const void *buf, unsigned len) { - z_stream *strm; - unsigned char out[BUFLEN] = { 0 }; - - if (gz == NULL || !gz->write) - return 0; - strm = &(gz->strm); - strm->next_in = (void *)buf; - strm->avail_in = len; - do { - strm->next_out = out; - strm->avail_out = BUFLEN; - (void)deflate(strm, Z_NO_FLUSH); - fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); - } while (strm->avail_out == 0); - return len; -} - -int gzread _Z_OF((gzFile, void *, unsigned)); - -int gzread(gzFile gz, void *buf, unsigned len) { - int ret; - unsigned got; - unsigned char in[1]; - z_stream *strm; - - if (gz == NULL || gz->write) - return 0; - if (gz->err) - return 0; - strm = &(gz->strm); - strm->next_out = (void *)buf; - strm->avail_out = len; - do { - got = fread(in, 1, 1, gz->file); - if (got == 0) - break; - strm->next_in = in; - strm->avail_in = 1; - ret = inflate(strm, Z_NO_FLUSH); - if (ret == Z_DATA_ERROR) { - gz->err = Z_DATA_ERROR; - gz->msg = strm->msg; - return 0; - } - if (ret == Z_STREAM_END) - inflateReset(strm); - } while (strm->avail_out); - return len - strm->avail_out; -} - -int gzclose _Z_OF((gzFile)); - -int gzclose(gzFile gz) { - z_stream *strm; - unsigned char out[BUFLEN] = { 0 }; - - if (gz == NULL) - return Z_STREAM_ERROR; - strm = &(gz->strm); - if (gz->write) { - strm->next_in = Z_NULL; - 
strm->avail_in = 0; - do { - strm->next_out = out; - strm->avail_out = BUFLEN; - (void)deflate(strm, Z_FINISH); - fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); - } while (strm->avail_out == 0); - deflateEnd(strm); - } - else - inflateEnd(strm); - fclose(gz->file); - free(gz); - return Z_OK; -} - -const char *gzerror _Z_OF((gzFile, int *)); - -const char *gzerror(gzFile gz, int *err) -{ - *err = gz->err; - return gz->msg; -} - -#endif - -char *prog; - -void error _Z_OF((const char *msg)); -void gz_compress _Z_OF((FILE *in, gzFile out)); -#ifdef USE_MMAP -int gz_compress_mmap _Z_OF((FILE *in, gzFile out)); -#endif -void gz_uncompress _Z_OF((gzFile in, FILE *out)); -void file_compress _Z_OF((char *file, char *mode)); -void file_uncompress _Z_OF((char *file)); -int main _Z_OF((int argc, char *argv[])); - -/* =========================================================================== - * Display error message and exit - */ -void error(const char *msg) -{ - fprintf(stderr, "%s: %s\n", prog, msg); - exit(1); -} - -/* =========================================================================== - * Compress input to output then close both files. - */ - -void gz_compress(FILE *in, gzFile out) -{ - local char buf[BUFLEN]; - int len; - int err; - -#ifdef USE_MMAP - /* Try first compressing with mmap. If mmap fails (minigzip used in a - * pipe), use the normal fread loop. - */ - if (gz_compress_mmap(in, out) == Z_OK) return; -#endif - for (;;) { - len = (int)fread(buf, 1, sizeof(buf), in); - if (ferror(in)) { - perror("fread"); - exit(1); - } - if (len == 0) break; - - if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err)); - } - fclose(in); - if (gzclose(out) != Z_OK) error("failed gzclose"); -} - -#ifdef USE_MMAP /* MMAP version, Miguel Albrecht <malbrech@eso.org> */ - -/* Try compressing the input file at once using mmap. Return Z_OK if - * if success, Z_ERRNO otherwise. 
- */ -int gz_compress_mmap(FILE *in, gzFile out) { - int len; - int err; - int ifd = fileno(in); - caddr_t buf; /* mmap'ed buffer for the entire input file */ - off_t buf_len; /* length of the input file */ - struct stat sb; - - /* Determine the size of the file, needed for mmap: */ - if (fstat(ifd, &sb) < 0) return Z_ERRNO; - buf_len = sb.st_size; - if (buf_len <= 0) return Z_ERRNO; - - /* Now do the actual mmap: */ - buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0); - if (buf == (caddr_t)(-1)) return Z_ERRNO; - - /* Compress the whole file at once: */ - len = gzwrite(out, (char *)buf, (unsigned)buf_len); - - if (len != (int)buf_len) error(gzerror(out, &err)); - - munmap(buf, buf_len); - fclose(in); - if (gzclose(out) != Z_OK) error("failed gzclose"); - return Z_OK; -} -#endif /* USE_MMAP */ - -/* =========================================================================== - * Uncompress input to output then close both files. - */ -void gz_uncompress(gzFile in, FILE *out) { - local char buf[BUFLEN]; - int len; - int err; - - for (;;) { - len = gzread(in, buf, sizeof(buf)); - if (len < 0) error (gzerror(in, &err)); - if (len == 0) break; - - if ((int)fwrite(buf, 1, (unsigned)len, out) != len) { - error("failed fwrite"); - } - } - if (fclose(out)) error("failed fclose"); - - if (gzclose(in) != Z_OK) error("failed gzclose"); -} - - -/* =========================================================================== - * Compress the given file: create a corresponding .gz file and remove the - * original. 
- */ -void file_compress(char *file, char *mode) { - local char outfile[MAX_NAME_LEN]; - FILE *in; - gzFile out; - - if (strlen(file) + strlen(GZ_SUFFIX) >= sizeof(outfile)) { - fprintf(stderr, "%s: filename too long\n", prog); - exit(1); - } - - strcpy(outfile, file); - strcat(outfile, GZ_SUFFIX); - - in = fopen(file, "rb"); - if (in == NULL) { - perror(file); - exit(1); - } - out = gzopen(outfile, mode); - if (out == NULL) { - fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile); - exit(1); - } - gz_compress(in, out); - - unlink(file); -} - - -/* =========================================================================== - * Uncompress the given file and remove the original. - */ -void file_uncompress(char *file) { - local char buf[MAX_NAME_LEN]; - char *infile, *outfile; - FILE *out; - gzFile in; - size_t len = strlen(file); - - if (len + strlen(GZ_SUFFIX) >= sizeof(buf)) { - fprintf(stderr, "%s: filename too long\n", prog); - exit(1); - } - - strcpy(buf, file); - - if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) { - infile = file; - outfile = buf; - outfile[len-3] = '\0'; - } else { - outfile = file; - infile = buf; - strcat(infile, GZ_SUFFIX); - } - in = gzopen(infile, "rb"); - if (in == NULL) { - fprintf(stderr, "%s: can't gzopen %s\n", prog, infile); - exit(1); - } - out = fopen(outfile, "wb"); - if (out == NULL) { - perror(file); - exit(1); - } - - gz_uncompress(in, out); - - unlink(infile); -} - - -/* =========================================================================== - * Usage: minigzip [-c] [-d] [-f] [-h] [-r] [-1 to -9] [files...] 
- * -c : write to standard output - * -d : decompress - * -f : compress with Z_FILTERED - * -h : compress with Z_HUFFMAN_ONLY - * -r : compress with Z_RLE - * -1 to -9 : compression level - */ - -int main(int argc, char *argv[]) { - int copyout = 0; - int uncompr = 0; - gzFile file; - char *bname, outmode[20]; - - strcpy(outmode, "wb6 "); - - prog = argv[0]; - bname = strrchr(argv[0], '/'); - if (bname) - bname++; - else - bname = argv[0]; - argc--, argv++; - - if (!strcmp(bname, "gunzip")) - uncompr = 1; - else if (!strcmp(bname, "zcat")) - copyout = uncompr = 1; - - while (argc > 0) { - if (strcmp(*argv, "-c") == 0) - copyout = 1; - else if (strcmp(*argv, "-d") == 0) - uncompr = 1; - else if (strcmp(*argv, "-f") == 0) - outmode[3] = 'f'; - else if (strcmp(*argv, "-h") == 0) - outmode[3] = 'h'; - else if (strcmp(*argv, "-r") == 0) - outmode[3] = 'R'; - else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' && - (*argv)[2] == 0) - outmode[2] = (*argv)[1]; - else - break; - argc--, argv++; - } - if (outmode[3] == ' ') - outmode[3] = 0; - if (argc == 0) { - SET_BINARY_MODE(stdin); - SET_BINARY_MODE(stdout); - if (uncompr) { - file = gzdopen(fileno(stdin), "rb"); - if (file == NULL) error("can't gzdopen stdin"); - gz_uncompress(file, stdout); - } else { - file = gzdopen(fileno(stdout), outmode); - if (file == NULL) error("can't gzdopen stdout"); - gz_compress(stdin, file); - } - } else { - if (copyout) { - SET_BINARY_MODE(stdout); - } - do { - if (uncompr) { - if (copyout) { - file = gzopen(*argv, "rb"); - if (file == NULL) - fprintf(stderr, "%s: can't gzopen %s\n", prog, *argv); - else - gz_uncompress(file, stdout); - } else { - file_uncompress(*argv); - } - } else { - if (copyout) { - FILE * in = fopen(*argv, "rb"); - - if (in == NULL) { - perror(*argv); - } else { - file = gzdopen(fileno(stdout), outmode); - if (file == NULL) error("can't gzdopen stdout"); - - gz_compress(in, file); - } - - } else { - file_compress(*argv, outmode); - } - } - } while 
(argv++, --argc); - } - return 0; -} diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c deleted file mode 100644 index 33dcb2a7339c..000000000000 --- a/zlibWrapper/examples/zwrapbench.c +++ /dev/null @@ -1,1018 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - - -/* ************************************* -* Includes -***************************************/ -#include "util.h" /* Compiler options, UTIL_GetFileSize, UTIL_sleep */ -#include <stdlib.h> /* malloc, free */ -#include <string.h> /* memset */ -#include <stdio.h> /* fprintf, fopen, ftello64 */ -#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ -#include <ctype.h> /* toupper */ -#include <errno.h> /* errno */ - -#include "timefn.h" /* UTIL_time_t, UTIL_getTime, UTIL_clockSpanMicro, UTIL_waitForNextTick */ -#include "mem.h" -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -#include "datagen.h" /* RDG_genBuffer */ -#include "xxhash.h" - -#include "../zstd_zlibwrapper.h" - - - -/*-************************************ -* Tuning parameters -**************************************/ -#ifndef ZSTDCLI_CLEVEL_DEFAULT -# define ZSTDCLI_CLEVEL_DEFAULT 3 -#endif - - -/*-************************************ -* Constants -**************************************/ -#define COMPRESSOR_NAME "Zstandard wrapper for zlib command line interface" -#ifndef ZSTD_VERSION -# define ZSTD_VERSION "v" ZSTD_VERSION_STRING -#endif -#define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR - -#ifndef ZSTD_GIT_COMMIT -# define ZSTD_GIT_COMMIT_STRING "" -#else -# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT) -#endif 
- -#define NBLOOPS 3 -#define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */ -#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */ -#define COOLPERIOD_SEC 10 - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); - -static U32 g_compressibilityDefault = 50; - - -/* ************************************* -* console display -***************************************/ -#define DEFAULT_DISPLAY_LEVEL 2 -#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } -static unsigned g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */ -static FILE* displayOut; - -#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ - if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \ - { g_time = clock(); DISPLAY(__VA_ARGS__); \ - if (g_displayLevel>=4) fflush(displayOut); } } -static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; -static clock_t g_time = 0; - - -/* ************************************* -* Exceptions -***************************************/ -#ifndef DEBUG -# define DEBUG 0 -#endif -#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } -#define EXM_THROW(error, ...) 
\ -{ \ - DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ - DISPLAYLEVEL(1, "Error %i : ", error); \ - DISPLAYLEVEL(1, __VA_ARGS__); \ - DISPLAYLEVEL(1, "\n"); \ - exit(error); \ -} - - -/* ************************************* -* Benchmark Parameters -***************************************/ -static unsigned g_nbIterations = NBLOOPS; -static size_t g_blockSize = 0; -int g_additionalParam = 0; - -static void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } - -static void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } - -static void BMK_SetNbIterations(unsigned nbLoops) -{ - g_nbIterations = nbLoops; - DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbIterations); -} - -static void BMK_SetBlockSize(size_t blockSize) -{ - g_blockSize = blockSize; - DISPLAYLEVEL(2, "using blocks of size %u KB \n", (unsigned)(blockSize>>10)); -} - - -/* ******************************************************** -* Bench functions -**********************************************************/ -#undef MIN -#undef MAX -#define MIN(a,b) ((a)<(b) ? (a) : (b)) -#define MAX(a,b) ((a)>(b) ? (a) : (b)) - -typedef struct -{ - z_const char* srcPtr; - size_t srcSize; - char* cPtr; - size_t cRoom; - size_t cSize; - char* resPtr; - size_t resSize; -} blockParam_t; - -typedef enum { BMK_ZSTD, BMK_ZSTD_STREAM, BMK_ZLIB, BMK_ZWRAP_ZLIB, BMK_ZWRAP_ZSTD, BMK_ZLIB_REUSE, BMK_ZWRAP_ZLIB_REUSE, BMK_ZWRAP_ZSTD_REUSE } BMK_compressor; - - -static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize, - const char* displayName, int cLevel, - const size_t* fileSizes, U32 nbFiles, - const void* dictBuffer, size_t dictBufferSize, BMK_compressor compressor) -{ - size_t const blockSize = (g_blockSize>=32 ? 
g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; - size_t const avgSize = MIN(g_blockSize, (srcSize / nbFiles)); - U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; - blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ - void* const compressedBuffer = malloc(maxCompressedSize); - void* const resultBuffer = malloc(srcSize); - ZSTD_CCtx* const ctx = ZSTD_createCCtx(); - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - U32 nbBlocks; - - /* checks */ - if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx) - EXM_THROW(31, "allocation error : not enough memory"); - - /* init */ - if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ - - /* Init blockTable data */ - { z_const char* srcPtr = (z_const char*)srcBuffer; - char* cPtr = (char*)compressedBuffer; - char* resPtr = (char*)resultBuffer; - U32 fileNb; - for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) { - size_t remaining = fileSizes[fileNb]; - U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize); - U32 const blockEnd = nbBlocks + nbBlocksforThisFile; - for ( ; nbBlocks<blockEnd; nbBlocks++) { - size_t const thisBlockSize = MIN(remaining, blockSize); - blockTable[nbBlocks].srcPtr = srcPtr; - blockTable[nbBlocks].cPtr = cPtr; - blockTable[nbBlocks].resPtr = resPtr; - blockTable[nbBlocks].srcSize = thisBlockSize; - blockTable[nbBlocks].cRoom = ZSTD_compressBound(thisBlockSize); - srcPtr += thisBlockSize; - cPtr += blockTable[nbBlocks].cRoom; - resPtr += thisBlockSize; - remaining -= thisBlockSize; - } } } - - /* warming up memory */ - RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); - - /* Bench */ - { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); - U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); - 
UTIL_time_t coolTime; - U64 const maxTime = (g_nbIterations * TIMELOOP_MICROSEC) + 100; - U64 totalCTime=0, totalDTime=0; - U32 cCompleted=0, dCompleted=0; -# define NB_MARKS 4 - const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; - U32 markNb = 0; - size_t cSize = 0; - double ratio = 0.; - - coolTime = UTIL_getTime(); - DISPLAYLEVEL(2, "\r%79s\r", ""); - while (!cCompleted | !dCompleted) { - UTIL_time_t clockStart; - U64 clockLoop = g_nbIterations ? TIMELOOP_MICROSEC : 1; - - /* overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DISPLAYLEVEL(2, "\rcooling down ... \r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); - } - - /* Compression */ - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (unsigned)srcSize); - if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ - - UTIL_sleepMilli(1); /* give processor time to other processes */ - UTIL_waitForNextTick(); - clockStart = UTIL_getTime(); - - if (!cCompleted) { /* still some time to do compression tests */ - U32 nbLoops = 0; - if (compressor == BMK_ZSTD) { - ZSTD_parameters const zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); - ZSTD_customMem const cmem = { NULL, NULL, NULL }; - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_auto, zparams.cParams, cmem); - if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure"); - - do { - U32 blockNb; - size_t rSize; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - if (dictBufferSize) { - rSize = ZSTD_compress_usingCDict(ctx, - blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, - blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, - cdict); - } else { - rSize = ZSTD_compressCCtx (ctx, - blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, - blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, cLevel); - } - if (ZSTD_isError(rSize)) 
EXM_THROW(1, "ZSTD_compress_usingCDict() failed : %s", ZSTD_getErrorName(rSize)); - blockTable[blockNb].cSize = rSize; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - ZSTD_freeCDict(cdict); - } else if (compressor == BMK_ZSTD_STREAM) { - ZSTD_parameters const zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); - ZSTD_inBuffer inBuffer; - ZSTD_outBuffer outBuffer; - ZSTD_CStream* zbc = ZSTD_createCStream(); - size_t rSize; - ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); - - if (!cctxParams) EXM_THROW(1, "ZSTD_createCCtxParams() allocation failure"); - if (zbc == NULL) EXM_THROW(1, "ZSTD_createCStream() allocation failure"); - - { int initErr = 0; - initErr |= ZSTD_isError(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only)); - initErr |= ZSTD_isError(ZSTD_CCtxParams_init_advanced(cctxParams, zparams)); - initErr |= ZSTD_isError(ZSTD_CCtx_setParametersUsingCCtxParams(zbc, cctxParams)); - initErr |= ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(zbc, avgSize)); - initErr |= ZSTD_isError(ZSTD_CCtx_loadDictionary(zbc, dictBuffer, dictBufferSize)); - - ZSTD_freeCCtxParams(cctxParams); - if (initErr) EXM_THROW(1, "CCtx init failed!"); - } - - do { - U32 blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - rSize = ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_CCtx_reset() failed : %s", ZSTD_getErrorName(rSize)); - rSize = ZSTD_CCtx_setPledgedSrcSize(zbc, blockTable[blockNb].srcSize); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_CCtx_setPledgedSrcSize() failed : %s", ZSTD_getErrorName(rSize)); - inBuffer.src = blockTable[blockNb].srcPtr; - inBuffer.size = blockTable[blockNb].srcSize; - inBuffer.pos = 0; - outBuffer.dst = blockTable[blockNb].cPtr; - outBuffer.size = blockTable[blockNb].cRoom; - outBuffer.pos = 0; - rSize = ZSTD_compressStream(zbc, &outBuffer, &inBuffer); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compressStream() failed : %s", ZSTD_getErrorName(rSize)); - rSize = 
ZSTD_endStream(zbc, &outBuffer); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_endStream() failed : %s", ZSTD_getErrorName(rSize)); - blockTable[blockNb].cSize = outBuffer.pos; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - ZSTD_freeCStream(zbc); - } else if (compressor == BMK_ZWRAP_ZLIB_REUSE || compressor == BMK_ZWRAP_ZSTD_REUSE || compressor == BMK_ZLIB_REUSE) { - z_stream def; - int ret; - int useSetDict = (dictBuffer != NULL); - if (compressor == BMK_ZLIB_REUSE || compressor == BMK_ZWRAP_ZLIB_REUSE) ZWRAP_useZSTDcompression(0); - else ZWRAP_useZSTDcompression(1); - def.zalloc = Z_NULL; - def.zfree = Z_NULL; - def.opaque = Z_NULL; - ret = deflateInit(&def, cLevel); - if (ret != Z_OK) EXM_THROW(1, "deflateInit failure"); - /* if (ZWRAP_isUsingZSTDcompression()) { - ret = ZWRAP_setPledgedSrcSize(&def, avgSize); - if (ret != Z_OK) EXM_THROW(1, "ZWRAP_setPledgedSrcSize failure"); - } */ - do { - U32 blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - if (ZWRAP_isUsingZSTDcompression()) - ret = ZWRAP_deflateReset_keepDict(&def); /* reuse dictionary to make compression faster */ - else - ret = deflateReset(&def); - if (ret != Z_OK) EXM_THROW(1, "deflateReset failure"); - if (useSetDict) { - ret = deflateSetDictionary(&def, (const z_Bytef*)dictBuffer, dictBufferSize); - if (ret != Z_OK) EXM_THROW(1, "deflateSetDictionary failure"); - if (ZWRAP_isUsingZSTDcompression()) useSetDict = 0; /* zstd doesn't require deflateSetDictionary after ZWRAP_deflateReset_keepDict */ - } - def.next_in = (z_const z_Bytef*) blockTable[blockNb].srcPtr; - def.avail_in = (uInt)blockTable[blockNb].srcSize; - def.total_in = 0; - def.next_out = (z_Bytef*) blockTable[blockNb].cPtr; - def.avail_out = (uInt)blockTable[blockNb].cRoom; - def.total_out = 0; - ret = deflate(&def, Z_FINISH); - if (ret != Z_STREAM_END) EXM_THROW(1, "deflate failure ret=%d srcSize=%d" , ret, (int)blockTable[blockNb].srcSize); - blockTable[blockNb].cSize = def.total_out; - } - 
nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - ret = deflateEnd(&def); - if (ret != Z_OK) EXM_THROW(1, "deflateEnd failure"); - } else { - z_stream def; - if (compressor == BMK_ZLIB || compressor == BMK_ZWRAP_ZLIB) ZWRAP_useZSTDcompression(0); - else ZWRAP_useZSTDcompression(1); - do { - U32 blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - int ret; - def.zalloc = Z_NULL; - def.zfree = Z_NULL; - def.opaque = Z_NULL; - ret = deflateInit(&def, cLevel); - if (ret != Z_OK) EXM_THROW(1, "deflateInit failure"); - if (dictBuffer) { - ret = deflateSetDictionary(&def, (const z_Bytef*)dictBuffer, dictBufferSize); - if (ret != Z_OK) EXM_THROW(1, "deflateSetDictionary failure"); - } - def.next_in = (z_const z_Bytef*) blockTable[blockNb].srcPtr; - def.avail_in = (uInt)blockTable[blockNb].srcSize; - def.total_in = 0; - def.next_out = (z_Bytef*) blockTable[blockNb].cPtr; - def.avail_out = (uInt)blockTable[blockNb].cRoom; - def.total_out = 0; - ret = deflate(&def, Z_FINISH); - if (ret != Z_STREAM_END) EXM_THROW(1, "deflate failure"); - ret = deflateEnd(&def); - if (ret != Z_OK) EXM_THROW(1, "deflateEnd failure"); - blockTable[blockNb].cSize = def.total_out; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - } - { U64 const clockSpan = UTIL_clockSpanMicro(clockStart); - if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops; - totalCTime += clockSpan; - cCompleted = totalCTime>maxTime; - } } - - cSize = 0; - { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; } - ratio = (double)srcSize / (double)cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r", - marks[markNb], displayName, (unsigned)srcSize, (unsigned)cSize, ratio, - (double)srcSize / (double)fastestC ); - - (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#if 1 - /* Decompression */ - if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); 
/* warm result buffer */ - - UTIL_sleepMilli(1); /* give processor time to other processes */ - UTIL_waitForNextTick(); - clockStart = UTIL_getTime(); - - if (!dCompleted) { - U32 nbLoops = 0; - if (compressor == BMK_ZSTD) { - ZSTD_DDict* ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); - if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); - do { - unsigned blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - size_t const regenSize = ZSTD_decompress_usingDDict(dctx, - blockTable[blockNb].resPtr, blockTable[blockNb].srcSize, - blockTable[blockNb].cPtr, blockTable[blockNb].cSize, - ddict); - if (ZSTD_isError(regenSize)) { - DISPLAY("ZSTD_decompress_usingDDict() failed on block %u : %s \n", - blockNb, ZSTD_getErrorName(regenSize)); - clockLoop = 0; /* force immediate test end */ - break; - } - blockTable[blockNb].resSize = regenSize; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - ZSTD_freeDDict(ddict); - } else if (compressor == BMK_ZSTD_STREAM) { - ZSTD_inBuffer inBuffer; - ZSTD_outBuffer outBuffer; - ZSTD_DStream* zbd = ZSTD_createDStream(); - size_t rSize; - if (zbd == NULL) EXM_THROW(1, "ZSTD_createDStream() allocation failure"); - rSize = ZSTD_DCtx_reset(zbd, ZSTD_reset_session_only); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_DCtx_reset() failed : %s", ZSTD_getErrorName(rSize)); - rSize = ZSTD_DCtx_loadDictionary(zbd, dictBuffer, dictBufferSize); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_DCtx_loadDictionary() failed : %s", ZSTD_getErrorName(rSize)); - do { - U32 blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - rSize = ZSTD_DCtx_reset(zbd, ZSTD_reset_session_only); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_DCtx_reset() failed : %s", ZSTD_getErrorName(rSize)); - inBuffer.src = blockTable[blockNb].cPtr; - inBuffer.size = blockTable[blockNb].cSize; - inBuffer.pos = 0; - outBuffer.dst = blockTable[blockNb].resPtr; - outBuffer.size = blockTable[blockNb].srcSize; - outBuffer.pos = 0; - rSize 
= ZSTD_decompressStream(zbd, &outBuffer, &inBuffer); - if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_decompressStream() failed : %s", ZSTD_getErrorName(rSize)); - blockTable[blockNb].resSize = outBuffer.pos; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - ZSTD_freeDStream(zbd); - } else if (compressor == BMK_ZWRAP_ZLIB_REUSE || compressor == BMK_ZWRAP_ZSTD_REUSE || compressor == BMK_ZLIB_REUSE) { - z_stream inf; - int ret; - if (compressor == BMK_ZLIB_REUSE) ZWRAP_setDecompressionType(ZWRAP_FORCE_ZLIB); - else ZWRAP_setDecompressionType(ZWRAP_AUTO); - inf.zalloc = Z_NULL; - inf.zfree = Z_NULL; - inf.opaque = Z_NULL; - ret = inflateInit(&inf); - if (ret != Z_OK) EXM_THROW(1, "inflateInit failure"); - do { - U32 blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - if (ZWRAP_isUsingZSTDdecompression(&inf)) - ret = ZWRAP_inflateReset_keepDict(&inf); /* reuse dictionary to make decompression faster; inflate will return Z_NEED_DICT only for the first time */ - else - ret = inflateReset(&inf); - if (ret != Z_OK) EXM_THROW(1, "inflateReset failure"); - inf.next_in = (z_const z_Bytef*) blockTable[blockNb].cPtr; - inf.avail_in = (uInt)blockTable[blockNb].cSize; - inf.total_in = 0; - inf.next_out = (z_Bytef*) blockTable[blockNb].resPtr; - inf.avail_out = (uInt)blockTable[blockNb].srcSize; - inf.total_out = 0; - ret = inflate(&inf, Z_FINISH); - if (ret == Z_NEED_DICT) { - ret = inflateSetDictionary(&inf, (const z_Bytef*)dictBuffer, dictBufferSize); - if (ret != Z_OK) EXM_THROW(1, "inflateSetDictionary failure"); - ret = inflate(&inf, Z_FINISH); - } - if (ret != Z_STREAM_END) EXM_THROW(1, "inflate failure"); - blockTable[blockNb].resSize = inf.total_out; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - ret = inflateEnd(&inf); - if (ret != Z_OK) EXM_THROW(1, "inflateEnd failure"); - } else { - z_stream inf; - if (compressor == BMK_ZLIB) ZWRAP_setDecompressionType(ZWRAP_FORCE_ZLIB); - else 
ZWRAP_setDecompressionType(ZWRAP_AUTO); - do { - U32 blockNb; - for (blockNb=0; blockNb<nbBlocks; blockNb++) { - int ret; - inf.zalloc = Z_NULL; - inf.zfree = Z_NULL; - inf.opaque = Z_NULL; - ret = inflateInit(&inf); - if (ret != Z_OK) EXM_THROW(1, "inflateInit failure"); - inf.next_in = (z_const z_Bytef*) blockTable[blockNb].cPtr; - inf.avail_in = (uInt)blockTable[blockNb].cSize; - inf.total_in = 0; - inf.next_out = (z_Bytef*) blockTable[blockNb].resPtr; - inf.avail_out = (uInt)blockTable[blockNb].srcSize; - inf.total_out = 0; - ret = inflate(&inf, Z_FINISH); - if (ret == Z_NEED_DICT) { - ret = inflateSetDictionary(&inf, (const z_Bytef*) dictBuffer, dictBufferSize); - if (ret != Z_OK) EXM_THROW(1, "inflateSetDictionary failure"); - ret = inflate(&inf, Z_FINISH); - } - if (ret != Z_STREAM_END) EXM_THROW(1, "inflate failure"); - ret = inflateEnd(&inf); - if (ret != Z_OK) EXM_THROW(1, "inflateEnd failure"); - blockTable[blockNb].resSize = inf.total_out; - } - nbLoops++; - } while (UTIL_clockSpanMicro(clockStart) < clockLoop); - } - { U64 const clockSpan = UTIL_clockSpanMicro(clockStart); - if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops; - totalDTime += clockSpan; - dCompleted = totalDTime>maxTime; - } } - - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", - marks[markNb], displayName, (unsigned)srcSize, (unsigned)cSize, ratio, - (double)srcSize / (double)fastestC, - (double)srcSize / (double)fastestD ); - - /* CRC Checking */ - { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); - if (crcOrig!=crcCheck) { - size_t u; - DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); - for (u=0; u<srcSize; u++) { - if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) { - unsigned segNb, bNb, pos; - size_t bacc = 0; - DISPLAY("Decoding error at pos %u ", (unsigned)u); - for (segNb = 0; segNb < nbBlocks; segNb++) { - if (bacc + blockTable[segNb].srcSize > u) break; - bacc += blockTable[segNb].srcSize; - } - pos = (U32)(u - bacc); - bNb = pos / (128 KB); - DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); - break; - } - if (u==srcSize-1) { /* should never happen */ - DISPLAY("no difference detected\n"); - } } - break; - } } /* CRC Checking */ -#endif - } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ - - if (g_displayLevel == 1) { - double cSpeed = (double)srcSize / (double)fastestC; - double dSpeed = (double)srcSize / (double)fastestD; - if (g_additionalParam) - DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); - else - DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); - } - DISPLAYLEVEL(2, "%2i#\n", cLevel); - } /* Bench */ - - /* clean up */ - free(blockTable); - free(compressedBuffer); - free(resultBuffer); - ZSTD_freeCCtx(ctx); - ZSTD_freeDCtx(dctx); - return 0; -} - - -static size_t BMK_findMaxMem(U64 requiredMem) -{ - size_t const step = 64 MB; - BYTE* testmem = NULL; - - requiredMem = (((requiredMem >> 26) + 1) << 26); - requiredMem += step; - if (requiredMem > maxMemory) requiredMem = maxMemory; - - do { - testmem = (BYTE*)malloc((size_t)requiredMem); - requiredMem -= step; - } while (!testmem && requiredMem); /* do not allocate zero bytes */ - - free(testmem); - return (size_t)(requiredMem+1); /* avoid zero */ -} - -static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, - const char* displayName, int cLevel, int cLevelLast, - 
const size_t* fileSizes, unsigned nbFiles, - const void* dictBuffer, size_t dictBufferSize) -{ - int l; - - const char* pch = strrchr(displayName, '\\'); /* Windows */ - if (!pch) pch = strrchr(displayName, '/'); /* Linux */ - if (pch) displayName = pch+1; - - SET_REALTIME_PRIORITY; - - if (g_displayLevel == 1 && !g_additionalParam) - DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", - ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, - (unsigned)benchedSize, g_nbIterations, (unsigned)(g_blockSize>>10)); - - if (cLevelLast < cLevel) cLevelLast = cLevel; - - DISPLAY("benchmarking zstd %s (using ZSTD_CStream)\n", ZSTD_VERSION_STRING); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZSTD_STREAM); - } - - DISPLAY("benchmarking zstd %s (using ZSTD_CCtx)\n", ZSTD_VERSION_STRING); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZSTD); - } - - DISPLAY("benchmarking zstd %s (using zlibWrapper)\n", ZSTD_VERSION_STRING); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZWRAP_ZSTD_REUSE); - } - - DISPLAY("benchmarking zstd %s (zlibWrapper not reusing a context)\n", ZSTD_VERSION_STRING); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZWRAP_ZSTD); - } - - - if (cLevelLast > Z_BEST_COMPRESSION) cLevelLast = Z_BEST_COMPRESSION; - - DISPLAY("\n"); - DISPLAY("benchmarking zlib %s\n", ZLIB_VERSION); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZLIB_REUSE); - } - - DISPLAY("benchmarking zlib %s (zlib not reusing a context)\n", ZLIB_VERSION); - for 
(l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZLIB); - } - - DISPLAY("benchmarking zlib %s (using zlibWrapper)\n", ZLIB_VERSION); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZWRAP_ZLIB_REUSE); - } - - DISPLAY("benchmarking zlib %s (zlibWrapper not reusing a context)\n", ZLIB_VERSION); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_benchMem(srcBuffer, benchedSize, - displayName, l, - fileSizes, nbFiles, - dictBuffer, dictBufferSize, BMK_ZWRAP_ZLIB); - } -} - - -/*! BMK_loadFiles() : - Loads `buffer` with content of files listed within `fileNamesTable`. - At most, fills `buffer` entirely */ -static void BMK_loadFiles(void* buffer, size_t bufferSize, - size_t* fileSizes, - const char** fileNamesTable, unsigned nbFiles) -{ - size_t pos = 0, totalSize = 0; - unsigned n; - for (n=0; n<nbFiles; n++) { - FILE* f; - U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); - if (UTIL_isDirectory(fileNamesTable[n])) { - DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]); - fileSizes[n] = 0; - continue; - } - if (fileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYLEVEL(2, "Cannot determine size of %s ... \n", fileNamesTable[n]); - fileSizes[n] = 0; - continue; - } - f = fopen(fileNamesTable[n], "rb"); - if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); - DISPLAYUPDATE(2, "Loading %s... 
\r", fileNamesTable[n]); - if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ - { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); - if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); - pos += readSize; } - fileSizes[n] = (size_t)fileSize; - totalSize += (size_t)fileSize; - fclose(f); - } - - if (totalSize == 0) EXM_THROW(12, "no data to bench"); -} - -static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, int cLevel, int cLevelLast) -{ - void* srcBuffer; - size_t benchedSize; - void* dictBuffer = NULL; - size_t dictBufferSize = 0; - size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); - U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); - char mfName[20] = {0}; - - if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); - - /* Load dictionary */ - if (dictFileName != NULL) { - U64 const dictFileSize = UTIL_getFileSize(dictFileName); - if (dictFileSize > 64 MB) - EXM_THROW(10, "dictionary file %s too large", dictFileName); - dictBufferSize = (size_t)dictFileSize; - dictBuffer = malloc(dictBufferSize); - if (dictBuffer==NULL) - EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (unsigned)dictBufferSize); - BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1); - } - - /* Memory allocation & restrictions */ - benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; - if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; - if (benchedSize < totalSizeToLoad) - DISPLAY("Not enough memory; testing %u MB only...\n", (unsigned)(benchedSize >> 20)); - srcBuffer = malloc(benchedSize + !benchedSize); - if (!srcBuffer) EXM_THROW(12, "not enough memory"); - - /* Load input buffer */ - BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles); - - /* Bench */ - snprintf (mfName, 
sizeof(mfName), " %u files", nbFiles); - { const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; - BMK_benchCLevel(srcBuffer, benchedSize, - displayName, cLevel, cLevelLast, - fileSizes, nbFiles, - dictBuffer, dictBufferSize); - } - - /* clean up */ - free(srcBuffer); - free(dictBuffer); - free(fileSizes); -} - - -static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility) -{ - char name[20] = {0}; - size_t benchedSize = 10000000; - void* const srcBuffer = malloc(benchedSize); - - /* Memory allocation */ - if (!srcBuffer) EXM_THROW(21, "not enough memory"); - - /* Fill input buffer */ - RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); - - /* Bench */ - snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, NULL, 0); - - /* clean up */ - free(srcBuffer); -} - - -static int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, int cLevel, int cLevelLast) -{ - double const compressibility = (double)g_compressibilityDefault / 100; - - if (nbFiles == 0) - BMK_syntheticTest(cLevel, cLevelLast, compressibility); - else - BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast); - return 0; -} - - - - -/*-************************************ -* Command Line -**************************************/ -static int usage(const char* programName) -{ - DISPLAY(WELCOME_MESSAGE); - DISPLAY( "Usage :\n"); - DISPLAY( " %s [args] [FILE(s)] [-o file]\n", programName); - DISPLAY( "\n"); - DISPLAY( "FILE : a filename\n"); - DISPLAY( " with no FILE, or when FILE is - , read standard input\n"); - DISPLAY( "Arguments :\n"); - DISPLAY( " -D file: use `file` as Dictionary \n"); - DISPLAY( " -h/-H : display help/long help and exit\n"); - DISPLAY( " -V : display Version number and exit\n"); - DISPLAY( " -v : verbose mode; specify multiple times to increase log level 
(default:%d)\n", DEFAULT_DISPLAY_LEVEL); - DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); -#ifdef UTIL_HAS_CREATEFILELIST - DISPLAY( " -r : operate recursively on directories\n"); -#endif - DISPLAY( "\n"); - DISPLAY( "Benchmark arguments :\n"); - DISPLAY( " -b# : benchmark file(s), using # compression level (default : %d) \n", ZSTDCLI_CLEVEL_DEFAULT); - DISPLAY( " -e# : test all compression levels from -bX to # (default: %d)\n", ZSTDCLI_CLEVEL_DEFAULT); - DISPLAY( " -i# : minimum evaluation time in seconds (default : 3s)\n"); - DISPLAY( " -B# : cut file into independent chunks of size # (default: no chunking)\n"); - return 0; -} - -static int badusage(const char* programName) -{ - DISPLAYLEVEL(1, "Incorrect parameters\n"); - if (g_displayLevel >= 1) usage(programName); - return 1; -} - -static void waitEnter(void) -{ - int unused; - DISPLAY("Press enter to continue...\n"); - unused = getchar(); - (void)unused; -} - -/*! readU32FromChar() : - @return : unsigned integer value reach from input in `char` format - Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
- Note : this function can overflow if digit string > MAX_UINT */ -static unsigned readU32FromChar(const char** stringPtr) -{ - unsigned result = 0; - while ((**stringPtr >='0') && (**stringPtr <='9')) - result *= 10, result += (unsigned)(**stringPtr - '0'), (*stringPtr)++ ; - return result; -} - - -#define CLEAN_RETURN(i) { operationResult = (i); goto _end; } - -int main(int argCount, char** argv) -{ - int argNb, - main_pause=0, - nextEntryIsDictionary=0, - operationResult=0, - nextArgumentIsFile=0; - int cLevel = ZSTDCLI_CLEVEL_DEFAULT; - int cLevelLast = 1; - unsigned recursive = 0; - FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount); - const char* programName = argv[0]; - const char* dictFileName = NULL; - char* dynNameSpace = NULL; - - /* init */ - if (filenames==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); } - displayOut = stderr; - - /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */ - { size_t pos; - for (pos = strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } } - programName += pos; - } - - /* command switches */ - for(argNb=1; argNb<argCount; argNb++) { - const char* argument = argv[argNb]; - if(!argument) continue; /* Protection if argument empty */ - - if (nextArgumentIsFile==0) { - - /* long commands (--long-word) */ - if (!strcmp(argument, "--")) { nextArgumentIsFile=1; continue; } - if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); } - if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage(programName)); } - if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; } - if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } - - /* Decode commands (note : aggregated commands are allowed) */ - if (argument[0]=='-') { - argument++; - - while (argument[0]!=0) { - switch(argument[0]) - { - /* Display help */ - case 'V': displayOut=stdout; 
DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); /* Version Only */ - case 'H': - case 'h': displayOut=stdout; CLEAN_RETURN(usage(programName)); - - /* Use file content as dictionary */ - case 'D': nextEntryIsDictionary = 1; argument++; break; - - /* Verbose mode */ - case 'v': g_displayLevel++; argument++; break; - - /* Quiet mode */ - case 'q': g_displayLevel--; argument++; break; - -#ifdef UTIL_HAS_CREATEFILELIST - /* recursive */ - case 'r': recursive=1; argument++; break; -#endif - - /* Benchmark */ - case 'b': - /* first compression Level */ - argument++; - cLevel = (int)readU32FromChar(&argument); - break; - - /* range bench (benchmark only) */ - case 'e': - /* last compression Level */ - argument++; - cLevelLast = (int)readU32FromChar(&argument); - break; - - /* Modify Nb Iterations (benchmark only) */ - case 'i': - argument++; - { U32 const iters = readU32FromChar(&argument); - BMK_setNotificationLevel(g_displayLevel); - BMK_SetNbIterations(iters); - } - break; - - /* cut input into blocks (benchmark only) */ - case 'B': - argument++; - { size_t bSize = readU32FromChar(&argument); - if (toupper(*argument)=='K') bSize<<=10, argument++; /* allows using KB notation */ - if (toupper(*argument)=='M') bSize<<=20, argument++; - if (toupper(*argument)=='B') argument++; - BMK_setNotificationLevel(g_displayLevel); - BMK_SetBlockSize(bSize); - } - break; - - /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ - case 'p': argument++; - if ((*argument>='0') && (*argument<='9')) { - BMK_setAdditionalParam((int)readU32FromChar(&argument)); - } else - main_pause=1; - break; - /* unknown command */ - default : CLEAN_RETURN(badusage(programName)); - } - } - continue; - } /* if (argument[0]=='-') */ - - } /* if (nextArgumentIsAFile==0) */ - - if (nextEntryIsDictionary) { - nextEntryIsDictionary = 0; - dictFileName = argument; - continue; - } - - /* add filename to list */ - UTIL_refFilename(filenames, argument); - } - - /* Welcome message (if verbose) */ - 
DISPLAYLEVEL(3, WELCOME_MESSAGE); - -#ifdef UTIL_HAS_CREATEFILELIST - if (recursive) { - UTIL_expandFNT(&filenames, 1); - } -#endif - - BMK_setNotificationLevel(g_displayLevel); - BMK_benchFiles(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, cLevelLast); - -_end: - if (main_pause) waitEnter(); - free(dynNameSpace); - UTIL_freeFileNamesTable(filenames); - return operationResult; -} |
