diff options
Diffstat (limited to 'contrib')
77 files changed, 0 insertions, 10922 deletions
diff --git a/contrib/cleanTabs b/contrib/cleanTabs deleted file mode 100755 index 215913a90ace4..0000000000000 --- a/contrib/cleanTabs +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -sed -i '' $'s/\t/ /g' ../lib/**/*.{h,c} ../programs/*.{h,c} ../tests/*.c ./**/*.{h,cpp} ../examples/*.c ../zlibWrapper/*.{h,c} diff --git a/contrib/docker/Dockerfile b/contrib/docker/Dockerfile deleted file mode 100644 index e06a32c0dac7b..0000000000000 --- a/contrib/docker/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# Dockerfile -# First image to build the binary -FROM alpine as builder - -RUN apk --no-cache add make gcc libc-dev -COPY . /src -RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install - -# Second minimal image to only keep the built binary -FROM alpine - -# Copy the built files -COPY --from=builder /pkg / - -# Copy the license as well -RUN mkdir -p /usr/local/share/licenses/zstd -COPY --from=builder /src/LICENSE /usr/local/share/licences/zstd/ - -# Just run `zstd` if no other command is given -CMD ["/usr/local/bin/zstd"] diff --git a/contrib/docker/README.md b/contrib/docker/README.md deleted file mode 100644 index 43f6d7a1ae1ab..0000000000000 --- a/contrib/docker/README.md +++ /dev/null @@ -1,20 +0,0 @@ - -## Requirement - -The `Dockerfile` script requires a version of `docker` >= 17.05 - -## Installing docker - -The official docker install docs use a ppa with a modern version available: -https://docs.docker.com/install/linux/docker-ce/ubuntu/ - -## How to run - -`docker build -t zstd .` - -## test - -``` -echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat -foo -``` diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile deleted file mode 100644 index 72ce04f2a56bd..0000000000000 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -ARG := - -CC ?= gcc -CFLAGS ?= -O3 -INCLUDES := -I ../randomDictBuilder -I 
../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder - -RANDOM_FILE := ../randomDictBuilder/random.c -IO_FILE := ../randomDictBuilder/io.c - -all: run clean - -.PHONY: run -run: benchmark - echo "Benchmarking with $(ARG)" - ./benchmark $(ARG) - -.PHONY: test -test: benchmarkTest clean - -.PHONY: benchmarkTest -benchmarkTest: benchmark test.sh - sh test.sh - -benchmark: benchmark.o io.o random.o libzstd.a - $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark - -benchmark.o: benchmark.c - $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c - -random.o: $(RANDOM_FILE) - $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE) - -io.o: $(IO_FILE) - $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) - -libzstd.a: - $(MAKE) -C ../../../lib libzstd.a - mv ../../../lib/libzstd.a . - -.PHONY: clean -clean: - rm -f *.o benchmark libzstd.a - $(MAKE) -C ../../../lib clean - echo "Cleaning is completed" diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md deleted file mode 100644 index 6a6c7f1d21693..0000000000000 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md +++ /dev/null @@ -1,849 +0,0 @@ -Benchmarking Dictionary Builder - -### Permitted Argument: -Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in=" - -###Running Test: -make test - -###Usage: -Benchmark given input files: make ARG= followed by permitted arguments - -### Examples: -make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" - -###Benchmarking Result: -- First Cover is optimize cover, second Cover uses optimized d and k from first one. -- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. This is run for accel values from 1 to 10. 
-- Fourth column is chosen d and fifth column is chosen k - -github: -NODICT 0.000004 2.999642 -RANDOM 0.024560 8.791189 -LEGACY 0.727109 8.173529 -COVER 40.565676 10.652243 8 1298 -COVER 3.608284 10.652243 8 1298 -FAST f=15 a=1 4.181024 10.570882 8 1154 -FAST f=15 a=1 0.040788 10.570882 8 1154 -FAST f=15 a=2 3.548352 10.574287 6 1970 -FAST f=15 a=2 0.035535 10.574287 6 1970 -FAST f=15 a=3 3.287364 10.613950 6 1010 -FAST f=15 a=3 0.032182 10.613950 6 1010 -FAST f=15 a=4 3.184976 10.573883 6 1058 -FAST f=15 a=4 0.029878 10.573883 6 1058 -FAST f=15 a=5 3.045513 10.580640 8 1154 -FAST f=15 a=5 0.022162 10.580640 8 1154 -FAST f=15 a=6 3.003296 10.583677 6 1010 -FAST f=15 a=6 0.028091 10.583677 6 1010 -FAST f=15 a=7 2.952655 10.622551 6 1106 -FAST f=15 a=7 0.02724 10.622551 6 1106 -FAST f=15 a=8 2.945674 10.614657 6 1010 -FAST f=15 a=8 0.027264 10.614657 6 1010 -FAST f=15 a=9 3.153439 10.564018 8 1154 -FAST f=15 a=9 0.020635 10.564018 8 1154 -FAST f=15 a=10 2.950416 10.511454 6 1010 -FAST f=15 a=10 0.026606 10.511454 6 1010 -FAST f=16 a=1 3.970029 10.681035 8 1154 -FAST f=16 a=1 0.038188 10.681035 8 1154 -FAST f=16 a=2 3.422892 10.484978 6 1874 -FAST f=16 a=2 0.034702 10.484978 6 1874 -FAST f=16 a=3 3.215836 10.632631 8 1154 -FAST f=16 a=3 0.026084 10.632631 8 1154 -FAST f=16 a=4 3.081353 10.626533 6 1106 -FAST f=16 a=4 0.030032 10.626533 6 1106 -FAST f=16 a=5 3.041241 10.545027 8 1922 -FAST f=16 a=5 0.022882 10.545027 8 1922 -FAST f=16 a=6 2.989390 10.638284 6 1874 -FAST f=16 a=6 0.028308 10.638284 6 1874 -FAST f=16 a=7 3.001581 10.797136 6 1106 -FAST f=16 a=7 0.027479 10.797136 6 1106 -FAST f=16 a=8 2.984107 10.658356 8 1058 -FAST f=16 a=8 0.021099 10.658356 8 1058 -FAST f=16 a=9 2.925788 10.523869 6 1010 -FAST f=16 a=9 0.026905 10.523869 6 1010 -FAST f=16 a=10 2.889605 10.745841 6 1874 -FAST f=16 a=10 0.026846 10.745841 6 1874 -FAST f=17 a=1 4.031953 10.672080 8 1202 -FAST f=17 a=1 0.040658 10.672080 8 1202 -FAST f=17 a=2 3.458107 10.589352 8 1106 -FAST f=17 a=2 
0.02926 10.589352 8 1106 -FAST f=17 a=3 3.291189 10.662714 8 1154 -FAST f=17 a=3 0.026531 10.662714 8 1154 -FAST f=17 a=4 3.154950 10.549456 8 1346 -FAST f=17 a=4 0.024991 10.549456 8 1346 -FAST f=17 a=5 3.092271 10.541670 6 1202 -FAST f=17 a=5 0.038285 10.541670 6 1202 -FAST f=17 a=6 3.166146 10.729112 6 1874 -FAST f=17 a=6 0.038217 10.729112 6 1874 -FAST f=17 a=7 3.035467 10.810485 6 1106 -FAST f=17 a=7 0.036655 10.810485 6 1106 -FAST f=17 a=8 3.035668 10.530532 6 1058 -FAST f=17 a=8 0.037715 10.530532 6 1058 -FAST f=17 a=9 2.987917 10.589802 8 1922 -FAST f=17 a=9 0.02217 10.589802 8 1922 -FAST f=17 a=10 2.981647 10.722579 8 1106 -FAST f=17 a=10 0.021948 10.722579 8 1106 -FAST f=18 a=1 4.067144 10.634943 8 1154 -FAST f=18 a=1 0.041386 10.634943 8 1154 -FAST f=18 a=2 3.507377 10.546230 6 1970 -FAST f=18 a=2 0.037572 10.546230 6 1970 -FAST f=18 a=3 3.323015 10.648061 8 1154 -FAST f=18 a=3 0.028306 10.648061 8 1154 -FAST f=18 a=4 3.216735 10.705402 6 1010 -FAST f=18 a=4 0.030755 10.705402 6 1010 -FAST f=18 a=5 3.175794 10.588154 8 1874 -FAST f=18 a=5 0.025315 10.588154 8 1874 -FAST f=18 a=6 3.127459 10.751104 8 1106 -FAST f=18 a=6 0.023897 10.751104 8 1106 -FAST f=18 a=7 3.083017 10.780402 6 1106 -FAST f=18 a=7 0.029158 10.780402 6 1106 -FAST f=18 a=8 3.069700 10.547226 8 1346 -FAST f=18 a=8 0.024046 10.547226 8 1346 -FAST f=18 a=9 3.056591 10.674759 6 1010 -FAST f=18 a=9 0.028496 10.674759 6 1010 -FAST f=18 a=10 3.063588 10.737578 8 1106 -FAST f=18 a=10 0.023033 10.737578 8 1106 -FAST f=19 a=1 4.164041 10.650333 8 1154 -FAST f=19 a=1 0.042906 10.650333 8 1154 -FAST f=19 a=2 3.585409 10.577066 6 1058 -FAST f=19 a=2 0.038994 10.577066 6 1058 -FAST f=19 a=3 3.439643 10.639403 8 1154 -FAST f=19 a=3 0.028427 10.639403 8 1154 -FAST f=19 a=4 3.268869 10.554410 8 1298 -FAST f=19 a=4 0.026866 10.554410 8 1298 -FAST f=19 a=5 3.238225 10.615109 6 1010 -FAST f=19 a=5 0.03078 10.615109 6 1010 -FAST f=19 a=6 3.199558 10.609782 6 1874 -FAST f=19 a=6 0.030099 10.609782 6 1874 
-FAST f=19 a=7 3.132395 10.794753 6 1106 -FAST f=19 a=7 0.028964 10.794753 6 1106 -FAST f=19 a=8 3.148446 10.554842 8 1298 -FAST f=19 a=8 0.024277 10.554842 8 1298 -FAST f=19 a=9 3.108324 10.668763 6 1010 -FAST f=19 a=9 0.02896 10.668763 6 1010 -FAST f=19 a=10 3.159863 10.757347 8 1106 -FAST f=19 a=10 0.023351 10.757347 8 1106 -FAST f=20 a=1 4.462698 10.661788 8 1154 -FAST f=20 a=1 0.047174 10.661788 8 1154 -FAST f=20 a=2 3.820269 10.678612 6 1106 -FAST f=20 a=2 0.040807 10.678612 6 1106 -FAST f=20 a=3 3.644955 10.648424 8 1154 -FAST f=20 a=3 0.031398 10.648424 8 1154 -FAST f=20 a=4 3.546257 10.559756 8 1298 -FAST f=20 a=4 0.029856 10.559756 8 1298 -FAST f=20 a=5 3.485248 10.646637 6 1010 -FAST f=20 a=5 0.033756 10.646637 6 1010 -FAST f=20 a=6 3.490438 10.775824 8 1106 -FAST f=20 a=6 0.028338 10.775824 8 1106 -FAST f=20 a=7 3.631289 10.801795 6 1106 -FAST f=20 a=7 0.035228 10.801795 6 1106 -FAST f=20 a=8 3.758936 10.545116 8 1346 -FAST f=20 a=8 0.027495 10.545116 8 1346 -FAST f=20 a=9 3.707024 10.677454 6 1010 -FAST f=20 a=9 0.031326 10.677454 6 1010 -FAST f=20 a=10 3.586593 10.756017 8 1106 -FAST f=20 a=10 0.027122 10.756017 8 1106 -FAST f=21 a=1 5.701396 10.655398 8 1154 -FAST f=21 a=1 0.067744 10.655398 8 1154 -FAST f=21 a=2 5.270542 10.650743 6 1106 -FAST f=21 a=2 0.052999 10.650743 6 1106 -FAST f=21 a=3 4.945294 10.652380 8 1154 -FAST f=21 a=3 0.052678 10.652380 8 1154 -FAST f=21 a=4 4.894079 10.543185 8 1298 -FAST f=21 a=4 0.04997 10.543185 8 1298 -FAST f=21 a=5 4.785417 10.630321 6 1010 -FAST f=21 a=5 0.045294 10.630321 6 1010 -FAST f=21 a=6 4.789381 10.664477 6 1874 -FAST f=21 a=6 0.046578 10.664477 6 1874 -FAST f=21 a=7 4.302955 10.805179 6 1106 -FAST f=21 a=7 0.041205 10.805179 6 1106 -FAST f=21 a=8 4.034630 10.551211 8 1298 -FAST f=21 a=8 0.040121 10.551211 8 1298 -FAST f=21 a=9 4.523868 10.799114 6 1010 -FAST f=21 a=9 0.043592 10.799114 6 1010 -FAST f=21 a=10 4.760736 10.750255 8 1106 -FAST f=21 a=10 0.043483 10.750255 8 1106 -FAST f=22 a=1 6.743064 
10.640537 8 1154 -FAST f=22 a=1 0.086967 10.640537 8 1154 -FAST f=22 a=2 6.121739 10.626638 6 1970 -FAST f=22 a=2 0.066337 10.626638 6 1970 -FAST f=22 a=3 5.248851 10.640688 8 1154 -FAST f=22 a=3 0.054935 10.640688 8 1154 -FAST f=22 a=4 5.436579 10.588333 8 1298 -FAST f=22 a=4 0.064113 10.588333 8 1298 -FAST f=22 a=5 5.812815 10.652653 6 1010 -FAST f=22 a=5 0.058189 10.652653 6 1010 -FAST f=22 a=6 5.745472 10.666437 6 1874 -FAST f=22 a=6 0.057188 10.666437 6 1874 -FAST f=22 a=7 5.716393 10.806911 6 1106 -FAST f=22 a=7 0.056 10.806911 6 1106 -FAST f=22 a=8 5.698799 10.530784 8 1298 -FAST f=22 a=8 0.0583 10.530784 8 1298 -FAST f=22 a=9 5.710533 10.777391 6 1010 -FAST f=22 a=9 0.054945 10.777391 6 1010 -FAST f=22 a=10 5.685395 10.745023 8 1106 -FAST f=22 a=10 0.056526 10.745023 8 1106 -FAST f=23 a=1 7.836923 10.638828 8 1154 -FAST f=23 a=1 0.099522 10.638828 8 1154 -FAST f=23 a=2 6.627834 10.631061 6 1970 -FAST f=23 a=2 0.066769 10.631061 6 1970 -FAST f=23 a=3 5.602533 10.647288 8 1154 -FAST f=23 a=3 0.064513 10.647288 8 1154 -FAST f=23 a=4 6.005580 10.568747 8 1298 -FAST f=23 a=4 0.062022 10.568747 8 1298 -FAST f=23 a=5 5.481816 10.676921 6 1010 -FAST f=23 a=5 0.058959 10.676921 6 1010 -FAST f=23 a=6 5.460444 10.666194 6 1874 -FAST f=23 a=6 0.057687 10.666194 6 1874 -FAST f=23 a=7 5.659822 10.800377 6 1106 -FAST f=23 a=7 0.06783 10.800377 6 1106 -FAST f=23 a=8 6.826940 10.522167 8 1298 -FAST f=23 a=8 0.070533 10.522167 8 1298 -FAST f=23 a=9 6.804757 10.577799 8 1682 -FAST f=23 a=9 0.069949 10.577799 8 1682 -FAST f=23 a=10 6.774933 10.742093 8 1106 -FAST f=23 a=10 0.068395 10.742093 8 1106 -FAST f=24 a=1 8.444110 10.632783 8 1154 -FAST f=24 a=1 0.094357 10.632783 8 1154 -FAST f=24 a=2 7.289578 10.631061 6 1970 -FAST f=24 a=2 0.098515 10.631061 6 1970 -FAST f=24 a=3 8.619780 10.646289 8 1154 -FAST f=24 a=3 0.098041 10.646289 8 1154 -FAST f=24 a=4 8.508455 10.555199 8 1298 -FAST f=24 a=4 0.093885 10.555199 8 1298 -FAST f=24 a=5 8.471145 10.674363 6 1010 -FAST f=24 a=5 
0.088676 10.674363 6 1010 -FAST f=24 a=6 8.426727 10.667228 6 1874 -FAST f=24 a=6 0.087247 10.667228 6 1874 -FAST f=24 a=7 8.356826 10.803027 6 1106 -FAST f=24 a=7 0.085835 10.803027 6 1106 -FAST f=24 a=8 6.756811 10.522049 8 1298 -FAST f=24 a=8 0.07107 10.522049 8 1298 -FAST f=24 a=9 6.548169 10.571882 8 1682 -FAST f=24 a=9 0.0713 10.571882 8 1682 -FAST f=24 a=10 8.238079 10.736453 8 1106 -FAST f=24 a=10 0.07004 10.736453 8 1106 - - -hg-commands: -NODICT 0.000005 2.425276 -RANDOM 0.046332 3.490331 -LEGACY 0.720351 3.911682 -COVER 45.507731 4.132653 8 386 -COVER 1.868810 4.132653 8 386 -FAST f=15 a=1 4.561427 3.866894 8 1202 -FAST f=15 a=1 0.048946 3.866894 8 1202 -FAST f=15 a=2 3.574462 3.892119 8 1538 -FAST f=15 a=2 0.033677 3.892119 8 1538 -FAST f=15 a=3 3.230227 3.888791 6 1346 -FAST f=15 a=3 0.034312 3.888791 6 1346 -FAST f=15 a=4 3.042388 3.899739 8 1010 -FAST f=15 a=4 0.024307 3.899739 8 1010 -FAST f=15 a=5 2.800148 3.896220 8 818 -FAST f=15 a=5 0.022331 3.896220 8 818 -FAST f=15 a=6 2.706518 3.882039 8 578 -FAST f=15 a=6 0.020955 3.882039 8 578 -FAST f=15 a=7 2.701820 3.885430 6 866 -FAST f=15 a=7 0.026074 3.885430 6 866 -FAST f=15 a=8 2.604445 3.906932 8 1826 -FAST f=15 a=8 0.021789 3.906932 8 1826 -FAST f=15 a=9 2.598568 3.870324 6 1682 -FAST f=15 a=9 0.026004 3.870324 6 1682 -FAST f=15 a=10 2.575920 3.920783 8 1442 -FAST f=15 a=10 0.020228 3.920783 8 1442 -FAST f=16 a=1 4.630623 4.001430 8 770 -FAST f=16 a=1 0.047497 4.001430 8 770 -FAST f=16 a=2 3.674721 3.974431 8 1874 -FAST f=16 a=2 0.035761 3.974431 8 1874 -FAST f=16 a=3 3.338384 3.978703 8 1010 -FAST f=16 a=3 0.029436 3.978703 8 1010 -FAST f=16 a=4 3.004412 3.983035 8 1010 -FAST f=16 a=4 0.025744 3.983035 8 1010 -FAST f=16 a=5 2.881892 3.987710 8 770 -FAST f=16 a=5 0.023211 3.987710 8 770 -FAST f=16 a=6 2.807410 3.952717 8 1298 -FAST f=16 a=6 0.023199 3.952717 8 1298 -FAST f=16 a=7 2.819623 3.994627 8 770 -FAST f=16 a=7 0.021806 3.994627 8 770 -FAST f=16 a=8 2.740092 3.954032 8 1826 -FAST f=16 a=8 
0.0226 3.954032 8 1826 -FAST f=16 a=9 2.682564 3.969879 6 1442 -FAST f=16 a=9 0.026324 3.969879 6 1442 -FAST f=16 a=10 2.657959 3.969755 8 674 -FAST f=16 a=10 0.020413 3.969755 8 674 -FAST f=17 a=1 4.729228 4.046000 8 530 -FAST f=17 a=1 0.049703 4.046000 8 530 -FAST f=17 a=2 3.764510 3.991519 8 1970 -FAST f=17 a=2 0.038195 3.991519 8 1970 -FAST f=17 a=3 3.416992 4.006296 6 914 -FAST f=17 a=3 0.036244 4.006296 6 914 -FAST f=17 a=4 3.145626 3.979182 8 1970 -FAST f=17 a=4 0.028676 3.979182 8 1970 -FAST f=17 a=5 2.995070 4.050070 8 770 -FAST f=17 a=5 0.025707 4.050070 8 770 -FAST f=17 a=6 2.911833 4.040024 8 770 -FAST f=17 a=6 0.02453 4.040024 8 770 -FAST f=17 a=7 2.894796 4.015884 8 818 -FAST f=17 a=7 0.023956 4.015884 8 818 -FAST f=17 a=8 2.789962 4.039303 8 530 -FAST f=17 a=8 0.023219 4.039303 8 530 -FAST f=17 a=9 2.787625 3.996762 8 1634 -FAST f=17 a=9 0.023651 3.996762 8 1634 -FAST f=17 a=10 2.754796 4.005059 8 1058 -FAST f=17 a=10 0.022537 4.005059 8 1058 -FAST f=18 a=1 4.779117 4.038214 8 242 -FAST f=18 a=1 0.048814 4.038214 8 242 -FAST f=18 a=2 3.829753 4.045768 8 722 -FAST f=18 a=2 0.036541 4.045768 8 722 -FAST f=18 a=3 3.495053 4.021497 8 770 -FAST f=18 a=3 0.032648 4.021497 8 770 -FAST f=18 a=4 3.221395 4.039623 8 770 -FAST f=18 a=4 0.027818 4.039623 8 770 -FAST f=18 a=5 3.059369 4.050414 8 530 -FAST f=18 a=5 0.026296 4.050414 8 530 -FAST f=18 a=6 3.019292 4.010714 6 962 -FAST f=18 a=6 0.031104 4.010714 6 962 -FAST f=18 a=7 2.949322 4.031439 6 770 -FAST f=18 a=7 0.030745 4.031439 6 770 -FAST f=18 a=8 2.876425 4.032088 6 386 -FAST f=18 a=8 0.027407 4.032088 6 386 -FAST f=18 a=9 2.850958 4.053372 8 674 -FAST f=18 a=9 0.023799 4.053372 8 674 -FAST f=18 a=10 2.884352 4.020148 8 1730 -FAST f=18 a=10 0.024401 4.020148 8 1730 -FAST f=19 a=1 4.815669 4.061203 8 674 -FAST f=19 a=1 0.051425 4.061203 8 674 -FAST f=19 a=2 3.951356 4.013822 8 1442 -FAST f=19 a=2 0.039968 4.013822 8 1442 -FAST f=19 a=3 3.554682 4.050425 8 722 -FAST f=19 a=3 0.032725 4.050425 8 722 -FAST 
f=19 a=4 3.242585 4.054677 8 722 -FAST f=19 a=4 0.028194 4.054677 8 722 -FAST f=19 a=5 3.105909 4.064524 8 818 -FAST f=19 a=5 0.02675 4.064524 8 818 -FAST f=19 a=6 3.059901 4.036857 8 1250 -FAST f=19 a=6 0.026396 4.036857 8 1250 -FAST f=19 a=7 3.016151 4.068234 6 770 -FAST f=19 a=7 0.031501 4.068234 6 770 -FAST f=19 a=8 2.962902 4.077509 8 530 -FAST f=19 a=8 0.023333 4.077509 8 530 -FAST f=19 a=9 2.899607 4.067328 8 530 -FAST f=19 a=9 0.024553 4.067328 8 530 -FAST f=19 a=10 2.950978 4.059901 8 434 -FAST f=19 a=10 0.023852 4.059901 8 434 -FAST f=20 a=1 5.259834 4.027579 8 1634 -FAST f=20 a=1 0.061123 4.027579 8 1634 -FAST f=20 a=2 4.382150 4.025093 8 1634 -FAST f=20 a=2 0.048009 4.025093 8 1634 -FAST f=20 a=3 4.104323 4.060842 8 530 -FAST f=20 a=3 0.040965 4.060842 8 530 -FAST f=20 a=4 3.853340 4.023504 6 914 -FAST f=20 a=4 0.041072 4.023504 6 914 -FAST f=20 a=5 3.728841 4.018089 6 1634 -FAST f=20 a=5 0.037469 4.018089 6 1634 -FAST f=20 a=6 3.683045 4.069138 8 578 -FAST f=20 a=6 0.028011 4.069138 8 578 -FAST f=20 a=7 3.726973 4.063160 8 722 -FAST f=20 a=7 0.028437 4.063160 8 722 -FAST f=20 a=8 3.555073 4.057690 8 386 -FAST f=20 a=8 0.027588 4.057690 8 386 -FAST f=20 a=9 3.551095 4.067253 8 482 -FAST f=20 a=9 0.025976 4.067253 8 482 -FAST f=20 a=10 3.490127 4.068518 8 530 -FAST f=20 a=10 0.025971 4.068518 8 530 -FAST f=21 a=1 7.343816 4.064945 8 770 -FAST f=21 a=1 0.085035 4.064945 8 770 -FAST f=21 a=2 5.930894 4.048206 8 386 -FAST f=21 a=2 0.067349 4.048206 8 386 -FAST f=21 a=3 6.770775 4.063417 8 578 -FAST f=21 a=3 0.077104 4.063417 8 578 -FAST f=21 a=4 6.889409 4.066761 8 626 -FAST f=21 a=4 0.0717 4.066761 8 626 -FAST f=21 a=5 6.714896 4.051813 8 914 -FAST f=21 a=5 0.071026 4.051813 8 914 -FAST f=21 a=6 6.539890 4.047263 8 1922 -FAST f=21 a=6 0.07127 4.047263 8 1922 -FAST f=21 a=7 6.511052 4.068373 8 482 -FAST f=21 a=7 0.065467 4.068373 8 482 -FAST f=21 a=8 6.458788 4.071597 8 482 -FAST f=21 a=8 0.063817 4.071597 8 482 -FAST f=21 a=9 6.377591 4.052905 8 434 -FAST 
f=21 a=9 0.063112 4.052905 8 434 -FAST f=21 a=10 6.360752 4.047773 8 530 -FAST f=21 a=10 0.063606 4.047773 8 530 -FAST f=22 a=1 10.523471 4.040812 8 962 -FAST f=22 a=1 0.14214 4.040812 8 962 -FAST f=22 a=2 9.454758 4.059396 8 914 -FAST f=22 a=2 0.118343 4.059396 8 914 -FAST f=22 a=3 9.043197 4.043019 8 1922 -FAST f=22 a=3 0.109798 4.043019 8 1922 -FAST f=22 a=4 8.716261 4.044819 8 770 -FAST f=22 a=4 0.099687 4.044819 8 770 -FAST f=22 a=5 8.529472 4.070576 8 530 -FAST f=22 a=5 0.093127 4.070576 8 530 -FAST f=22 a=6 8.424241 4.070565 8 722 -FAST f=22 a=6 0.093703 4.070565 8 722 -FAST f=22 a=7 8.403391 4.070591 8 578 -FAST f=22 a=7 0.089763 4.070591 8 578 -FAST f=22 a=8 8.285221 4.089171 8 530 -FAST f=22 a=8 0.087716 4.089171 8 530 -FAST f=22 a=9 8.282506 4.047470 8 722 -FAST f=22 a=9 0.089773 4.047470 8 722 -FAST f=22 a=10 8.241809 4.064151 8 818 -FAST f=22 a=10 0.090413 4.064151 8 818 -FAST f=23 a=1 12.389208 4.051635 6 530 -FAST f=23 a=1 0.147796 4.051635 6 530 -FAST f=23 a=2 11.300910 4.042835 6 914 -FAST f=23 a=2 0.133178 4.042835 6 914 -FAST f=23 a=3 10.879455 4.047415 8 626 -FAST f=23 a=3 0.129571 4.047415 8 626 -FAST f=23 a=4 10.522718 4.038269 6 914 -FAST f=23 a=4 0.118121 4.038269 6 914 -FAST f=23 a=5 10.348043 4.066884 8 434 -FAST f=23 a=5 0.112098 4.066884 8 434 -FAST f=23 a=6 10.238630 4.048635 8 1010 -FAST f=23 a=6 0.120281 4.048635 8 1010 -FAST f=23 a=7 10.213255 4.061809 8 530 -FAST f=23 a=7 0.1121 4.061809 8 530 -FAST f=23 a=8 10.107879 4.074104 8 818 -FAST f=23 a=8 0.116544 4.074104 8 818 -FAST f=23 a=9 10.063424 4.064811 8 674 -FAST f=23 a=9 0.109045 4.064811 8 674 -FAST f=23 a=10 10.035801 4.054918 8 530 -FAST f=23 a=10 0.108735 4.054918 8 530 -FAST f=24 a=1 14.963878 4.073490 8 722 -FAST f=24 a=1 0.206344 4.073490 8 722 -FAST f=24 a=2 13.833472 4.036100 8 962 -FAST f=24 a=2 0.17486 4.036100 8 962 -FAST f=24 a=3 13.404631 4.026281 6 1106 -FAST f=24 a=3 0.153961 4.026281 6 1106 -FAST f=24 a=4 13.041164 4.065448 8 674 -FAST f=24 a=4 0.155509 4.065448 
8 674 -FAST f=24 a=5 12.879412 4.054636 8 674 -FAST f=24 a=5 0.148282 4.054636 8 674 -FAST f=24 a=6 12.773736 4.081376 8 530 -FAST f=24 a=6 0.142563 4.081376 8 530 -FAST f=24 a=7 12.711310 4.059834 8 770 -FAST f=24 a=7 0.149321 4.059834 8 770 -FAST f=24 a=8 12.635459 4.052050 8 1298 -FAST f=24 a=8 0.15095 4.052050 8 1298 -FAST f=24 a=9 12.558104 4.076516 8 722 -FAST f=24 a=9 0.144361 4.076516 8 722 -FAST f=24 a=10 10.661348 4.062137 8 818 -FAST f=24 a=10 0.108232 4.062137 8 818 - - -hg-changelog: -NODICT 0.000017 1.377590 -RANDOM 0.186171 2.097487 -LEGACY 1.670867 2.058907 -COVER 173.561948 2.189685 8 98 -COVER 4.811180 2.189685 8 98 -FAST f=15 a=1 18.685906 2.129682 8 434 -FAST f=15 a=1 0.173376 2.129682 8 434 -FAST f=15 a=2 12.928259 2.131890 8 482 -FAST f=15 a=2 0.102582 2.131890 8 482 -FAST f=15 a=3 11.132343 2.128027 8 386 -FAST f=15 a=3 0.077122 2.128027 8 386 -FAST f=15 a=4 10.120683 2.125797 8 434 -FAST f=15 a=4 0.065175 2.125797 8 434 -FAST f=15 a=5 9.479092 2.127697 8 386 -FAST f=15 a=5 0.057905 2.127697 8 386 -FAST f=15 a=6 9.159523 2.127132 8 1682 -FAST f=15 a=6 0.058604 2.127132 8 1682 -FAST f=15 a=7 8.724003 2.129914 8 434 -FAST f=15 a=7 0.0493 2.129914 8 434 -FAST f=15 a=8 8.595001 2.127137 8 338 -FAST f=15 a=8 0.0474 2.127137 8 338 -FAST f=15 a=9 8.356405 2.125512 8 482 -FAST f=15 a=9 0.046126 2.125512 8 482 -FAST f=15 a=10 8.207111 2.126066 8 338 -FAST f=15 a=10 0.043292 2.126066 8 338 -FAST f=16 a=1 18.464436 2.144040 8 242 -FAST f=16 a=1 0.172156 2.144040 8 242 -FAST f=16 a=2 12.844825 2.148171 8 194 -FAST f=16 a=2 0.099619 2.148171 8 194 -FAST f=16 a=3 11.082568 2.140837 8 290 -FAST f=16 a=3 0.079165 2.140837 8 290 -FAST f=16 a=4 10.066749 2.144405 8 386 -FAST f=16 a=4 0.068411 2.144405 8 386 -FAST f=16 a=5 9.501121 2.140720 8 386 -FAST f=16 a=5 0.061316 2.140720 8 386 -FAST f=16 a=6 9.179332 2.139478 8 386 -FAST f=16 a=6 0.056322 2.139478 8 386 -FAST f=16 a=7 8.849438 2.142412 8 194 -FAST f=16 a=7 0.050493 2.142412 8 194 -FAST f=16 a=8 8.810919 
2.143454 8 434 -FAST f=16 a=8 0.051304 2.143454 8 434 -FAST f=16 a=9 8.553900 2.140339 8 194 -FAST f=16 a=9 0.047285 2.140339 8 194 -FAST f=16 a=10 8.398027 2.143130 8 386 -FAST f=16 a=10 0.046386 2.143130 8 386 -FAST f=17 a=1 18.644657 2.157192 8 98 -FAST f=17 a=1 0.173884 2.157192 8 98 -FAST f=17 a=2 13.071242 2.159830 8 146 -FAST f=17 a=2 0.10388 2.159830 8 146 -FAST f=17 a=3 11.332366 2.153654 6 194 -FAST f=17 a=3 0.08983 2.153654 6 194 -FAST f=17 a=4 10.362413 2.156813 8 242 -FAST f=17 a=4 0.070389 2.156813 8 242 -FAST f=17 a=5 9.808159 2.155098 6 338 -FAST f=17 a=5 0.072661 2.155098 6 338 -FAST f=17 a=6 9.451165 2.153845 6 146 -FAST f=17 a=6 0.064959 2.153845 6 146 -FAST f=17 a=7 9.163097 2.155424 6 242 -FAST f=17 a=7 0.064323 2.155424 6 242 -FAST f=17 a=8 9.047276 2.156640 8 242 -FAST f=17 a=8 0.053382 2.156640 8 242 -FAST f=17 a=9 8.807671 2.152396 8 146 -FAST f=17 a=9 0.049617 2.152396 8 146 -FAST f=17 a=10 8.649827 2.152370 8 146 -FAST f=17 a=10 0.047849 2.152370 8 146 -FAST f=18 a=1 18.809502 2.168116 8 98 -FAST f=18 a=1 0.175226 2.168116 8 98 -FAST f=18 a=2 13.756502 2.170870 6 242 -FAST f=18 a=2 0.119507 2.170870 6 242 -FAST f=18 a=3 12.059748 2.163094 6 98 -FAST f=18 a=3 0.093912 2.163094 6 98 -FAST f=18 a=4 11.410294 2.172372 8 98 -FAST f=18 a=4 0.073048 2.172372 8 98 -FAST f=18 a=5 10.560297 2.166388 8 98 -FAST f=18 a=5 0.065136 2.166388 8 98 -FAST f=18 a=6 10.071390 2.162672 8 98 -FAST f=18 a=6 0.059402 2.162672 8 98 -FAST f=18 a=7 10.084214 2.166624 6 194 -FAST f=18 a=7 0.073276 2.166624 6 194 -FAST f=18 a=8 9.953226 2.167454 8 98 -FAST f=18 a=8 0.053659 2.167454 8 98 -FAST f=18 a=9 8.982461 2.161593 6 146 -FAST f=18 a=9 0.05955 2.161593 6 146 -FAST f=18 a=10 8.986092 2.164373 6 242 -FAST f=18 a=10 0.059135 2.164373 6 242 -FAST f=19 a=1 18.908277 2.176021 8 98 -FAST f=19 a=1 0.177316 2.176021 8 98 -FAST f=19 a=2 13.471313 2.176103 8 98 -FAST f=19 a=2 0.106344 2.176103 8 98 -FAST f=19 a=3 11.571406 2.172812 8 98 -FAST f=19 a=3 0.083293 2.172812 8 
98 -FAST f=19 a=4 10.632775 2.177770 6 146 -FAST f=19 a=4 0.079864 2.177770 6 146 -FAST f=19 a=5 10.030190 2.175574 6 146 -FAST f=19 a=5 0.07223 2.175574 6 146 -FAST f=19 a=6 9.717818 2.169997 8 98 -FAST f=19 a=6 0.060049 2.169997 8 98 -FAST f=19 a=7 9.397531 2.172770 8 146 -FAST f=19 a=7 0.057188 2.172770 8 146 -FAST f=19 a=8 9.281061 2.175822 8 98 -FAST f=19 a=8 0.053711 2.175822 8 98 -FAST f=19 a=9 9.165242 2.169849 6 146 -FAST f=19 a=9 0.059898 2.169849 6 146 -FAST f=19 a=10 9.048763 2.173394 8 98 -FAST f=19 a=10 0.049757 2.173394 8 98 -FAST f=20 a=1 21.166917 2.183923 6 98 -FAST f=20 a=1 0.205425 2.183923 6 98 -FAST f=20 a=2 15.642753 2.182349 6 98 -FAST f=20 a=2 0.135957 2.182349 6 98 -FAST f=20 a=3 14.053730 2.173544 6 98 -FAST f=20 a=3 0.11266 2.173544 6 98 -FAST f=20 a=4 15.270019 2.183656 8 98 -FAST f=20 a=4 0.107892 2.183656 8 98 -FAST f=20 a=5 15.497927 2.174661 6 98 -FAST f=20 a=5 0.100305 2.174661 6 98 -FAST f=20 a=6 13.973505 2.172391 8 98 -FAST f=20 a=6 0.087565 2.172391 8 98 -FAST f=20 a=7 14.083296 2.172443 8 98 -FAST f=20 a=7 0.078062 2.172443 8 98 -FAST f=20 a=8 12.560048 2.175581 8 98 -FAST f=20 a=8 0.070282 2.175581 8 98 -FAST f=20 a=9 13.078645 2.173975 6 146 -FAST f=20 a=9 0.081041 2.173975 6 146 -FAST f=20 a=10 12.823328 2.177778 8 98 -FAST f=20 a=10 0.074522 2.177778 8 98 -FAST f=21 a=1 29.825370 2.183057 6 98 -FAST f=21 a=1 0.334453 2.183057 6 98 -FAST f=21 a=2 29.476474 2.182752 8 98 -FAST f=21 a=2 0.286602 2.182752 8 98 -FAST f=21 a=3 25.937186 2.175867 8 98 -FAST f=21 a=3 0.17626 2.175867 8 98 -FAST f=21 a=4 20.413865 2.179780 8 98 -FAST f=21 a=4 0.206085 2.179780 8 98 -FAST f=21 a=5 20.541889 2.178328 6 146 -FAST f=21 a=5 0.199157 2.178328 6 146 -FAST f=21 a=6 21.090670 2.174443 6 146 -FAST f=21 a=6 0.190645 2.174443 6 146 -FAST f=21 a=7 20.221569 2.177384 6 146 -FAST f=21 a=7 0.184278 2.177384 6 146 -FAST f=21 a=8 20.322357 2.179456 6 98 -FAST f=21 a=8 0.178458 2.179456 6 98 -FAST f=21 a=9 20.683912 2.174396 6 146 -FAST f=21 a=9 
0.190829 2.174396 6 146 -FAST f=21 a=10 20.840865 2.174905 8 98 -FAST f=21 a=10 0.172515 2.174905 8 98 -FAST f=22 a=1 36.822827 2.181612 6 98 -FAST f=22 a=1 0.437389 2.181612 6 98 -FAST f=22 a=2 30.616902 2.183142 8 98 -FAST f=22 a=2 0.324284 2.183142 8 98 -FAST f=22 a=3 28.472482 2.178130 8 98 -FAST f=22 a=3 0.236538 2.178130 8 98 -FAST f=22 a=4 25.847028 2.181878 8 98 -FAST f=22 a=4 0.263744 2.181878 8 98 -FAST f=22 a=5 27.095881 2.180775 8 98 -FAST f=22 a=5 0.24988 2.180775 8 98 -FAST f=22 a=6 25.939172 2.170916 8 98 -FAST f=22 a=6 0.240033 2.170916 8 98 -FAST f=22 a=7 27.064194 2.177849 8 98 -FAST f=22 a=7 0.242383 2.177849 8 98 -FAST f=22 a=8 25.140221 2.178216 8 98 -FAST f=22 a=8 0.237601 2.178216 8 98 -FAST f=22 a=9 25.505283 2.177455 6 146 -FAST f=22 a=9 0.223217 2.177455 6 146 -FAST f=22 a=10 24.529362 2.176705 6 98 -FAST f=22 a=10 0.222876 2.176705 6 98 -FAST f=23 a=1 39.127310 2.183006 6 98 -FAST f=23 a=1 0.417338 2.183006 6 98 -FAST f=23 a=2 32.468161 2.183524 6 98 -FAST f=23 a=2 0.351645 2.183524 6 98 -FAST f=23 a=3 31.577620 2.172604 6 98 -FAST f=23 a=3 0.319659 2.172604 6 98 -FAST f=23 a=4 30.129247 2.183932 6 98 -FAST f=23 a=4 0.307239 2.183932 6 98 -FAST f=23 a=5 29.103376 2.183529 6 146 -FAST f=23 a=5 0.285533 2.183529 6 146 -FAST f=23 a=6 29.776045 2.174367 8 98 -FAST f=23 a=6 0.276846 2.174367 8 98 -FAST f=23 a=7 28.940407 2.178022 6 146 -FAST f=23 a=7 0.274082 2.178022 6 146 -FAST f=23 a=8 29.256009 2.179462 6 98 -FAST f=23 a=8 0.26949 2.179462 6 98 -FAST f=23 a=9 29.347312 2.170407 8 98 -FAST f=23 a=9 0.265034 2.170407 8 98 -FAST f=23 a=10 29.140081 2.171762 8 98 -FAST f=23 a=10 0.259183 2.171762 8 98 -FAST f=24 a=1 44.871179 2.182115 6 98 -FAST f=24 a=1 0.509433 2.182115 6 98 -FAST f=24 a=2 38.694867 2.180549 8 98 -FAST f=24 a=2 0.406695 2.180549 8 98 -FAST f=24 a=3 38.363769 2.172821 8 98 -FAST f=24 a=3 0.359581 2.172821 8 98 -FAST f=24 a=4 36.580797 2.184142 8 98 -FAST f=24 a=4 0.340614 2.184142 8 98 -FAST f=24 a=5 33.125701 2.183301 8 98 
-FAST f=24 a=5 0.324874 2.183301 8 98 -FAST f=24 a=6 34.776068 2.173019 6 146 -FAST f=24 a=6 0.340397 2.173019 6 146 -FAST f=24 a=7 34.417625 2.176561 6 146 -FAST f=24 a=7 0.308223 2.176561 6 146 -FAST f=24 a=8 35.470291 2.182161 6 98 -FAST f=24 a=8 0.307724 2.182161 6 98 -FAST f=24 a=9 34.927252 2.172682 6 146 -FAST f=24 a=9 0.300598 2.172682 6 146 -FAST f=24 a=10 33.238355 2.173395 6 98 -FAST f=24 a=10 0.249916 2.173395 6 98 - - -hg-manifest: -NODICT 0.000004 1.866377 -RANDOM 0.696346 2.309436 -LEGACY 7.064527 2.506977 -COVER 876.312865 2.582528 8 434 -COVER 35.684533 2.582528 8 434 -FAST f=15 a=1 76.618201 2.404013 8 1202 -FAST f=15 a=1 0.700722 2.404013 8 1202 -FAST f=15 a=2 49.213058 2.409248 6 1826 -FAST f=15 a=2 0.473393 2.409248 6 1826 -FAST f=15 a=3 41.753197 2.409677 8 1490 -FAST f=15 a=3 0.336848 2.409677 8 1490 -FAST f=15 a=4 38.648295 2.407996 8 1538 -FAST f=15 a=4 0.283952 2.407996 8 1538 -FAST f=15 a=5 36.144936 2.402895 8 1874 -FAST f=15 a=5 0.270128 2.402895 8 1874 -FAST f=15 a=6 35.484675 2.394873 8 1586 -FAST f=15 a=6 0.251637 2.394873 8 1586 -FAST f=15 a=7 34.280599 2.397311 8 1778 -FAST f=15 a=7 0.23984 2.397311 8 1778 -FAST f=15 a=8 32.122572 2.396089 6 1490 -FAST f=15 a=8 0.251508 2.396089 6 1490 -FAST f=15 a=9 29.909842 2.390092 6 1970 -FAST f=15 a=9 0.251233 2.390092 6 1970 -FAST f=15 a=10 30.102938 2.400086 6 1682 -FAST f=15 a=10 0.23688 2.400086 6 1682 -FAST f=16 a=1 67.750401 2.475460 6 1346 -FAST f=16 a=1 0.796035 2.475460 6 1346 -FAST f=16 a=2 52.812027 2.480860 6 1730 -FAST f=16 a=2 0.480384 2.480860 6 1730 -FAST f=16 a=3 44.179259 2.469304 8 1970 -FAST f=16 a=3 0.332657 2.469304 8 1970 -FAST f=16 a=4 37.612728 2.478208 6 1970 -FAST f=16 a=4 0.32498 2.478208 6 1970 -FAST f=16 a=5 35.056222 2.475568 6 1298 -FAST f=16 a=5 0.302824 2.475568 6 1298 -FAST f=16 a=6 34.713012 2.486079 8 1730 -FAST f=16 a=6 0.24755 2.486079 8 1730 -FAST f=16 a=7 33.713687 2.477180 6 1682 -FAST f=16 a=7 0.280358 2.477180 6 1682 -FAST f=16 a=8 31.571412 
2.475418 8 1538 -FAST f=16 a=8 0.241241 2.475418 8 1538 -FAST f=16 a=9 31.608069 2.478263 8 1922 -FAST f=16 a=9 0.241764 2.478263 8 1922 -FAST f=16 a=10 31.358002 2.472263 8 1442 -FAST f=16 a=10 0.221661 2.472263 8 1442 -FAST f=17 a=1 66.185775 2.536085 6 1346 -FAST f=17 a=1 0.713549 2.536085 6 1346 -FAST f=17 a=2 50.365000 2.546105 8 1298 -FAST f=17 a=2 0.467846 2.546105 8 1298 -FAST f=17 a=3 42.712843 2.536250 8 1298 -FAST f=17 a=3 0.34047 2.536250 8 1298 -FAST f=17 a=4 39.514227 2.535555 8 1442 -FAST f=17 a=4 0.302989 2.535555 8 1442 -FAST f=17 a=5 35.189292 2.524925 8 1202 -FAST f=17 a=5 0.273451 2.524925 8 1202 -FAST f=17 a=6 35.791683 2.523466 8 1202 -FAST f=17 a=6 0.268261 2.523466 8 1202 -FAST f=17 a=7 37.416136 2.526625 6 1010 -FAST f=17 a=7 0.277558 2.526625 6 1010 -FAST f=17 a=8 37.084707 2.533274 6 1250 -FAST f=17 a=8 0.285104 2.533274 6 1250 -FAST f=17 a=9 34.183814 2.532765 8 1298 -FAST f=17 a=9 0.235133 2.532765 8 1298 -FAST f=17 a=10 31.149235 2.528722 8 1346 -FAST f=17 a=10 0.232679 2.528722 8 1346 -FAST f=18 a=1 72.942176 2.559857 6 386 -FAST f=18 a=1 0.718618 2.559857 6 386 -FAST f=18 a=2 51.690440 2.559572 8 290 -FAST f=18 a=2 0.403978 2.559572 8 290 -FAST f=18 a=3 45.344908 2.561040 8 962 -FAST f=18 a=3 0.357205 2.561040 8 962 -FAST f=18 a=4 39.804522 2.558446 8 1010 -FAST f=18 a=4 0.310526 2.558446 8 1010 -FAST f=18 a=5 38.134888 2.561811 8 626 -FAST f=18 a=5 0.273743 2.561811 8 626 -FAST f=18 a=6 35.091890 2.555518 8 722 -FAST f=18 a=6 0.260135 2.555518 8 722 -FAST f=18 a=7 34.639523 2.562938 8 290 -FAST f=18 a=7 0.234294 2.562938 8 290 -FAST f=18 a=8 36.076431 2.563567 8 1586 -FAST f=18 a=8 0.274075 2.563567 8 1586 -FAST f=18 a=9 36.376433 2.560950 8 722 -FAST f=18 a=9 0.240106 2.560950 8 722 -FAST f=18 a=10 32.624790 2.559340 8 578 -FAST f=18 a=10 0.234704 2.559340 8 578 -FAST f=19 a=1 70.513761 2.572441 8 194 -FAST f=19 a=1 0.726112 2.572441 8 194 -FAST f=19 a=2 59.263032 2.574560 8 482 -FAST f=19 a=2 0.451554 2.574560 8 482 -FAST f=19 a=3 
51.509594 2.571546 6 194 -FAST f=19 a=3 0.393014 2.571546 6 194 -FAST f=19 a=4 55.393906 2.573386 8 482 -FAST f=19 a=4 0.38819 2.573386 8 482 -FAST f=19 a=5 43.201736 2.567589 8 674 -FAST f=19 a=5 0.292155 2.567589 8 674 -FAST f=19 a=6 42.911687 2.572666 6 434 -FAST f=19 a=6 0.303988 2.572666 6 434 -FAST f=19 a=7 44.687591 2.573613 6 290 -FAST f=19 a=7 0.308721 2.573613 6 290 -FAST f=19 a=8 37.372868 2.571039 6 194 -FAST f=19 a=8 0.287137 2.571039 6 194 -FAST f=19 a=9 36.074230 2.566473 6 482 -FAST f=19 a=9 0.280721 2.566473 6 482 -FAST f=19 a=10 33.731720 2.570306 8 194 -FAST f=19 a=10 0.224073 2.570306 8 194 -FAST f=20 a=1 79.670634 2.581146 6 290 -FAST f=20 a=1 0.899986 2.581146 6 290 -FAST f=20 a=2 58.827141 2.579782 8 386 -FAST f=20 a=2 0.602288 2.579782 8 386 -FAST f=20 a=3 51.289004 2.579627 8 722 -FAST f=20 a=3 0.446091 2.579627 8 722 -FAST f=20 a=4 47.711068 2.581508 8 722 -FAST f=20 a=4 0.473007 2.581508 8 722 -FAST f=20 a=5 47.402929 2.578062 6 434 -FAST f=20 a=5 0.497131 2.578062 6 434 -FAST f=20 a=6 54.797102 2.577365 8 482 -FAST f=20 a=6 0.515061 2.577365 8 482 -FAST f=20 a=7 51.370877 2.583050 8 386 -FAST f=20 a=7 0.402878 2.583050 8 386 -FAST f=20 a=8 51.437931 2.574875 6 242 -FAST f=20 a=8 0.453094 2.574875 6 242 -FAST f=20 a=9 44.105456 2.576700 6 242 -FAST f=20 a=9 0.456633 2.576700 6 242 -FAST f=20 a=10 44.447580 2.578305 8 338 -FAST f=20 a=10 0.409121 2.578305 8 338 -FAST f=21 a=1 113.031686 2.582449 6 242 -FAST f=21 a=1 1.456971 2.582449 6 242 -FAST f=21 a=2 97.700932 2.582124 8 194 -FAST f=21 a=2 1.072078 2.582124 8 194 -FAST f=21 a=3 96.563648 2.585479 8 434 -FAST f=21 a=3 0.949528 2.585479 8 434 -FAST f=21 a=4 90.597813 2.582366 6 386 -FAST f=21 a=4 0.76944 2.582366 6 386 -FAST f=21 a=5 86.815980 2.579043 8 434 -FAST f=21 a=5 0.858167 2.579043 8 434 -FAST f=21 a=6 91.235820 2.578378 8 530 -FAST f=21 a=6 0.684274 2.578378 8 530 -FAST f=21 a=7 84.392788 2.581243 8 386 -FAST f=21 a=7 0.814386 2.581243 8 386 -FAST f=21 a=8 82.052310 2.582547 8 
338 -FAST f=21 a=8 0.822633 2.582547 8 338 -FAST f=21 a=9 74.696074 2.579319 8 194 -FAST f=21 a=9 0.811028 2.579319 8 194 -FAST f=21 a=10 76.211170 2.578766 8 290 -FAST f=21 a=10 0.809715 2.578766 8 290 -FAST f=22 a=1 138.976871 2.580478 8 194 -FAST f=22 a=1 1.748932 2.580478 8 194 -FAST f=22 a=2 120.164097 2.583633 8 386 -FAST f=22 a=2 1.333239 2.583633 8 386 -FAST f=22 a=3 111.986474 2.582566 6 194 -FAST f=22 a=3 1.305734 2.582566 6 194 -FAST f=22 a=4 108.548148 2.583068 6 194 -FAST f=22 a=4 1.314026 2.583068 6 194 -FAST f=22 a=5 103.173017 2.583495 6 290 -FAST f=22 a=5 1.228664 2.583495 6 290 -FAST f=22 a=6 108.421262 2.582349 8 530 -FAST f=22 a=6 1.076773 2.582349 8 530 -FAST f=22 a=7 103.284127 2.581022 8 386 -FAST f=22 a=7 1.112117 2.581022 8 386 -FAST f=22 a=8 96.330279 2.581073 8 290 -FAST f=22 a=8 1.109303 2.581073 8 290 -FAST f=22 a=9 97.651348 2.580075 6 194 -FAST f=22 a=9 0.933032 2.580075 6 194 -FAST f=22 a=10 101.660621 2.584886 8 194 -FAST f=22 a=10 0.796823 2.584886 8 194 -FAST f=23 a=1 159.322978 2.581474 6 242 -FAST f=23 a=1 2.015878 2.581474 6 242 -FAST f=23 a=2 134.331775 2.581619 8 194 -FAST f=23 a=2 1.545845 2.581619 8 194 -FAST f=23 a=3 127.724552 2.579888 6 338 -FAST f=23 a=3 1.444496 2.579888 6 338 -FAST f=23 a=4 126.077675 2.578137 6 242 -FAST f=23 a=4 1.364394 2.578137 6 242 -FAST f=23 a=5 124.914027 2.580843 8 338 -FAST f=23 a=5 1.116059 2.580843 8 338 -FAST f=23 a=6 122.874153 2.577637 6 338 -FAST f=23 a=6 1.164584 2.577637 6 338 -FAST f=23 a=7 123.099257 2.582715 6 386 -FAST f=23 a=7 1.354042 2.582715 6 386 -FAST f=23 a=8 122.026753 2.577681 8 194 -FAST f=23 a=8 1.210966 2.577681 8 194 -FAST f=23 a=9 121.164312 2.584599 6 290 -FAST f=23 a=9 1.174859 2.584599 6 290 -FAST f=23 a=10 117.462222 2.580358 8 194 -FAST f=23 a=10 1.075258 2.580358 8 194 -FAST f=24 a=1 169.539659 2.581642 6 194 -FAST f=24 a=1 1.916804 2.581642 6 194 -FAST f=24 a=2 160.539270 2.580421 6 290 -FAST f=24 a=2 1.71087 2.580421 6 290 -FAST f=24 a=3 155.455874 2.580449 
6 242 -FAST f=24 a=3 1.60307 2.580449 6 242 -FAST f=24 a=4 147.630320 2.582953 6 338 -FAST f=24 a=4 1.396364 2.582953 6 338 -FAST f=24 a=5 133.767428 2.580589 6 290 -FAST f=24 a=5 1.19933 2.580589 6 290 -FAST f=24 a=6 146.437535 2.579453 8 194 -FAST f=24 a=6 1.385405 2.579453 8 194 -FAST f=24 a=7 147.227507 2.584155 8 386 -FAST f=24 a=7 1.48942 2.584155 8 386 -FAST f=24 a=8 138.005773 2.584115 8 194 -FAST f=24 a=8 1.352 2.584115 8 194 -FAST f=24 a=9 141.442625 2.582902 8 290 -FAST f=24 a=9 1.39647 2.582902 8 290 -FAST f=24 a=10 142.157446 2.582701 8 434 -FAST f=24 a=10 1.498889 2.582701 8 434 diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c deleted file mode 100644 index cd943797bdead..0000000000000 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c +++ /dev/null @@ -1,442 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* strcmp, strlen */ -#include <errno.h> /* errno */ -#include <ctype.h> -#include <time.h> -#include "random.h" -#include "dictBuilder.h" -#include "zstd_internal.h" /* includes zstd.h */ -#include "io.h" -#include "util.h" -#include "zdict.h" - - - -/*-************************************* -* Console display -***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - -static const U64 g_refreshRate = SEC_TO_MICRO / 6; -static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; - -#define DISPLAYUPDATE(l, ...) 
{ if (displayLevel>=l) { \ - if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \ - { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ - if (displayLevel>=4) fflush(stderr); } } } - - -/*-************************************* -* Exceptions -***************************************/ -#ifndef DEBUG -# define DEBUG 0 -#endif -#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); -#define EXM_THROW(error, ...) \ -{ \ - DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ - DISPLAY("Error %i : ", error); \ - DISPLAY(__VA_ARGS__); \ - DISPLAY("\n"); \ - exit(error); \ -} - - -/*-************************************* -* Constants -***************************************/ -static const unsigned g_defaultMaxDictSize = 110 KB; -#define DEFAULT_CLEVEL 3 -#define DEFAULT_DISPLAYLEVEL 2 - - -/*-************************************* -* Struct -***************************************/ -typedef struct { - const void* dictBuffer; - size_t dictSize; -} dictInfo; - - -/*-************************************* -* Dictionary related operations -***************************************/ -/** createDictFromFiles() : - * Based on type of param given, train dictionary using the corresponding algorithm - * @return dictInfo containing dictionary buffer and dictionary size - */ -dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize, - ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams, - ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) { - unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel : - coverParams ? coverParams->zParams.notificationLevel : - legacyParams ? legacyParams->zParams.notificationLevel : - fastParams ? 
fastParams->zParams.notificationLevel : - DEFAULT_DISPLAYLEVEL; /* no dict */ - void* const dictBuffer = malloc(maxDictSize); - - dictInfo* dInfo = NULL; - - /* Checks */ - if (!dictBuffer) - EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */ - - { size_t dictSize; - if(randomParams) { - dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, *randomParams); - }else if(coverParams) { - /* Run the optimize version if either k or d is not provided */ - if (!coverParams->d || !coverParams->k){ - dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, coverParams); - } else { - dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, *coverParams); - } - } else if(legacyParams) { - dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, *legacyParams); - } else if(fastParams) { - /* Run the optimize version if either k or d is not provided */ - if (!fastParams->d || !fastParams->k) { - dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, fastParams); - } else { - dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, *fastParams); - } - } else { - dictSize = 0; - } - if (ZDICT_isError(dictSize)) { - DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ - free(dictBuffer); - return dInfo; - } - dInfo = (dictInfo *)malloc(sizeof(dictInfo)); - dInfo->dictBuffer = dictBuffer; - dInfo->dictSize = dictSize; - } - return dInfo; -} - - -/** compressWithDict() : - * Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level - * @return compression 
ratio - */ -double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) { - /* Local variables */ - size_t totalCompressedSize = 0; - size_t totalOriginalSize = 0; - const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0; - double cRatio; - size_t dstCapacity; - int i; - - /* Pointers */ - ZSTD_CDict *cdict = NULL; - ZSTD_CCtx* cctx = NULL; - size_t *offsets = NULL; - void* dst = NULL; - - /* Allocate dst with enough space to compress the maximum sized sample */ - { - size_t maxSampleSize = 0; - for (i = 0; i < srcInfo->nbSamples; i++) { - maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize); - } - dstCapacity = ZSTD_compressBound(maxSampleSize); - dst = malloc(dstCapacity); - } - - /* Calculate offset for each sample */ - offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t)); - offsets[0] = 0; - for (i = 1; i <= srcInfo->nbSamples; i++) { - offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1]; - } - - /* Create the cctx */ - cctx = ZSTD_createCCtx(); - if(!cctx || !dst) { - cRatio = -1; - goto _cleanup; - } - - /* Create CDict if there's a dictionary stored on buffer */ - if (hasDict) { - cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel); - if(!cdict) { - cRatio = -1; - goto _cleanup; - } - } - - /* Compress each sample and sum their sizes*/ - const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer; - for (i = 0; i < srcInfo->nbSamples; i++) { - size_t compressedSize; - if(hasDict) { - compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict); - } else { - compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel); - } - if (ZSTD_isError(compressedSize)) { - cRatio = -1; - goto _cleanup; - } - totalCompressedSize += compressedSize; - } - - /* Sum original sizes */ - for (i = 0; i<srcInfo->nbSamples; i++) { - totalOriginalSize += 
srcInfo->samplesSizes[i]; - } - - /* Calculate compression ratio */ - DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize); - DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize); - cRatio = (double)totalOriginalSize/(double)totalCompressedSize; - -_cleanup: - free(dst); - free(offsets); - ZSTD_freeCCtx(cctx); - ZSTD_freeCDict(cdict); - return cRatio; -} - - -/** FreeDictInfo() : - * Free memory allocated for dictInfo - */ -void freeDictInfo(dictInfo* info) { - if (!info) return; - if (info->dictBuffer) free((void*)(info->dictBuffer)); - free(info); -} - - - -/*-******************************************************** - * Benchmarking functions -**********************************************************/ -/** benchmarkDictBuilder() : - * Measure how long a dictionary builder takes and compression ratio with the dictionary built - * @return 0 if benchmark successfully, 1 otherwise - */ -int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam, - ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam, - ZDICT_fastCover_params_t *fastParam) { - /* Local variables */ - const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel : - coverParam ? coverParam->zParams.notificationLevel : - legacyParam ? legacyParam->zParams.notificationLevel : - fastParam ? fastParam->zParams.notificationLevel: - DEFAULT_DISPLAYLEVEL; /* no dict */ - const char* name = randomParam ? "RANDOM" : - coverParam ? "COVER" : - legacyParam ? "LEGACY" : - fastParam ? "FAST": - "NODICT"; /* no dict */ - const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel : - coverParam ? coverParam->zParams.compressionLevel : - legacyParam ? legacyParam->zParams.compressionLevel : - fastParam ? 
fastParam->zParams.compressionLevel: - DEFAULT_CLEVEL; /* no dict */ - int result = 0; - - /* Calculate speed */ - const UTIL_time_t begin = UTIL_getTime(); - dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam); - const U64 timeMicro = UTIL_clockSpanMicro(begin); - const double timeSec = timeMicro / (double)SEC_TO_MICRO; - if (!dInfo) { - DISPLAYLEVEL(1, "%s does not train successfully\n", name); - result = 1; - goto _cleanup; - } - DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec); - - /* Calculate compression ratio */ - const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel); - if (cRatio < 0) { - DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name); - result = 1; - goto _cleanup; - - } - DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio); - -_cleanup: - freeDictInfo(dInfo); - return result; -} - - - -int main(int argCount, const char* argv[]) -{ - const int displayLevel = DEFAULT_DISPLAYLEVEL; - const char* programName = argv[0]; - int result = 0; - - /* Initialize arguments to default values */ - unsigned k = 200; - unsigned d = 8; - unsigned f; - unsigned accel; - unsigned i; - const unsigned cLevel = DEFAULT_CLEVEL; - const unsigned dictID = 0; - const unsigned maxDictSize = g_defaultMaxDictSize; - - /* Initialize table to store input files */ - const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); - unsigned filenameIdx = 0; - - char* fileNamesBuf = NULL; - unsigned fileNamesNb = filenameIdx; - const int followLinks = 0; - const char** extendedFileList = NULL; - - /* Parse arguments */ - for (i = 1; i < argCount; i++) { - const char* argument = argv[i]; - if (longCommandWArg(&argument, "in=")) { - filenameTable[filenameIdx] = argument; - filenameIdx++; - continue; - } - DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n"); - return 1; - } - - /* Get the list of all files recursively (because 
followLinks==0)*/ - extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, - &fileNamesNb, followLinks); - if (extendedFileList) { - unsigned u; - for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]); - free((void*)filenameTable); - filenameTable = extendedFileList; - filenameIdx = fileNamesNb; - } - - /* get sampleInfo */ - size_t blockSize = 0; - sampleInfo* srcInfo= getSampleInfo(filenameTable, - filenameIdx, blockSize, maxDictSize, displayLevel); - - /* set up zParams */ - ZDICT_params_t zParams; - zParams.compressionLevel = cLevel; - zParams.notificationLevel = displayLevel; - zParams.dictID = dictID; - - /* with no dict */ - { - const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL); - if(noDictResult) { - result = 1; - goto _cleanup; - } - } - - /* for random */ - { - ZDICT_random_params_t randomParam; - randomParam.zParams = zParams; - randomParam.k = k; - const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL); - DISPLAYLEVEL(2, "k=%u\n", randomParam.k); - if(randomResult) { - result = 1; - goto _cleanup; - } - } - - /* for legacy */ - { - ZDICT_legacy_params_t legacyParam; - legacyParam.zParams = zParams; - legacyParam.selectivityLevel = 9; - const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL); - DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel); - if(legacyResult) { - result = 1; - goto _cleanup; - } - } - - /* for cover */ - { - /* for cover (optimizing k and d) */ - ZDICT_cover_params_t coverParam; - memset(&coverParam, 0, sizeof(coverParam)); - coverParam.zParams = zParams; - coverParam.splitPoint = 1.0; - coverParam.steps = 40; - coverParam.nbThreads = 1; - const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL); - DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, 
coverParam.steps, (unsigned)(coverParam.splitPoint * 100)); - if(coverOptResult) { - result = 1; - goto _cleanup; - } - - /* for cover (with k and d provided) */ - const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL); - DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100)); - if(coverResult) { - result = 1; - goto _cleanup; - } - - } - - /* for fastCover */ - for (f = 15; f < 25; f++){ - DISPLAYLEVEL(2, "current f is %u\n", f); - for (accel = 1; accel < 11; accel++) { - DISPLAYLEVEL(2, "current accel is %u\n", accel); - /* for fastCover (optimizing k and d) */ - ZDICT_fastCover_params_t fastParam; - memset(&fastParam, 0, sizeof(fastParam)); - fastParam.zParams = zParams; - fastParam.f = f; - fastParam.steps = 40; - fastParam.nbThreads = 1; - fastParam.accel = accel; - const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); - DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel); - if(fastOptResult) { - result = 1; - goto _cleanup; - } - - /* for fastCover (with k and d provided) */ - for (i = 0; i < 5; i++) { - const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); - DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel); - if(fastResult) { - result = 1; - goto _cleanup; - } - } - } - } - - - /* Free allocated memory */ -_cleanup: - UTIL_freeFileList(extendedFileList, fileNamesBuf); - freeSampleInfo(srcInfo); - return result; -} diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h b/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h deleted file mode 100644 
index 781ec8c2f39e1..0000000000000 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h +++ /dev/null @@ -1,6 +0,0 @@ -/* ZDICT_trainFromBuffer_legacy() : - * issue : samplesBuffer need to be followed by a noisy guard band. - * work around : duplicate the buffer, and add the noise */ -size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_legacy_params_t params); diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh b/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh deleted file mode 100755 index 5eaf5930a3c63..0000000000000 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh +++ /dev/null @@ -1,2 +0,0 @@ -echo "Benchmark with in=../../lib/common" -./benchmark in=../../../lib/common diff --git a/contrib/experimental_dict_builders/fastCover/Makefile b/contrib/experimental_dict_builders/fastCover/Makefile deleted file mode 100644 index 3ba24790ce01d..0000000000000 --- a/contrib/experimental_dict_builders/fastCover/Makefile +++ /dev/null @@ -1,54 +0,0 @@ -ARG := - -CC ?= gcc -CFLAGS ?= -O3 -g -INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder - -IO_FILE := ../randomDictBuilder/io.c - -TEST_INPUT := ../../../lib -TEST_OUTPUT := fastCoverDict - -all: main run clean - -.PHONY: test -test: main testrun testshell clean - -.PHONY: run -run: - echo "Building a fastCover dictionary with given arguments" - ./main $(ARG) - -main: main.o io.o fastCover.o libzstd.a - $(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main - -main.o: main.c - $(CC) $(CFLAGS) $(INCLUDES) -c main.c - -fastCover.o: fastCover.c - $(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c - -io.o: $(IO_FILE) - $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) - -libzstd.a: - $(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a - mv ../../../lib/libzstd.a . 
- -.PHONY: testrun -testrun: main - echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) " - ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT) - zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q - rm -f $(TEST_OUTPUT) - -.PHONY: testshell -testshell: test.sh - sh test.sh - echo "Finish running test.sh" - -.PHONY: clean -clean: - rm -f *.o main libzstd.a - $(MAKE) -C ../../../lib clean - echo "Cleaning is completed" diff --git a/contrib/experimental_dict_builders/fastCover/README.md b/contrib/experimental_dict_builders/fastCover/README.md deleted file mode 100644 index ad377743f2a71..0000000000000 --- a/contrib/experimental_dict_builders/fastCover/README.md +++ /dev/null @@ -1,24 +0,0 @@ -FastCover Dictionary Builder - -### Permitted Arguments: -Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in=" -Output Dictionary (out=dictName): if not provided, default to fastCoverDict -Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0 -Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB -Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200 -Size of Dmer (d=#): either 6 or 8; if not provided, default to 8 -Number of steps (steps=#): positive number, if not provided, default to 32 -Percentage of samples used for training(split=#): positive number; if not provided, default to 100 - - -###Running Test: -make test - - -###Usage: -To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments -If k or d is not provided, the optimize version of FASTCOVER is run. 
- -### Examples: -make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520" -make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c deleted file mode 100644 index 0a338bde2b203..0000000000000 --- a/contrib/experimental_dict_builders/fastCover/fastCover.c +++ /dev/null @@ -1,809 +0,0 @@ -/*-************************************* -* Dependencies -***************************************/ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* memset */ -#include <time.h> /* clock */ -#include "mem.h" /* read */ -#include "pool.h" -#include "threading.h" -#include "fastCover.h" -#include "zstd_internal.h" /* includes zstd.h */ -#include "zdict.h" - - -/*-************************************* -* Constants -***************************************/ -#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB)) -#define FASTCOVER_MAX_F 32 -#define DEFAULT_SPLITPOINT 1.0 - -/*-************************************* -* Console display -***************************************/ -static int g_displayLevel = 2; -#define DISPLAY(...) \ - { \ - fprintf(stderr, __VA_ARGS__); \ - fflush(stderr); \ - } -#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ - if (displayLevel >= l) { \ - DISPLAY(__VA_ARGS__); \ - } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ -#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) - -#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ - if (displayLevel >= l) { \ - if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ - g_time = clock(); \ - DISPLAY(__VA_ARGS__); \ - } \ - } -#define DISPLAYUPDATE(l, ...) 
LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) -static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; -static clock_t g_time = 0; - - -/*-************************************* -* Hash Functions -***************************************/ -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } - - -/** - * Hash the d-byte value pointed to by p and mod 2^f - */ -static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) { - if (d == 6) { - return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1); - } - return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1); -} - - -/*-************************************* -* Context -***************************************/ -typedef struct { - const BYTE *samples; - size_t *offsets; - const size_t *samplesSizes; - size_t nbSamples; - size_t nbTrainSamples; - size_t nbTestSamples; - size_t nbDmers; - U32 *freqs; - U16 *segmentFreqs; - unsigned d; -} FASTCOVER_ctx_t; - - -/*-************************************* -* Helper functions -***************************************/ -/** - * Returns the sum of the sample sizes. - */ -static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) { - size_t sum = 0; - unsigned i; - for (i = 0; i < nbSamples; ++i) { - sum += samplesSizes[i]; - } - return sum; -} - - -/*-************************************* -* fast functions -***************************************/ -/** - * A segment is a range in the source as well as the score of the segment. 
- */ -typedef struct { - U32 begin; - U32 end; - U32 score; -} FASTCOVER_segment_t; - - -/** - * Selects the best segment in an epoch. - * Segments of are scored according to the function: - * - * Let F(d) be the frequency of all dmers with hash value d. - * Let S_i be hash value of the dmer at position i of segment S which has length k. - * - * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) - * - * Once the dmer with hash value d is in the dictionary we set F(d) = F(d)/2. - */ -static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, - U32 *freqs, U32 begin,U32 end, - ZDICT_fastCover_params_t parameters) { - /* Constants */ - const U32 k = parameters.k; - const U32 d = parameters.d; - const U32 dmersInK = k - d + 1; - /* Try each segment (activeSegment) and save the best (bestSegment) */ - FASTCOVER_segment_t bestSegment = {0, 0, 0}; - FASTCOVER_segment_t activeSegment; - /* Reset the activeDmers in the segment */ - /* The activeSegment starts at the beginning of the epoch. */ - activeSegment.begin = begin; - activeSegment.end = begin; - activeSegment.score = 0; - { - /* Slide the activeSegment through the whole epoch. - * Save the best segment in bestSegment. 
- */ - while (activeSegment.end < end) { - /* Get hash value of current dmer */ - const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d); - /* Add frequency of this index to score if this is the first occurrence of index in active segment */ - if (ctx->segmentFreqs[index] == 0) { - activeSegment.score += freqs[index]; - } - ctx->segmentFreqs[index] += 1; - /* Increment end of segment */ - activeSegment.end += 1; - /* If the window is now too large, drop the first position */ - if (activeSegment.end - activeSegment.begin == dmersInK + 1) { - /* Get hash value of the dmer to be eliminated from active segment */ - const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d); - ctx->segmentFreqs[delIndex] -= 1; - /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ - if (ctx->segmentFreqs[delIndex] == 0) { - activeSegment.score -= freqs[delIndex]; - } - /* Increment start of segment */ - activeSegment.begin += 1; - } - /* If this segment is the best so far save it */ - if (activeSegment.score > bestSegment.score) { - bestSegment = activeSegment; - } - } - /* Zero out rest of segmentFreqs array */ - while (activeSegment.begin < end) { - const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d); - ctx->segmentFreqs[delIndex] -= 1; - activeSegment.begin += 1; - } - } - { - /* Trim off the zero frequency head and tail from the segment. 
*/ - U32 newBegin = bestSegment.end; - U32 newEnd = bestSegment.begin; - U32 pos; - for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { - const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d); - U32 freq = freqs[index]; - if (freq != 0) { - newBegin = MIN(newBegin, pos); - newEnd = pos + 1; - } - } - bestSegment.begin = newBegin; - bestSegment.end = newEnd; - } - { - /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ - U32 pos; - for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { - const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d); - freqs[i] = 0; - } - } - return bestSegment; -} - -/** - * Check the validity of the parameters. - * Returns non-zero if the parameters are valid and 0 otherwise. - */ -static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters, - size_t maxDictSize) { - /* k, d, and f are required parameters */ - if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) { - return 0; - } - /* d has to be 6 or 8 */ - if (parameters.d != 6 && parameters.d != 8) { - return 0; - } - /* 0 < f <= FASTCOVER_MAX_F */ - if (parameters.f > FASTCOVER_MAX_F) { - return 0; - } - /* k <= maxDictSize */ - if (parameters.k > maxDictSize) { - return 0; - } - /* d <= k */ - if (parameters.d > parameters.k) { - return 0; - } - /* 0 < splitPoint <= 1 */ - if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { - return 0; - } - return 1; -} - - -/** - * Clean up a context initialized with `FASTCOVER_ctx_init()`. 
- */ -static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) { - if (!ctx) { - return; - } - if (ctx->segmentFreqs) { - free(ctx->segmentFreqs); - ctx->segmentFreqs = NULL; - } - if (ctx->freqs) { - free(ctx->freqs); - ctx->freqs = NULL; - } - if (ctx->offsets) { - free(ctx->offsets); - ctx->offsets = NULL; - } -} - -/** - * Calculate for frequency of hash value of each dmer in ctx->samples - */ -static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){ - size_t start; /* start of current dmer */ - for (unsigned i = 0; i < ctx->nbTrainSamples; i++) { - size_t currSampleStart = ctx->offsets[i]; - size_t currSampleEnd = ctx->offsets[i+1]; - start = currSampleStart; - while (start + ctx->d <= currSampleEnd) { - const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d); - freqs[dmerIndex]++; - start++; - } - } -} - -/** - * Prepare a context for dictionary building. - * The context is only dependent on the parameter `d` and can used multiple - * times. - * Returns 1 on success or zero on error. - * The context must be destroyed with `FASTCOVER_ctx_destroy()`. - */ -static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, - unsigned d, double splitPoint, unsigned f) { - const BYTE *const samples = (const BYTE *)samplesBuffer; - const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples); - /* Split samples into testing and training sets */ - const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; - const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; - const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; - const size_t testSamplesSize = splitPoint < 1.0 ? 
FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; - /* Checks */ - if (totalSamplesSize < MAX(d, sizeof(U64)) || - totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { - DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", - (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); - return 0; - } - /* Check if there are at least 5 training samples */ - if (nbTrainSamples < 5) { - DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); - return 0; - } - /* Check if there's testing sample */ - if (nbTestSamples < 1) { - DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); - return 0; - } - /* Zero the context */ - memset(ctx, 0, sizeof(*ctx)); - DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, - (U32)trainingSamplesSize); - DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, - (U32)testSamplesSize); - - ctx->samples = samples; - ctx->samplesSizes = samplesSizes; - ctx->nbSamples = nbSamples; - ctx->nbTrainSamples = nbTrainSamples; - ctx->nbTestSamples = nbTestSamples; - ctx->nbDmers = trainingSamplesSize - d + 1; - ctx->d = d; - - /* The offsets of each file */ - ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); - if (!ctx->offsets) { - DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); - FASTCOVER_ctx_destroy(ctx); - return 0; - } - - /* Fill offsets from the samplesSizes */ - { - U32 i; - ctx->offsets[0] = 0; - for (i = 1; i <= nbSamples; ++i) { - ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; - } - } - - /* Initialize frequency array of size 2^f */ - ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32)); - ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16)); - DISPLAYLEVEL(2, "Computing frequencies\n"); - FASTCOVER_computeFrequency(ctx->freqs, f, ctx); - - return 1; -} - - -/** - * Given the prepared context build the dictionary. 
- */ -static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs, - void *dictBuffer, - size_t dictBufferCapacity, - ZDICT_fastCover_params_t parameters){ - BYTE *const dict = (BYTE *)dictBuffer; - size_t tail = dictBufferCapacity; - /* Divide the data up into epochs of equal size. - * We will select at least one segment from each epoch. - */ - const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k)); - const U32 epochSize = (U32)(ctx->nbDmers / epochs); - size_t epoch; - DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs, - epochSize); - /* Loop through the epochs until there are no more segments or the dictionary - * is full. - */ - for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { - const U32 epochBegin = (U32)(epoch * epochSize); - const U32 epochEnd = epochBegin + epochSize; - size_t segmentSize; - /* Select a segment */ - FASTCOVER_segment_t segment = FASTCOVER_selectSegment( - ctx, freqs, epochBegin, epochEnd, parameters); - - /* If the segment covers no dmers, then we are out of content */ - if (segment.score == 0) { - break; - } - - /* Trim the segment if necessary and if it is too small then we are done */ - segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); - if (segmentSize < parameters.d) { - break; - } - - /* We fill the dictionary from the back to allow the best segments to be - * referenced with the smallest offsets. - */ - tail -= segmentSize; - memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); - DISPLAYUPDATE( - 2, "\r%u%% ", - (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); - } - DISPLAYLEVEL(2, "\r%79s\r", ""); - return tail; -} - - -/** - * FASTCOVER_best_t is used for two purposes: - * 1. Synchronizing threads. - * 2. Saving the best parameters and dictionary. - * - * All of the methods except FASTCOVER_best_init() are thread safe if zstd is - * compiled with multithreaded support. 
- */ -typedef struct fast_best_s { - ZSTD_pthread_mutex_t mutex; - ZSTD_pthread_cond_t cond; - size_t liveJobs; - void *dict; - size_t dictSize; - ZDICT_fastCover_params_t parameters; - size_t compressedSize; -} FASTCOVER_best_t; - -/** - * Initialize the `FASTCOVER_best_t`. - */ -static void FASTCOVER_best_init(FASTCOVER_best_t *best) { - if (best==NULL) return; /* compatible with init on NULL */ - (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); - (void)ZSTD_pthread_cond_init(&best->cond, NULL); - best->liveJobs = 0; - best->dict = NULL; - best->dictSize = 0; - best->compressedSize = (size_t)-1; - memset(&best->parameters, 0, sizeof(best->parameters)); -} - -/** - * Wait until liveJobs == 0. - */ -static void FASTCOVER_best_wait(FASTCOVER_best_t *best) { - if (!best) { - return; - } - ZSTD_pthread_mutex_lock(&best->mutex); - while (best->liveJobs != 0) { - ZSTD_pthread_cond_wait(&best->cond, &best->mutex); - } - ZSTD_pthread_mutex_unlock(&best->mutex); -} - -/** - * Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t. - */ -static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) { - if (!best) { - return; - } - FASTCOVER_best_wait(best); - if (best->dict) { - free(best->dict); - } - ZSTD_pthread_mutex_destroy(&best->mutex); - ZSTD_pthread_cond_destroy(&best->cond); -} - -/** - * Called when a thread is about to be launched. - * Increments liveJobs. - */ -static void FASTCOVER_best_start(FASTCOVER_best_t *best) { - if (!best) { - return; - } - ZSTD_pthread_mutex_lock(&best->mutex); - ++best->liveJobs; - ZSTD_pthread_mutex_unlock(&best->mutex); -} - -/** - * Called when a thread finishes executing, both on error or success. - * Decrements liveJobs and signals any waiting threads if liveJobs == 0. - * If this dictionary is the best so far save it and its parameters. 
- */ -static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize, - ZDICT_fastCover_params_t parameters, void *dict, - size_t dictSize) { - if (!best) { - return; - } - { - size_t liveJobs; - ZSTD_pthread_mutex_lock(&best->mutex); - --best->liveJobs; - liveJobs = best->liveJobs; - /* If the new dictionary is better */ - if (compressedSize < best->compressedSize) { - /* Allocate space if necessary */ - if (!best->dict || best->dictSize < dictSize) { - if (best->dict) { - free(best->dict); - } - best->dict = malloc(dictSize); - if (!best->dict) { - best->compressedSize = ERROR(GENERIC); - best->dictSize = 0; - return; - } - } - /* Save the dictionary, parameters, and size */ - memcpy(best->dict, dict, dictSize); - best->dictSize = dictSize; - best->parameters = parameters; - best->compressedSize = compressedSize; - } - ZSTD_pthread_mutex_unlock(&best->mutex); - if (liveJobs == 0) { - ZSTD_pthread_cond_broadcast(&best->cond); - } - } -} - -/** - * Parameters for FASTCOVER_tryParameters(). - */ -typedef struct FASTCOVER_tryParameters_data_s { - const FASTCOVER_ctx_t *ctx; - FASTCOVER_best_t *best; - size_t dictBufferCapacity; - ZDICT_fastCover_params_t parameters; -} FASTCOVER_tryParameters_data_t; - -/** - * Tries a set of parameters and updates the FASTCOVER_best_t with the results. - * This function is thread safe if zstd is compiled with multithreaded support. - * It takes its parameters as an *OWNING* opaque pointer to support threading. 
- */ -static void FASTCOVER_tryParameters(void *opaque) { - /* Save parameters as local variables */ - FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque; - const FASTCOVER_ctx_t *const ctx = data->ctx; - const ZDICT_fastCover_params_t parameters = data->parameters; - size_t dictBufferCapacity = data->dictBufferCapacity; - size_t totalCompressedSize = ERROR(GENERIC); - /* Allocate space for hash table, dict, and freqs */ - BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); - U32 *freqs = (U32*) malloc((1 << parameters.f) * sizeof(U32)); - if (!dict || !freqs) { - DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); - goto _cleanup; - } - /* Copy the frequencies because we need to modify them */ - memcpy(freqs, ctx->freqs, (1 << parameters.f) * sizeof(U32)); - /* Build the dictionary */ - { - const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, - dictBufferCapacity, parameters); - - dictBufferCapacity = ZDICT_finalizeDictionary( - dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, - parameters.zParams); - if (ZDICT_isError(dictBufferCapacity)) { - DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); - goto _cleanup; - } - } - /* Check total compressed size */ - { - /* Pointers */ - ZSTD_CCtx *cctx; - ZSTD_CDict *cdict; - void *dst; - /* Local variables */ - size_t dstCapacity; - size_t i; - /* Allocate dst with enough space to compress the maximum sized sample */ - { - size_t maxSampleSize = 0; - i = parameters.splitPoint < 1.0 ? 
ctx->nbTrainSamples : 0; - for (; i < ctx->nbSamples; ++i) { - maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize); - } - dstCapacity = ZSTD_compressBound(maxSampleSize); - dst = malloc(dstCapacity); - } - /* Create the cctx and cdict */ - cctx = ZSTD_createCCtx(); - cdict = ZSTD_createCDict(dict, dictBufferCapacity, - parameters.zParams.compressionLevel); - if (!dst || !cctx || !cdict) { - goto _compressCleanup; - } - /* Compress each sample and sum their sizes (or error) */ - totalCompressedSize = dictBufferCapacity; - i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0; - for (; i < ctx->nbSamples; ++i) { - const size_t size = ZSTD_compress_usingCDict( - cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i], - ctx->samplesSizes[i], cdict); - if (ZSTD_isError(size)) { - totalCompressedSize = ERROR(GENERIC); - goto _compressCleanup; - } - totalCompressedSize += size; - } - _compressCleanup: - ZSTD_freeCCtx(cctx); - ZSTD_freeCDict(cdict); - if (dst) { - free(dst); - } - } - -_cleanup: - FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict, - dictBufferCapacity); - free(data); - if (dict) { - free(dict); - } - if (freqs) { - free(freqs); - } -} - -ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) { - BYTE* const dict = (BYTE*)dictBuffer; - FASTCOVER_ctx_t ctx; - parameters.splitPoint = 1.0; - /* Initialize global data */ - g_displayLevel = parameters.zParams.notificationLevel; - /* Checks */ - if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) { - DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); - return ERROR(GENERIC); - } - if (nbSamples == 0) { - DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); - return ERROR(GENERIC); - } - if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { - DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", - 
ZDICT_DICTSIZE_MIN); - return ERROR(dstSize_tooSmall); - } - /* Initialize context */ - if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, - parameters.d, parameters.splitPoint, parameters.f)) { - DISPLAYLEVEL(1, "Failed to initialize context\n"); - return ERROR(GENERIC); - } - /* Build the dictionary */ - DISPLAYLEVEL(2, "Building dictionary\n"); - { - const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, - dictBufferCapacity, parameters); - - const size_t dictionarySize = ZDICT_finalizeDictionary( - dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples, - parameters.zParams); - if (!ZSTD_isError(dictionarySize)) { - DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", - (U32)dictionarySize); - } - FASTCOVER_ctx_destroy(&ctx); - return dictionarySize; - } -} - - - -ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, - ZDICT_fastCover_params_t *parameters) { - /* constants */ - const unsigned nbThreads = parameters->nbThreads; - const double splitPoint = - parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; - const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; - const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; - const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; - const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; - const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; - const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); - const unsigned kIterations = - (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); - const unsigned f = parameters->f == 0 ? 
23 : parameters->f; - - /* Local variables */ - const int displayLevel = parameters->zParams.notificationLevel; - unsigned iteration = 1; - unsigned d; - unsigned k; - FASTCOVER_best_t best; - POOL_ctx *pool = NULL; - - /* Checks */ - if (splitPoint <= 0 || splitPoint > 1) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); - return ERROR(GENERIC); - } - if (kMinK < kMaxD || kMaxK < kMinK) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); - return ERROR(GENERIC); - } - if (nbSamples == 0) { - DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); - return ERROR(GENERIC); - } - if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { - DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", - ZDICT_DICTSIZE_MIN); - return ERROR(dstSize_tooSmall); - } - if (nbThreads > 1) { - pool = POOL_create(nbThreads, 1); - if (!pool) { - return ERROR(memory_allocation); - } - } - /* Initialization */ - FASTCOVER_best_init(&best); - /* Turn down global display level to clean up display at level 2 and below */ - g_displayLevel = displayLevel == 0 ? 
0 : displayLevel - 1; - /* Loop through d first because each new value needs a new context */ - LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", - kIterations); - for (d = kMinD; d <= kMaxD; d += 2) { - /* Initialize the context for this value of d */ - FASTCOVER_ctx_t ctx; - LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - FASTCOVER_best_destroy(&best); - POOL_free(pool); - return ERROR(GENERIC); - } - /* Loop through k reusing the same context */ - for (k = kMinK; k <= kMaxK; k += kStepSize) { - /* Prepare the arguments */ - FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( - sizeof(FASTCOVER_tryParameters_data_t)); - LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); - if (!data) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); - FASTCOVER_best_destroy(&best); - FASTCOVER_ctx_destroy(&ctx); - POOL_free(pool); - return ERROR(GENERIC); - } - data->ctx = &ctx; - data->best = &best; - data->dictBufferCapacity = dictBufferCapacity; - data->parameters = *parameters; - data->parameters.k = k; - data->parameters.d = d; - data->parameters.f = f; - data->parameters.splitPoint = splitPoint; - data->parameters.steps = kSteps; - data->parameters.zParams.notificationLevel = g_displayLevel; - /* Check the parameters */ - if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) { - DISPLAYLEVEL(1, "fastCover parameters incorrect\n"); - free(data); - continue; - } - /* Call the function and pass ownership of data to it */ - FASTCOVER_best_start(&best); - if (pool) { - POOL_add(pool, &FASTCOVER_tryParameters, data); - } else { - FASTCOVER_tryParameters(data); - } - /* Print status */ - LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", - (U32)((iteration * 100) / kIterations)); - ++iteration; - } - 
FASTCOVER_best_wait(&best); - FASTCOVER_ctx_destroy(&ctx); - } - LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); - /* Fill the output buffer and parameters with output of the best parameters */ - { - const size_t dictSize = best.dictSize; - if (ZSTD_isError(best.compressedSize)) { - const size_t compressedSize = best.compressedSize; - FASTCOVER_best_destroy(&best); - POOL_free(pool); - return compressedSize; - } - *parameters = best.parameters; - memcpy(dictBuffer, best.dict, dictSize); - FASTCOVER_best_destroy(&best); - POOL_free(pool); - return dictSize; - } - -} diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.h b/contrib/experimental_dict_builders/fastCover/fastCover.h deleted file mode 100644 index 958e9f4239308..0000000000000 --- a/contrib/experimental_dict_builders/fastCover/fastCover.h +++ /dev/null @@ -1,57 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* memset */ -#include <time.h> /* clock */ -#include "mem.h" /* read */ -#include "pool.h" -#include "threading.h" -#include "zstd_internal.h" /* includes zstd.h */ -#ifndef ZDICT_STATIC_LINKING_ONLY -#define ZDICT_STATIC_LINKING_ONLY -#endif -#include "zdict.h" - - -typedef struct { - unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ - unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ - unsigned f; /* log of size of frequency array */ - unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ - unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ - double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 
when all samples are used for both training and testing */ - ZDICT_params_t zParams; -} ZDICT_fastCover_params_t; - - -/*! ZDICT_optimizeTrainFromBuffer_fastCover(): - * Train a dictionary from an array of samples using a modified version of the COVER algorithm. - * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - * The resulting dictionary will be saved into `dictBuffer`. - * All of the parameters except for f are optional. - * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. - * if steps is zero it defaults to its default value. - * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048]. - * - * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). - * On success `*parameters` contains the parameters selected. - */ - ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, - ZDICT_fastCover_params_t *parameters); - - -/*! ZDICT_trainFromBuffer_fastCover(): - * Train a dictionary from an array of samples using a modified version of the COVER algorithm. - * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - * The resulting dictionary will be saved into `dictBuffer`. - * d, k, and f are required. - * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). 
- */ -ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters); diff --git a/contrib/experimental_dict_builders/fastCover/main.c b/contrib/experimental_dict_builders/fastCover/main.c deleted file mode 100644 index df7d91812e295..0000000000000 --- a/contrib/experimental_dict_builders/fastCover/main.c +++ /dev/null @@ -1,183 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* strcmp, strlen */ -#include <errno.h> /* errno */ -#include <ctype.h> -#include "fastCover.h" -#include "io.h" -#include "util.h" -#include "zdict.h" - - -/*-************************************* -* Console display -***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - -static const U64 g_refreshRate = SEC_TO_MICRO / 6; -static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; - -#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \ - if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \ - { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ - if (displayLevel>=4) fflush(stderr); } } } - - -/*-************************************* -* Exceptions -***************************************/ -#ifndef DEBUG -# define DEBUG 0 -#endif -#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); -#define EXM_THROW(error, ...) 
\ -{ \ - DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ - DISPLAY("Error %i : ", error); \ - DISPLAY(__VA_ARGS__); \ - DISPLAY("\n"); \ - exit(error); \ -} - - -/*-************************************* -* Constants -***************************************/ -static const unsigned g_defaultMaxDictSize = 110 KB; -#define DEFAULT_CLEVEL 3 - - -/*-************************************* -* FASTCOVER -***************************************/ -int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info, - unsigned maxDictSize, - ZDICT_fastCover_params_t *params) { - unsigned const displayLevel = params->zParams.notificationLevel; - void* const dictBuffer = malloc(maxDictSize); - - int result = 0; - - /* Checks */ - if (!dictBuffer) - EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */ - - { size_t dictSize; - /* Run the optimize version if either k or d is not provided */ - if (!params->d || !params->k) { - dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, params); - } else { - dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, *params); - } - DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100)); - if (ZDICT_isError(dictSize)) { - DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ - result = 1; - goto _done; - } - /* save dict */ - DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName); - saveDict(dictFileName, dictBuffer, dictSize); - } - - /* clean up */ -_done: - free(dictBuffer); - return result; -} - - - -int main(int argCount, const char* argv[]) -{ - int displayLevel = 2; - const char* programName = argv[0]; - int operationResult = 0; - - /* Initialize arguments to default 
values */ - unsigned k = 0; - unsigned d = 0; - unsigned f = 23; - unsigned steps = 32; - unsigned nbThreads = 1; - unsigned split = 100; - const char* outputFile = "fastCoverDict"; - unsigned dictID = 0; - unsigned maxDictSize = g_defaultMaxDictSize; - - /* Initialize table to store input files */ - const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); - unsigned filenameIdx = 0; - - char* fileNamesBuf = NULL; - unsigned fileNamesNb = filenameIdx; - int followLinks = 0; /* follow directory recursively */ - const char** extendedFileList = NULL; - - /* Parse arguments */ - for (int i = 1; i < argCount; i++) { - const char* argument = argv[i]; - if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "in=")) { - filenameTable[filenameIdx] = argument; - filenameIdx++; - continue; - } - if (longCommandWArg(&argument, "out=")) { - outputFile = argument; - continue; - } - DISPLAYLEVEL(1, "Incorrect parameters\n"); - operationResult = 1; - return operationResult; - } - - /* Get the list of all files recursively (because followLinks==0)*/ - extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, - &fileNamesNb, followLinks); - if (extendedFileList) { - unsigned u; - for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]); - free((void*)filenameTable); - filenameTable = extendedFileList; 
- filenameIdx = fileNamesNb; - } - - size_t blockSize = 0; - - /* Set up zParams */ - ZDICT_params_t zParams; - zParams.compressionLevel = DEFAULT_CLEVEL; - zParams.notificationLevel = displayLevel; - zParams.dictID = dictID; - - /* Set up fastCover params */ - ZDICT_fastCover_params_t params; - params.zParams = zParams; - params.k = k; - params.d = d; - params.f = f; - params.steps = steps; - params.nbThreads = nbThreads; - params.splitPoint = (double)split/100; - - /* Build dictionary */ - sampleInfo* info = getSampleInfo(filenameTable, - filenameIdx, blockSize, maxDictSize, zParams.notificationLevel); - operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, ¶ms); - - /* Free allocated memory */ - UTIL_freeFileList(extendedFileList, fileNamesBuf); - freeSampleInfo(info); - - return operationResult; -} diff --git a/contrib/experimental_dict_builders/fastCover/test.sh b/contrib/experimental_dict_builders/fastCover/test.sh deleted file mode 100755 index f86915b59fc5b..0000000000000 --- a/contrib/experimental_dict_builders/fastCover/test.sh +++ /dev/null @@ -1,15 +0,0 @@ -echo "Building fastCover dictionary with in=../../lib/common f=20 out=dict1" -./main in=../../../lib/common f=20 out=dict1 -zstd -be3 -D dict1 -r ../../../lib/common -q -echo "Building fastCover dictionary with in=../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000" -./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000 -zstd -be3 -D dict2 -r ../../../lib/common -q -echo "Building fastCover dictionary with 2 sample sources" -./main in=../../../lib/common in=../../../lib/compress out=dict3 -zstd -be3 -D dict3 -r ../../../lib/common -q -echo "Removing dict1 dict2 dict3" -rm -f dict1 dict2 dict3 - -echo "Testing with invalid parameters, should fail" -! ./main in=../../../lib/common r=10 -! 
./main in=../../../lib/common d=10 diff --git a/contrib/experimental_dict_builders/randomDictBuilder/Makefile b/contrib/experimental_dict_builders/randomDictBuilder/Makefile deleted file mode 100644 index bbd40e47c3126..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -ARG := - -CC ?= gcc -CFLAGS ?= -O3 -INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder - -TEST_INPUT := ../../../lib -TEST_OUTPUT := randomDict - -all: main run clean - -.PHONY: test -test: main testrun testshell clean - -.PHONY: run -run: - echo "Building a random dictionary with given arguments" - ./main $(ARG) - -main: main.o io.o random.o libzstd.a - $(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main - -main.o: main.c - $(CC) $(CFLAGS) $(INCLUDES) -c main.c - -random.o: random.c - $(CC) $(CFLAGS) $(INCLUDES) -c random.c - -io.o: io.c - $(CC) $(CFLAGS) $(INCLUDES) -c io.c - -libzstd.a: - $(MAKE) -C ../../../lib libzstd.a - mv ../../../lib/libzstd.a . 
- -.PHONY: testrun -testrun: main - echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) " - ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT) - zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q - rm -f $(TEST_OUTPUT) - -.PHONY: testshell -testshell: test.sh - sh test.sh - echo "Finish running test.sh" - -.PHONY: clean -clean: - rm -f *.o main libzstd.a - $(MAKE) -C ../../../lib clean - echo "Cleaning is completed" diff --git a/contrib/experimental_dict_builders/randomDictBuilder/README.md b/contrib/experimental_dict_builders/randomDictBuilder/README.md deleted file mode 100644 index da12a4280541c..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/README.md +++ /dev/null @@ -1,20 +0,0 @@ -Random Dictionary Builder - -### Permitted Arguments: -Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in=" -Output Dictionary (out=dictName): if not provided, default to defaultDict -Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0 -Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB -Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200 - -###Running Test: -make test - - -###Usage: -To build a random dictionary with the provided arguments: make ARG= followed by arguments - - -### Examples: -make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520" -make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" diff --git a/contrib/experimental_dict_builders/randomDictBuilder/io.c b/contrib/experimental_dict_builders/randomDictBuilder/io.c deleted file mode 100644 index bfe39eaed6b1e..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/io.c +++ /dev/null @@ -1,284 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include 
<string.h> /* strcmp, strlen */ -#include <errno.h> /* errno */ -#include <ctype.h> -#include "io.h" -#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ -#include "platform.h" /* Large Files support */ -#include "util.h" -#include "zdict.h" - -/*-************************************* -* Console display -***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - -static const U64 g_refreshRate = SEC_TO_MICRO / 6; -static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; - -#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \ - if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \ - { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ - if (displayLevel>=4) fflush(stderr); } } } - -/*-************************************* -* Exceptions -***************************************/ -#ifndef DEBUG -# define DEBUG 0 -#endif -#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); -#define EXM_THROW(error, ...) \ -{ \ - DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ - DISPLAY("Error %i : ", error); \ - DISPLAY(__VA_ARGS__); \ - DISPLAY("\n"); \ - exit(error); \ -} - - -/*-************************************* -* Constants -***************************************/ - -#define SAMPLESIZE_MAX (128 KB) -#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB)) -#define RANDOM_MEMMULT 9 -static const size_t g_maxMemory = (sizeof(size_t) == 4) ? 
- (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t)); - -#define NOISELENGTH 32 - - -/*-************************************* -* Commandline related functions -***************************************/ -unsigned readU32FromChar(const char** stringPtr){ - const char errorMsg[] = "error: numeric value too large"; - unsigned result = 0; - while ((**stringPtr >='0') && (**stringPtr <='9')) { - unsigned const max = (((unsigned)(-1)) / 10) - 1; - if (result > max) exit(1); - result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; - } - if ((**stringPtr=='K') || (**stringPtr=='M')) { - unsigned const maxK = ((unsigned)(-1)) >> 10; - if (result > maxK) exit(1); - result <<= 10; - if (**stringPtr=='M') { - if (result > maxK) exit(1); - result <<= 10; - } - (*stringPtr)++; /* skip `K` or `M` */ - if (**stringPtr=='i') (*stringPtr)++; - if (**stringPtr=='B') (*stringPtr)++; - } - return result; -} - -unsigned longCommandWArg(const char** stringPtr, const char* longCommand){ - size_t const comSize = strlen(longCommand); - int const result = !strncmp(*stringPtr, longCommand, comSize); - if (result) *stringPtr += comSize; - return result; -} - - -/* ******************************************************** -* File related operations -**********************************************************/ -/** loadFiles() : - * load samples from files listed in fileNamesTable into buffer. - * works even if buffer is too small to load all samples. - * Also provides the size of each sample into sampleSizes table - * which must be sized correctly, using DiB_fileStats(). - * @return : nb of samples effectively loaded into `buffer` - * *bufferSizePtr is modified, it provides the amount data loaded within buffer. - * sampleSizes is filled with the size of each sample. 
- */ -static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes, - unsigned sstSize, const char** fileNamesTable, unsigned nbFiles, - size_t targetChunkSize, unsigned displayLevel) { - char* const buff = (char*)buffer; - size_t pos = 0; - unsigned nbLoadedChunks = 0, fileIndex; - - for (fileIndex=0; fileIndex<nbFiles; fileIndex++) { - const char* const fileName = fileNamesTable[fileIndex]; - unsigned long long const fs64 = UTIL_getFileSize(fileName); - unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64; - U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1; - U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64; - size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX); - U32 cnb; - FILE* const f = fopen(fileName, "rb"); - if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno)); - DISPLAYUPDATE(2, "Loading %s... \r", fileName); - for (cnb=0; cnb<nbChunks; cnb++) { - size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad); - if (toLoad > *bufferSizePtr-pos) break; - { size_t const readSize = fread(buff+pos, 1, toLoad, f); - if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName); - pos += readSize; - sampleSizes[nbLoadedChunks++] = toLoad; - remainingToLoad -= targetChunkSize; - if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */ - fileIndex = nbFiles; /* stop there */ - break; - } - if (toLoad < targetChunkSize) { - fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR); - } } } - fclose(f); - } - DISPLAYLEVEL(2, "\r%79s\r", ""); - *bufferSizePtr = pos; - DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10)) - return nbLoadedChunks; -} - -#define rotl32(x,r) ((x << r) | (x >> (32 - r))) -static U32 getRand(U32* src) -{ - static const U32 prime1 = 2654435761U; - static const U32 prime2 = 2246822519U; - U32 rand32 = *src; - rand32 *= prime1; - rand32 ^= prime2; - 
rand32 = rotl32(rand32, 13); - *src = rand32; - return rand32 >> 5; -} - -/* shuffle() : - * shuffle a table of file names in a semi-random way - * It improves dictionary quality by reducing "locality" impact, so if sample set is very large, - * it will load random elements from it, instead of just the first ones. */ -static void shuffle(const char** fileNamesTable, unsigned nbFiles) { - U32 seed = 0xFD2FB528; - unsigned i; - for (i = nbFiles - 1; i > 0; --i) { - unsigned const j = getRand(&seed) % (i + 1); - const char* const tmp = fileNamesTable[j]; - fileNamesTable[j] = fileNamesTable[i]; - fileNamesTable[i] = tmp; - } -} - - -/*-******************************************************** -* Dictionary training functions -**********************************************************/ -size_t findMaxMem(unsigned long long requiredMem) { - size_t const step = 8 MB; - void* testmem = NULL; - - requiredMem = (((requiredMem >> 23) + 1) << 23); - requiredMem += step; - if (requiredMem > g_maxMemory) requiredMem = g_maxMemory; - - while (!testmem) { - testmem = malloc((size_t)requiredMem); - requiredMem -= step; - } - - free(testmem); - return (size_t)requiredMem; -} - -void saveDict(const char* dictFileName, - const void* buff, size_t buffSize) { - FILE* const f = fopen(dictFileName, "wb"); - if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName); - - { size_t const n = fwrite(buff, 1, buffSize, f); - if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) } - - { size_t const n = (size_t)fclose(f); - if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) } -} - -/*! getFileStats() : - * Given a list of files, and a chunkSize (0 == no chunk, whole files) - * provides the amount of data to be loaded and the resulting nb of samples. - * This is useful primarily for allocation purpose => sample buffer, and sample sizes table. 
- */ -static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles, - size_t chunkSize, unsigned displayLevel) { - fileStats fs; - unsigned n; - memset(&fs, 0, sizeof(fs)); - for (n=0; n<nbFiles; n++) { - U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]); - U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize; - U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1); - U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize; - size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX); - fs.totalSizeToLoad += cappedChunkSize * nbSamples; - fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX); - fs.nbSamples += nbSamples; - } - DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10)); - return fs; -} - - - - -sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, - unsigned maxDictSize, const unsigned displayLevel) { - fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel); - size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t)); - size_t const memMult = RANDOM_MEMMULT; - size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult; - size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad); - void* const srcBuffer = malloc(loadedSize+NOISELENGTH); - - /* Checks */ - if ((!sampleSizes) || (!srcBuffer)) - EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */ - if (fs.oneSampleTooLarge) { - DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n"); - DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n"); - DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX); - } - if (fs.nbSamples < 5) { - DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n"); - DISPLAYLEVEL(2, "! 
Please provide _one file per sample_. \n"); - DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n"); - EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */ - } - if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) { - DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n"); - DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n"); - } - - /* init */ - if (loadedSize < fs.totalSizeToLoad) - DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20)); - - /* Load input buffer */ - DISPLAYLEVEL(3, "Shuffling input files\n"); - shuffle(fileNamesTable, nbFiles); - nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, - fileNamesTable, nbFiles, chunkSize, displayLevel); - - sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo)); - - info->nbSamples = fs.nbSamples; - info->samplesSizes = sampleSizes; - info->srcBuffer = srcBuffer; - - return info; -} - - -void freeSampleInfo(sampleInfo *info) { - if (!info) return; - if (info->samplesSizes) free((void*)(info->samplesSizes)); - if (info->srcBuffer) free((void*)(info->srcBuffer)); - free(info); -} diff --git a/contrib/experimental_dict_builders/randomDictBuilder/io.h b/contrib/experimental_dict_builders/randomDictBuilder/io.h deleted file mode 100644 index 0ee24604eed2c..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/io.h +++ /dev/null @@ -1,60 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* strcmp, strlen */ -#include <errno.h> /* errno */ -#include <ctype.h> -#include "zstd_internal.h" /* includes zstd.h */ -#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ -#include "platform.h" /* Large Files support */ -#include "util.h" -#include "zdict.h" - - -/*-************************************* -* Structs 
-***************************************/ -typedef struct { - U64 totalSizeToLoad; - unsigned oneSampleTooLarge; - unsigned nbSamples; -} fileStats; - -typedef struct { - const void* srcBuffer; - const size_t *samplesSizes; - size_t nbSamples; -}sampleInfo; - - - -/*! getSampleInfo(): - * Load from input files and add samples to buffer - * @return: a sampleInfo struct containing information about buffer where samples are stored, - * size of each sample, and total number of samples - */ -sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, - unsigned maxDictSize, const unsigned displayLevel); - - - -/*! freeSampleInfo(): - * Free memory allocated for info - */ -void freeSampleInfo(sampleInfo *info); - - - -/*! saveDict(): - * Save data stored on buff to dictFileName - */ -void saveDict(const char* dictFileName, const void* buff, size_t buffSize); - - -unsigned readU32FromChar(const char** stringPtr); - -/** longCommandWArg() : - * check if *stringPtr is the same as longCommand. - * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. - * @return 0 and doesn't modify *stringPtr otherwise. - */ -unsigned longCommandWArg(const char** stringPtr, const char* longCommand); diff --git a/contrib/experimental_dict_builders/randomDictBuilder/main.c b/contrib/experimental_dict_builders/randomDictBuilder/main.c deleted file mode 100644 index 3ad8857460908..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/main.c +++ /dev/null @@ -1,161 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* strcmp, strlen */ -#include <errno.h> /* errno */ -#include <ctype.h> -#include "random.h" -#include "io.h" -#include "util.h" -#include "zdict.h" - - -/*-************************************* -* Console display -***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - -static const U64 g_refreshRate = SEC_TO_MICRO / 6; -static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; - -#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \ - if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \ - { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ - if (displayLevel>=4) fflush(stderr); } } } - - -/*-************************************* -* Exceptions -***************************************/ -#ifndef DEBUG -# define DEBUG 0 -#endif -#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); -#define EXM_THROW(error, ...) \ -{ \ - DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ - DISPLAY("Error %i : ", error); \ - DISPLAY(__VA_ARGS__); \ - DISPLAY("\n"); \ - exit(error); \ -} - - -/*-************************************* -* Constants -***************************************/ -static const unsigned g_defaultMaxDictSize = 110 KB; -#define DEFAULT_CLEVEL 3 -#define DEFAULT_k 200 -#define DEFAULT_OUTPUTFILE "defaultDict" -#define DEFAULT_DICTID 0 - - - -/*-************************************* -* RANDOM -***************************************/ -int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info, - unsigned maxDictSize, - ZDICT_random_params_t *params) { - unsigned const displayLevel = params->zParams.notificationLevel; - void* const dictBuffer = malloc(maxDictSize); - - int result = 0; - - /* Checks */ - if (!dictBuffer) - EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */ - - { size_t dictSize; - dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer, - info->samplesSizes, info->nbSamples, *params); - DISPLAYLEVEL(2, "k=%u\n", params->k); - if (ZDICT_isError(dictSize)) { - DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ - result = 1; - goto _done; - } - /* save dict */ - DISPLAYLEVEL(2, "Save 
dictionary of size %u into file %s \n", (U32)dictSize, dictFileName); - saveDict(dictFileName, dictBuffer, dictSize); - } - - /* clean up */ -_done: - free(dictBuffer); - return result; -} - - - -int main(int argCount, const char* argv[]) -{ - int displayLevel = 2; - const char* programName = argv[0]; - int operationResult = 0; - - /* Initialize arguments to default values */ - unsigned k = DEFAULT_k; - const char* outputFile = DEFAULT_OUTPUTFILE; - unsigned dictID = DEFAULT_DICTID; - unsigned maxDictSize = g_defaultMaxDictSize; - - /* Initialize table to store input files */ - const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); - unsigned filenameIdx = 0; - - /* Parse arguments */ - for (int i = 1; i < argCount; i++) { - const char* argument = argv[i]; - if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "in=")) { - filenameTable[filenameIdx] = argument; - filenameIdx++; - continue; - } - if (longCommandWArg(&argument, "out=")) { - outputFile = argument; - continue; - } - DISPLAYLEVEL(1, "Incorrect parameters\n"); - operationResult = 1; - return operationResult; - } - - char* fileNamesBuf = NULL; - unsigned fileNamesNb = filenameIdx; - int followLinks = 0; /* follow directory recursively */ - const char** extendedFileList = NULL; - extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, - &fileNamesNb, followLinks); - if (extendedFileList) { - unsigned u; - for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]); - free((void*)filenameTable); - filenameTable = extendedFileList; - filenameIdx = fileNamesNb; - } - - size_t blockSize = 0; - - ZDICT_random_params_t params; - ZDICT_params_t zParams; - zParams.compressionLevel = 
DEFAULT_CLEVEL; - zParams.notificationLevel = displayLevel; - zParams.dictID = dictID; - params.zParams = zParams; - params.k = k; - - sampleInfo* info = getSampleInfo(filenameTable, - filenameIdx, blockSize, maxDictSize, zParams.notificationLevel); - operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, &params); - - /* Free allocated memory */ - UTIL_freeFileList(extendedFileList, fileNamesBuf); - freeSampleInfo(info); - - return operationResult; -} diff --git a/contrib/experimental_dict_builders/randomDictBuilder/random.c b/contrib/experimental_dict_builders/randomDictBuilder/random.c deleted file mode 100644 index 5276bea96a56f..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/random.c +++ /dev/null @@ -1,163 +0,0 @@ -/*-************************************* -* Dependencies -***************************************/ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* memset */ -#include <time.h> /* clock */ -#include "random.h" -#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */ -#ifndef ZDICT_STATIC_LINKING_ONLY -#define ZDICT_STATIC_LINKING_ONLY -#endif -#include "zdict.h" - -/*-************************************* -* Console display -***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - -#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ - if (displayLevel >= l) { \ - if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ - g_time = clock(); \ - DISPLAY(__VA_ARGS__); \ - } \ - } -#define DISPLAYUPDATE(l, ...) 
LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__) -static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; -static clock_t g_time = 0; - - - -/* ******************************************************** -* Random Dictionary Builder -**********************************************************/ -/** - * Returns the sum of the sample sizes. - */ -static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) { - size_t sum = 0; - unsigned i; - for (i = 0; i < nbSamples; ++i) { - sum += samplesSizes[i]; - } - return sum; -} - - -/** - * A segment is an inclusive range in the source. - */ -typedef struct { - U32 begin; - U32 end; -} RANDOM_segment_t; - - -/** - * Selects a random segment from totalSamplesSize - k + 1 possible segments - */ -static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize, - ZDICT_random_params_t parameters) { - const U32 k = parameters.k; - RANDOM_segment_t segment; - unsigned index; - - /* Randomly generate a number from 0 to sampleSizes - k */ - index = rand()%(totalSamplesSize - k + 1); - - /* inclusive */ - segment.begin = index; - segment.end = index + k - 1; - - return segment; -} - - -/** - * Check the validity of the parameters. - * Returns non-zero if the parameters are valid and 0 otherwise. - */ -static int RANDOM_checkParameters(ZDICT_random_params_t parameters, - size_t maxDictSize) { - /* k is a required parameter */ - if (parameters.k == 0) { - return 0; - } - /* k <= maxDictSize */ - if (parameters.k > maxDictSize) { - return 0; - } - return 1; -} - - -/** - * Given the prepared context build the dictionary. 
- */ -static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples, - void *dictBuffer, size_t dictBufferCapacity, - ZDICT_random_params_t parameters) { - BYTE *const dict = (BYTE *)dictBuffer; - size_t tail = dictBufferCapacity; - const int displayLevel = parameters.zParams.notificationLevel; - while (tail > 0) { - - /* Select a segment */ - RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters); - - size_t segmentSize; - segmentSize = MIN(segment.end - segment.begin + 1, tail); - - tail -= segmentSize; - memcpy(dict + tail, samples + segment.begin, segmentSize); - DISPLAYUPDATE( - 2, "\r%u%% ", - (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); - } - - return tail; -} - - - - -ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( - void *dictBuffer, size_t dictBufferCapacity, - const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, - ZDICT_random_params_t parameters) { - const int displayLevel = parameters.zParams.notificationLevel; - BYTE* const dict = (BYTE*)dictBuffer; - /* Checks */ - if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) { - DISPLAYLEVEL(1, "k is incorrect\n"); - return ERROR(GENERIC); - } - if (nbSamples == 0) { - DISPLAYLEVEL(1, "Random must have at least one input file\n"); - return ERROR(GENERIC); - } - if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { - DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", - ZDICT_DICTSIZE_MIN); - return ERROR(dstSize_tooSmall); - } - const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples); - const BYTE *const samples = (const BYTE *)samplesBuffer; - - DISPLAYLEVEL(2, "Building dictionary\n"); - { - const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples, - dictBuffer, dictBufferCapacity, parameters); - const size_t dictSize = ZDICT_finalizeDictionary( - dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - samplesBuffer, samplesSizes, nbSamples, parameters.zParams); - if 
(!ZSTD_isError(dictSize)) { - DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", - (U32)dictSize); - } - return dictSize; - } -} diff --git a/contrib/experimental_dict_builders/randomDictBuilder/random.h b/contrib/experimental_dict_builders/randomDictBuilder/random.h deleted file mode 100644 index 352775f950c4d..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/random.h +++ /dev/null @@ -1,29 +0,0 @@ -#include <stdio.h> /* fprintf */ -#include <stdlib.h> /* malloc, free, qsort */ -#include <string.h> /* memset */ -#include <time.h> /* clock */ -#include "zstd_internal.h" /* includes zstd.h */ -#ifndef ZDICT_STATIC_LINKING_ONLY -#define ZDICT_STATIC_LINKING_ONLY -#endif -#include "zdict.h" - - - -typedef struct { - unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */ - ZDICT_params_t zParams; -} ZDICT_random_params_t; - - -/*! ZDICT_trainFromBuffer_random(): - * Train a dictionary from an array of samples. - * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - * The resulting dictionary will be saved into `dictBuffer`. - * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). 
- */ -ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity, - const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, - ZDICT_random_params_t parameters); diff --git a/contrib/experimental_dict_builders/randomDictBuilder/test.sh b/contrib/experimental_dict_builders/randomDictBuilder/test.sh deleted file mode 100755 index 1eb732e52a093..0000000000000 --- a/contrib/experimental_dict_builders/randomDictBuilder/test.sh +++ /dev/null @@ -1,14 +0,0 @@ -echo "Building random dictionary with in=../../lib/common k=200 out=dict1" -./main in=../../../lib/common k=200 out=dict1 -zstd -be3 -D dict1 -r ../../../lib/common -q -echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000" -./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000 -zstd -be3 -D dict2 -r ../../../lib/common -q -echo "Building random dictionary with 2 sample sources" -./main in=../../../lib/common in=../../../lib/compress out=dict3 -zstd -be3 -D dict3 -r ../../../lib/common -q -echo "Removing dict1 dict2 dict3" -rm -f dict1 dict2 dict3 - -echo "Testing with invalid parameters, should fail" -! ./main r=10 diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile deleted file mode 100644 index 425f266c4e464..0000000000000 --- a/contrib/gen_html/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# ################################################################ -# Copyright (c) 2016-present, Facebook, Inc. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). 
-# ################################################################ - -CXXFLAGS ?= -O3 -CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment -CXXFLAGS += $(MOREFLAGS) -FLAGS = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) - -ZSTDAPI = ../../lib/zstd.h -ZSTDMANUAL = ../../doc/zstd_manual.html -LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)` -LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)` -LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)` -LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT) -LIBVER := $(shell echo $(LIBVER_SCRIPT)) - - -# Define *.exe as extension for Windows systems -ifneq (,$(filter Windows%,$(OS))) -EXT =.exe -else -EXT = -endif - - -.PHONY: default -default: gen_html - -.PHONY: all -all: manual - -gen_html: gen_html.cpp - $(CXX) $(FLAGS) $^ -o $@$(EXT) - -$(ZSTDMANUAL): gen_html $(ZSTDAPI) - echo "Update zstd manual in /doc" - ./gen_html $(LIBVER) $(ZSTDAPI) $(ZSTDMANUAL) - -.PHONY: manual -manual: gen_html $(ZSTDMANUAL) - -.PHONY: clean -clean: - @$(RM) gen_html$(EXT) - @echo Cleaning completed diff --git a/contrib/gen_html/README.md b/contrib/gen_html/README.md deleted file mode 100644 index 63a4caa25061f..0000000000000 --- a/contrib/gen_html/README.md +++ /dev/null @@ -1,31 +0,0 @@ -gen_html - a program for automatic generation of zstd manual -============================================================ - -#### Introduction - -This simple C++ program generates a single-page HTML manual from `zstd.h`. 
- -The format of recognized comment blocks is as follows: -- comments of type `/*!` mean: this is a function declaration; switch comments with declarations -- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line -- comments of type `/*=` and `/**=` mean: use a `<H3>` header and show also all functions until first empty line -- comments of type `/*X` where `X` is different from above-mentioned are ignored - -Moreover: -- `ZSTDLIB_API` is removed to improve readability -- `typedef` are detected and included even if uncommented -- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold) - - -#### Usage - -The program requires 3 parameters: -``` -gen_html [zstd_version] [input_file] [output_html] -``` - -To compile program and generate zstd manual we have used: -``` -make -./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html -``` diff --git a/contrib/gen_html/gen-zstd-manual.sh b/contrib/gen_html/gen-zstd-manual.sh deleted file mode 100755 index 57a8b6ea512a5..0000000000000 --- a/contrib/gen_html/gen-zstd-manual.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -LIBVER_MAJOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h` -LIBVER_MINOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h` -LIBVER_PATCH_SCRIPT=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h` -LIBVER_SCRIPT=$LIBVER_MAJOR_SCRIPT.$LIBVER_MINOR_SCRIPT.$LIBVER_PATCH_SCRIPT - -echo ZSTD_VERSION=$LIBVER_SCRIPT -./gen_html $LIBVER_SCRIPT ../../lib/zstd.h ./zstd_manual.html diff --git a/contrib/gen_html/gen_html.cpp b/contrib/gen_html/gen_html.cpp deleted file mode 100644 index 90d5b21a3aa61..0000000000000 --- a/contrib/gen_html/gen_html.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -#include <iostream> -#include <fstream> -#include <sstream> -#include <vector> -using namespace std; - - -/* trim string at the beginning and at the end */ -void trim(string& s, string characters) -{ - size_t p = s.find_first_not_of(characters); - s.erase(0, p); - - p = s.find_last_not_of(characters); - if (string::npos != p) - s.erase(p+1); -} - - -/* trim C++ style comments */ -void trim_comments(string &s) -{ - size_t spos, epos; - - spos = s.find("/*"); - epos = s.find("*/"); - s = s.substr(spos+3, epos-(spos+3)); -} - - -/* get lines until a given terminator */ -vector<string> get_lines(vector<string>& input, int& linenum, string terminator) -{ - vector<string> out; - string line; - size_t epos; - - while ((size_t)linenum < input.size()) { - line = input[linenum]; - - if (terminator.empty() && line.empty()) { linenum--; break; } - - epos = line.find(terminator); - if (!terminator.empty() && epos!=string::npos) { - out.push_back(line); - break; - } - out.push_back(line); - linenum++; - } - return out; -} - - -/* print line with ZSTDLIB_API removed and C++ comments not bold */ -void print_line(stringstream &sout, string line) -{ - size_t spos; - - if (line.substr(0,12) == "ZSTDLIB_API ") line = line.substr(12); - spos = line.find("/*"); - if (spos!=string::npos) { - sout << line.substr(0, spos); - sout << "</b>" << line.substr(spos) << "<b>" << endl; - } else { - // fprintf(stderr, "lines=%s\n", line.c_str()); - sout << line << endl; - } -} - - -int main(int argc, char *argv[]) { - char exclam; - int linenum, chapter = 1; - vector<string> input, lines, comments, chapters; - string line, version; - size_t spos, l; - stringstream sout; - ifstream istream; - ofstream ostream; - - if (argc < 4) { - cout << "usage: " << argv[0] << 
" [zstd_version] [input_file] [output_html]" << endl; - return 1; - } - - version = "zstd " + string(argv[1]) + " Manual"; - - istream.open(argv[2], ifstream::in); - if (!istream.is_open()) { - cout << "Error opening file " << argv[2] << endl; - return 1; - } - - ostream.open(argv[3], ifstream::out); - if (!ostream.is_open()) { - cout << "Error opening file " << argv[3] << endl; - return 1; - } - - while (getline(istream, line)) { - input.push_back(line); - } - - for (linenum=0; (size_t)linenum < input.size(); linenum++) { - line = input[linenum]; - - /* typedefs are detected and included even if uncommented */ - if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) { - lines = get_lines(input, linenum, "}"); - sout << "<pre><b>"; - for (l=0; l<lines.size(); l++) { - print_line(sout, lines[l]); - } - sout << "</b></pre><BR>" << endl; - continue; - } - - /* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */ - if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) { - sout << "<pre><b>"; - print_line(sout, line); - sout << "</b></pre><BR>" << endl; - continue; - } - - spos = line.find("/**="); - if (spos==string::npos) { - spos = line.find("/*!"); - if (spos==string::npos) - spos = line.find("/**"); - if (spos==string::npos) - spos = line.find("/*-"); - if (spos==string::npos) - spos = line.find("/*="); - if (spos==string::npos) - continue; - exclam = line[spos+2]; - } - else exclam = '='; - - comments = get_lines(input, linenum, "*/"); - if (!comments.empty()) comments[0] = line.substr(spos+3); - if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/")); - for (l=0; l<comments.size(); l++) { - if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2); - else if (comments[l].find(" *")==0) comments[l] = comments[l].substr(3); - trim(comments[l], "*-="); - } - while 
(!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end - while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start - - /* comments of type /*! mean: this is a function declaration; switch comments with declarations */ - if (exclam == '!') { - if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */ - linenum++; - lines = get_lines(input, linenum, ""); - - sout << "<pre><b>"; - for (l=0; l<lines.size(); l++) { - // fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str()); - string fline = lines[l]; - if (fline.substr(0, 12) == "ZSTDLIB_API " || - fline.substr(0, 12) == string(12, ' ')) - fline = fline.substr(12); - print_line(sout, fline); - } - sout << "</b><p>"; - for (l=0; l<comments.size(); l++) { - print_line(sout, comments[l]); - } - sout << "</p></pre><BR>" << endl << endl; - } else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */ - trim(comments[0], " "); - sout << "<h3>" << comments[0] << "</h3><pre>"; - for (l=1; l<comments.size(); l++) { - print_line(sout, comments[l]); - } - sout << "</pre><b><pre>"; - lines = get_lines(input, ++linenum, ""); - for (l=0; l<lines.size(); l++) { - print_line(sout, lines[l]); - } - sout << "</pre></b><BR>" << endl; - } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */ - if (comments.empty()) continue; - - trim(comments[0], " "); - sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>"; - chapters.push_back(comments[0]); - chapter++; - - for (l=1; l<comments.size(); l++) { - print_line(sout, comments[l]); - } - if (comments.size() > 1) - sout << "<BR></pre>" << endl << endl; - else - sout << "</pre>" << endl << endl; - } - } - - ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" 
content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl; - ostream << "<h1>" << version << "</h1>\n"; - - ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n"; - for (size_t i=0; i<chapters.size(); i++) - ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n"; - ostream << "</ol>\n<hr>\n"; - - ostream << sout.str(); - ostream << "</html>" << endl << "</body>" << endl; - - return 0; -} diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile deleted file mode 100644 index 4c055b0ed3fe5..0000000000000 --- a/contrib/largeNbDicts/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -# ################################################################ -# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). 
-# ################################################################ - -PROGDIR = ../../programs -LIBDIR = ../../lib - -LIBZSTD = $(LIBDIR)/libzstd.a - -CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR) - -CFLAGS ?= -O3 -CFLAGS += -std=gnu99 -DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ - -Wstrict-aliasing=1 -Wswitch-enum \ - -Wstrict-prototypes -Wundef -Wpointer-arith \ - -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ - -Wredundant-decls -CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) - - -default: largeNbDicts - -all : largeNbDicts - -largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -.PHONY: $(LIBZSTD) -$(LIBZSTD): - $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" - -benchfn.o: $(PROGDIR)/benchfn.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c - -timefn.o: $(PROGDIR)/timefn.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c - -datagen.o: $(PROGDIR)/datagen.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c - -util.o: $(PROGDIR)/util.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c - - -xxhash.o : $(LIBDIR)/common/xxhash.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c - - -clean: - $(RM) *.o - $(MAKE) -C $(LIBDIR) clean > /dev/null - $(RM) largeNbDicts diff --git a/contrib/largeNbDicts/README.md b/contrib/largeNbDicts/README.md deleted file mode 100644 index f29bcdfe8e37c..0000000000000 --- a/contrib/largeNbDicts/README.md +++ /dev/null @@ -1,25 +0,0 @@ -largeNbDicts -===================== - -`largeNbDicts` is a benchmark test tool -dedicated to the specific scenario of -dictionary decompression using a very large number of dictionaries. -When dictionaries are constantly changing, they are always "cold", -suffering from increased latency due to cache misses. - -The tool is created in a bid to investigate performance for this scenario, -and experiment mitigation techniques. 
- -Command line : -``` -largeNbDicts [Options] filename(s) - -Options : --r : recursively load all files in subdirectories (default: off) --B# : split input into blocks of size # (default: no split) --# : use compression level # (default: 3) --D # : use # as a dictionary (default: create one) --i# : nb benchmark rounds (default: 6) ---nbDicts=# : set nb of dictionaries to # (default: one per block) --h : help (this text) -``` diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c deleted file mode 100644 index 627a6910576f5..0000000000000 --- a/contrib/largeNbDicts/largeNbDicts.c +++ /dev/null @@ -1,817 +0,0 @@ -/* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* largeNbDicts - * This is a benchmark test tool - * dedicated to the specific case of dictionary decompression - * using a very large nb of dictionaries - * thus suffering latency from lots of cache misses. - * It's created in a bid to investigate performance and find optimizations. 
*/ - - -/*--- Dependencies ---*/ - -#include <stddef.h> /* size_t */ -#include <stdlib.h> /* malloc, free, abort */ -#include <stdio.h> /* fprintf */ -#include <limits.h> /* UINT_MAX */ -#include <assert.h> /* assert */ - -#include "util.h" -#include "benchfn.h" -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -#include "zdict.h" - - -/*--- Constants --- */ - -#define KB *(1<<10) -#define MB *(1<<20) - -#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */ -#define DICTSIZE (4 KB) -#define CLEVEL_DEFAULT 3 - -#define BENCH_TIME_DEFAULT_S 6 -#define RUN_TIME_DEFAULT_MS 1000 -#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS) - -#define DISPLAY_LEVEL_DEFAULT 3 - -#define BENCH_SIZE_MAX (1200 MB) - - -/*--- Macros ---*/ - -#define CONTROL(c) { if (!(c)) abort(); } -#undef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) - - -/*--- Display Macros ---*/ - -#define DISPLAY(...) fprintf(stdout, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } -static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ - - -/*--- buffer_t ---*/ - -typedef struct { - void* ptr; - size_t size; - size_t capacity; -} buffer_t; - -static const buffer_t kBuffNull = { NULL, 0, 0 }; - -/* @return : kBuffNull if any error */ -static buffer_t createBuffer(size_t capacity) -{ - assert(capacity > 0); - void* const ptr = malloc(capacity); - if (ptr==NULL) return kBuffNull; - - buffer_t buffer; - buffer.ptr = ptr; - buffer.capacity = capacity; - buffer.size = 0; - return buffer; -} - -static void freeBuffer(buffer_t buff) -{ - free(buff.ptr); -} - - -static void fillBuffer_fromHandle(buffer_t* buff, FILE* f) -{ - size_t const readSize = fread(buff->ptr, 1, buff->capacity, f); - buff->size = readSize; -} - - -/* @return : kBuffNull if any error */ -static buffer_t createBuffer_fromFile(const char* fileName) -{ - U64 const fileSize 
= UTIL_getFileSize(fileName); - size_t const bufferSize = (size_t) fileSize; - - if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull; - assert((U64)bufferSize == fileSize); /* check overflow */ - - { FILE* const f = fopen(fileName, "rb"); - if (f == NULL) return kBuffNull; - - buffer_t buff = createBuffer(bufferSize); - CONTROL(buff.ptr != NULL); - - fillBuffer_fromHandle(&buff, f); - CONTROL(buff.size == buff.capacity); - - fclose(f); /* do nothing specific if fclose() fails */ - return buff; - } -} - - -/* @return : kBuffNull if any error */ -static buffer_t -createDictionaryBuffer(const char* dictionaryName, - const void* srcBuffer, - const size_t* srcBlockSizes, size_t nbBlocks, - size_t requestedDictSize) -{ - if (dictionaryName) { - DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName); - return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */ - - } else { - - DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", - (unsigned)requestedDictSize); - void* const dictBuffer = malloc(requestedDictSize); - CONTROL(dictBuffer != NULL); - - assert(nbBlocks <= UINT_MAX); - size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize, - srcBuffer, - srcBlockSizes, (unsigned)nbBlocks); - CONTROL(!ZSTD_isError(dictSize)); - - buffer_t result; - result.ptr = dictBuffer; - result.capacity = requestedDictSize; - result.size = dictSize; - return result; - } -} - - -/*! BMK_loadFiles() : - * Loads `buffer`, with content from files listed within `fileNamesTable`. - * Fills `buffer` entirely. 
- * @return : 0 on success, !=0 on error */ -static int loadFiles(void* buffer, size_t bufferSize, - size_t* fileSizes, - const char* const * fileNamesTable, unsigned nbFiles) -{ - size_t pos = 0, totalSize = 0; - - for (unsigned n=0; n<nbFiles; n++) { - U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); - if (UTIL_isDirectory(fileNamesTable[n])) { - fileSizes[n] = 0; - continue; - } - if (fileSize == UTIL_FILESIZE_UNKNOWN) { - fileSizes[n] = 0; - continue; - } - - FILE* const f = fopen(fileNamesTable[n], "rb"); - assert(f!=NULL); - - assert(pos <= bufferSize); - assert(fileSize <= bufferSize - pos); - - { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); - assert(readSize == fileSize); - pos += readSize; - } - fileSizes[n] = (size_t)fileSize; - totalSize += (size_t)fileSize; - fclose(f); - } - - assert(totalSize == bufferSize); - return 0; -} - - - -/*--- slice_collection_t ---*/ - -typedef struct { - void** slicePtrs; - size_t* capacities; - size_t nbSlices; -} slice_collection_t; - -static const slice_collection_t kNullCollection = { NULL, NULL, 0 }; - -static void freeSliceCollection(slice_collection_t collection) -{ - free(collection.slicePtrs); - free(collection.capacities); -} - -/* shrinkSizes() : - * downsizes sizes of slices within collection, according to `newSizes`. - * every `newSizes` entry must be <= than its corresponding collection size */ -void shrinkSizes(slice_collection_t collection, - const size_t* newSizes) /* presumed same size as collection */ -{ - size_t const nbSlices = collection.nbSlices; - for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) { - assert(newSizes[blockNb] <= collection.capacities[blockNb]); - collection.capacities[blockNb] = newSizes[blockNb]; - } -} - - -/* splitSlices() : - * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize. 
- * otherwise, creates exactly nbSlices slices, - * by either truncating input (when smaller) - * or repeating input from beginning */ -static slice_collection_t -splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices) -{ - if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */ - size_t nbSrcBlocks = 0; - for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) { - size_t pos = 0; - while (pos <= srcSlices.capacities[ssnb]) { - nbSrcBlocks++; - pos += blockSize; - } - } - - if (nbSlices == 0) nbSlices = nbSrcBlocks; - - void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable)); - size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities)); - if (sliceTable == NULL || capacities == NULL) { - free(sliceTable); - free(capacities); - return kNullCollection; - } - - size_t ssnb = 0; - for (size_t sliceNb=0; sliceNb < nbSlices; ) { - ssnb = (ssnb + 1) % srcSlices.nbSlices; - size_t pos = 0; - char* const ptr = (char*)srcSlices.slicePtrs[ssnb]; - while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) { - size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos); - sliceTable[sliceNb] = ptr + pos; - capacities[sliceNb] = size; - sliceNb++; - pos += blockSize; - } - } - - slice_collection_t result; - result.nbSlices = nbSlices; - result.slicePtrs = sliceTable; - result.capacities = capacities; - return result; -} - - -static size_t sliceCollection_totalCapacity(slice_collection_t sc) -{ - size_t totalSize = 0; - for (size_t n=0; n<sc.nbSlices; n++) - totalSize += sc.capacities[n]; - return totalSize; -} - - -/* --- buffer collection --- */ - -typedef struct { - buffer_t buffer; - slice_collection_t slices; -} buffer_collection_t; - - -static void freeBufferCollection(buffer_collection_t bc) -{ - freeBuffer(bc.buffer); - freeSliceCollection(bc.slices); -} - - -static buffer_collection_t -createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc) -{ - size_t const bufferSize = 
sliceCollection_totalCapacity(sc); - - buffer_t buffer = createBuffer(bufferSize); - CONTROL(buffer.ptr != NULL); - - size_t const nbSlices = sc.nbSlices; - void** const slices = (void**)malloc(nbSlices * sizeof(*slices)); - CONTROL(slices != NULL); - - size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities)); - CONTROL(capacities != NULL); - - char* const ptr = (char*)buffer.ptr; - size_t pos = 0; - for (size_t n=0; n < nbSlices; n++) { - capacities[n] = sc.capacities[n]; - slices[n] = ptr + pos; - pos += capacities[n]; - } - - buffer_collection_t result; - result.buffer = buffer; - result.slices.nbSlices = nbSlices; - result.slices.capacities = capacities; - result.slices.slicePtrs = slices; - return result; -} - - -/* @return : kBuffNull if any error */ -static buffer_collection_t -createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles) -{ - U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); - assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN); - assert(totalSizeToLoad <= BENCH_SIZE_MAX); - size_t const loadedSize = (size_t)totalSizeToLoad; - assert(loadedSize > 0); - void* const srcBuffer = malloc(loadedSize); - assert(srcBuffer != NULL); - - assert(nbFiles > 0); - size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes)); - assert(fileSizes != NULL); - - /* Load input buffer */ - int const errorCode = loadFiles(srcBuffer, loadedSize, - fileSizes, - fileNamesTable, nbFiles); - assert(errorCode == 0); - - void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable)); - assert(sliceTable != NULL); - - char* const ptr = (char*)srcBuffer; - size_t pos = 0; - unsigned fileNb = 0; - for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) { - sliceTable[fileNb] = ptr + pos; - pos += fileSizes[fileNb]; - } - assert(pos == loadedSize); - assert(fileNb == nbFiles); - - - buffer_t buffer; - buffer.ptr = srcBuffer; - buffer.capacity = loadedSize; - buffer.size = loadedSize; - - 
slice_collection_t slices; - slices.slicePtrs = sliceTable; - slices.capacities = fileSizes; - slices.nbSlices = nbFiles; - - buffer_collection_t bc; - bc.buffer = buffer; - bc.slices = slices; - return bc; -} - - - - -/*--- ddict_collection_t ---*/ - -typedef struct { - ZSTD_DDict** ddicts; - size_t nbDDict; -} ddict_collection_t; - -static const ddict_collection_t kNullDDictCollection = { NULL, 0 }; - -static void freeDDictCollection(ddict_collection_t ddictc) -{ - for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) { - ZSTD_freeDDict(ddictc.ddicts[dictNb]); - } - free(ddictc.ddicts); -} - -/* returns .buffers=NULL if operation fails */ -static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict) -{ - ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*)); - assert(ddicts != NULL); - if (ddicts==NULL) return kNullDDictCollection; - for (size_t dictNb=0; dictNb < nbDDict; dictNb++) { - ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize); - assert(ddicts[dictNb] != NULL); - } - ddict_collection_t ddictc; - ddictc.ddicts = ddicts; - ddictc.nbDDict = nbDDict; - return ddictc; -} - - -/* mess with addresses, so that linear scanning dictionaries != linear address scanning */ -void shuffleDictionaries(ddict_collection_t dicts) -{ - size_t const nbDicts = dicts.nbDDict; - for (size_t r=0; r<nbDicts; r++) { - size_t const d = rand() % nbDicts; - ZSTD_DDict* tmpd = dicts.ddicts[d]; - dicts.ddicts[d] = dicts.ddicts[r]; - dicts.ddicts[r] = tmpd; - } - for (size_t r=0; r<nbDicts; r++) { - size_t const d1 = rand() % nbDicts; - size_t const d2 = rand() % nbDicts; - ZSTD_DDict* tmpd = dicts.ddicts[d1]; - dicts.ddicts[d1] = dicts.ddicts[d2]; - dicts.ddicts[d2] = tmpd; - } -} - - -/* --- Compression --- */ - -/* compressBlocks() : - * @return : total compressed size of all blocks, - * or 0 if error. - */ -static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). 
If present, must contain at least nbBlocks fields */ - slice_collection_t dstBlockBuffers, - slice_collection_t srcBlockBuffers, - ZSTD_CDict* cdict, int cLevel) -{ - size_t const nbBlocks = srcBlockBuffers.nbSlices; - assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices); - - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - assert(cctx != NULL); - - size_t totalCSize = 0; - for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) { - size_t cBlockSize; - if (cdict == NULL) { - cBlockSize = ZSTD_compressCCtx(cctx, - dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb], - srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb], - cLevel); - } else { - cBlockSize = ZSTD_compress_usingCDict(cctx, - dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb], - srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb], - cdict); - } - CONTROL(!ZSTD_isError(cBlockSize)); - if (cSizes) cSizes[blockNb] = cBlockSize; - totalCSize += cBlockSize; - } - return totalCSize; -} - - -/* --- Benchmark --- */ - -typedef struct { - ZSTD_DCtx* dctx; - size_t nbDicts; - size_t dictNb; - ddict_collection_t dictionaries; -} decompressInstructions; - -decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries) -{ - decompressInstructions di; - di.dctx = ZSTD_createDCtx(); - assert(di.dctx != NULL); - di.nbDicts = dictionaries.nbDDict; - di.dictNb = 0; - di.dictionaries = dictionaries; - return di; -} - -void freeDecompressInstructions(decompressInstructions di) -{ - ZSTD_freeDCtx(di.dctx); -} - -/* benched function */ -size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload) -{ - decompressInstructions* const di = (decompressInstructions*) payload; - - size_t const result = ZSTD_decompress_usingDDict(di->dctx, - dst, dstCapacity, - src, srcSize, - di->dictionaries.ddicts[di->dictNb]); - - di->dictNb = di->dictNb + 1; - if (di->dictNb >= di->nbDicts) di->dictNb = 
0; - - return result; -} - - -static int benchMem(slice_collection_t dstBlocks, - slice_collection_t srcBlocks, - ddict_collection_t dictionaries, - int nbRounds) -{ - assert(dstBlocks.nbSlices == srcBlocks.nbSlices); - - unsigned const ms_per_round = RUN_TIME_DEFAULT_MS; - unsigned const total_time_ms = nbRounds * ms_per_round; - - double bestSpeed = 0.; - - BMK_timedFnState_t* const benchState = - BMK_createTimedFnState(total_time_ms, ms_per_round); - decompressInstructions di = createDecompressInstructions(dictionaries); - BMK_benchParams_t const bp = { - .benchFn = decompress, - .benchPayload = &di, - .initFn = NULL, - .initPayload = NULL, - .errorFn = ZSTD_isError, - .blockCount = dstBlocks.nbSlices, - .srcBuffers = (const void* const*) srcBlocks.slicePtrs, - .srcSizes = srcBlocks.capacities, - .dstBuffers = dstBlocks.slicePtrs, - .dstCapacities = dstBlocks.capacities, - .blockResults = NULL - }; - - for (;;) { - BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp); - CONTROL(BMK_isSuccessful_runOutcome(outcome)); - - BMK_runTime_t const result = BMK_extract_runTime(outcome); - double const dTime_ns = result.nanoSecPerRun; - double const dTime_sec = (double)dTime_ns / 1000000000; - size_t const srcSize = result.sumOfReturn; - double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB); - if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps; - DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed); - fflush(stdout); - if (BMK_isCompleted_TimedFn(benchState)) break; - } - DISPLAY("\n"); - - freeDecompressInstructions(di); - BMK_freeTimedFnState(benchState); - - return 0; /* success */ -} - - -/*! bench() : - * fileName : file to load for benchmarking purpose - * dictionary : optional (can be NULL), file to load as dictionary, - * if none provided : will be calculated on the fly by the program. 
- * @return : 0 is success, 1+ otherwise */ -int bench(const char** fileNameTable, unsigned nbFiles, - const char* dictionary, - size_t blockSize, int clevel, - unsigned nbDictMax, unsigned nbBlocks, - int nbRounds) -{ - int result = 0; - - DISPLAYLEVEL(3, "loading %u files... \n", nbFiles); - buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles); - CONTROL(srcs.buffer.ptr != NULL); - buffer_t srcBuffer = srcs.buffer; - size_t const srcSize = srcBuffer.size; - DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n", - (double)srcSize / (1 MB)); - - slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks); - nbBlocks = (unsigned)(srcSlices.nbSlices); - DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks); - if (blockSize) - DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize); - DISPLAYLEVEL(3, "\n"); - size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices); - - - size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities)); - CONTROL(dstCapacities != NULL); - size_t dstBufferCapacity = 0; - for (size_t bnb=0; bnb<nbBlocks; bnb++) { - dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]); - dstBufferCapacity += dstCapacities[bnb]; - } - - buffer_t dstBuffer = createBuffer(dstBufferCapacity); - CONTROL(dstBuffer.ptr != NULL); - - void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable)); - CONTROL(sliceTable != NULL); - - { char* const ptr = dstBuffer.ptr; - size_t pos = 0; - for (size_t snb=0; snb < nbBlocks; snb++) { - sliceTable[snb] = ptr + pos; - pos += dstCapacities[snb]; - } } - - slice_collection_t dstSlices; - dstSlices.capacities = dstCapacities; - dstSlices.slicePtrs = sliceTable; - dstSlices.nbSlices = nbBlocks; - - - /* dictionary determination */ - buffer_t const dictBuffer = createDictionaryBuffer(dictionary, - srcs.buffer.ptr, - srcs.slices.capacities, srcs.slices.nbSlices, - DICTSIZE); - CONTROL(dictBuffer.ptr != NULL); - - 
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel); - CONTROL(cdict != NULL); - - size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel); - CONTROL(cTotalSizeNoDict != 0); - DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n", - clevel, - (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); - - size_t* const cSizes = malloc(nbBlocks * sizeof(size_t)); - CONTROL(cSizes != NULL); - - size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel); - CONTROL(cTotalSize != 0); - DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n", - (unsigned)dictBuffer.size, - (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize); - - /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */ - shrinkSizes(dstSlices, cSizes); - - size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy); - unsigned const nbDicts = nbDictMax ? 
nbDictMax : nbBlocks; - size_t const allDictMem = dictMem * nbDicts; - DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n", - nbDicts, (double)allDictMem / (1 MB)); - - ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts); - CONTROL(dictionaries.ddicts != NULL); - - shuffleDictionaries(dictionaries); - - buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices); - CONTROL(resultCollection.buffer.ptr != NULL); - - result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds); - - /* free all heap objects in reverse order */ - freeBufferCollection(resultCollection); - freeDDictCollection(dictionaries); - free(cSizes); - ZSTD_freeCDict(cdict); - freeBuffer(dictBuffer); - freeSliceCollection(dstSlices); - freeBuffer(dstBuffer); - freeSliceCollection(srcSlices); - freeBufferCollection(srcs); - - return result; -} - - - -/* --- Command Line --- */ - -/*! readU32FromChar() : - * @return : unsigned integer value read from input in `char` format. - * allows and interprets K, KB, KiB, M, MB and MiB suffix. - * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
- * Note : function will exit() program if digit sequence overflows */ -static unsigned readU32FromChar(const char** stringPtr) -{ - unsigned result = 0; - while ((**stringPtr >='0') && (**stringPtr <='9')) { - unsigned const max = (((unsigned)(-1)) / 10) - 1; - assert(result <= max); /* check overflow */ - result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; - } - if ((**stringPtr=='K') || (**stringPtr=='M')) { - unsigned const maxK = ((unsigned)(-1)) >> 10; - assert(result <= maxK); /* check overflow */ - result <<= 10; - if (**stringPtr=='M') { - assert(result <= maxK); /* check overflow */ - result <<= 10; - } - (*stringPtr)++; /* skip `K` or `M` */ - if (**stringPtr=='i') (*stringPtr)++; - if (**stringPtr=='B') (*stringPtr)++; - } - return result; -} - -/** longCommandWArg() : - * check if *stringPtr is the same as longCommand. - * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. - * @return 0 and doesn't modify *stringPtr otherwise. 
- */ -static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) -{ - size_t const comSize = strlen(longCommand); - int const result = !strncmp(*stringPtr, longCommand, comSize); - if (result) *stringPtr += comSize; - return result; -} - - -int usage(const char* exeName) -{ - DISPLAY (" \n"); - DISPLAY (" %s [Options] filename(s) \n", exeName); - DISPLAY (" \n"); - DISPLAY ("Options : \n"); - DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n"); - DISPLAY ("-B# : split input into blocks of size # (default: no split) \n"); - DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT); - DISPLAY ("-D # : use # as a dictionary (default: create one) \n"); - DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S); - DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n"); - DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n"); - DISPLAY ("-h : help (this text) \n"); - return 0; -} - -int bad_usage(const char* exeName) -{ - DISPLAY (" bad usage : \n"); - usage(exeName); - return 1; -} - -int main (int argc, const char** argv) -{ - int recursiveMode = 0; - int nbRounds = BENCH_TIME_DEFAULT_S; - const char* const exeName = argv[0]; - - if (argc < 2) return bad_usage(exeName); - - const char** nameTable = (const char**)malloc(argc * sizeof(const char*)); - assert(nameTable != NULL); - unsigned nameIdx = 0; - - const char* dictionary = NULL; - int cLevel = CLEVEL_DEFAULT; - size_t blockSize = BLOCKSIZE_DEFAULT; - unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */ - unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ - - for (int argNb = 1; argNb < argc ; argNb++) { - const char* argument = argv[argNb]; - if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); } - if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; } - if (!strcmp(argument, "-D")) { 
argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; } - if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; } - if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; } - if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; } - /* anything that's not a command is a filename */ - nameTable[nameIdx++] = argument; - } - - const char** filenameTable = nameTable; - unsigned nbFiles = nameIdx; - char* buffer_containing_filenames = NULL; - - if (recursiveMode) { -#ifndef UTIL_HAS_CREATEFILELIST - assert(0); /* missing capability, do not run */ -#endif - filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */); - } - - int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds); - - free(buffer_containing_filenames); - free(nameTable); - - return result; -} diff --git a/contrib/premake/premake4.lua b/contrib/premake/premake4.lua deleted file mode 100644 index 6675e2e481c19..0000000000000 --- a/contrib/premake/premake4.lua +++ /dev/null @@ -1,6 +0,0 @@ --- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function -dofile('zstd.lua') - -solution 'example' - configurations { 'Debug', 'Release' } - project_zstd('../../lib/') diff --git a/contrib/premake/zstd.lua b/contrib/premake/zstd.lua deleted file mode 100644 index 
df1ace3ee8ea5..0000000000000 --- a/contrib/premake/zstd.lua +++ /dev/null @@ -1,80 +0,0 @@ --- This GENie/premake file copies the behavior of the Makefile in the lib folder. --- Basic usage: project_zstd(ZSTD_DIR) - -function project_zstd(dir, compression, decompression, deprecated, dictbuilder, legacy) - if compression == nil then compression = true end - if decompression == nil then decompression = true end - if deprecated == nil then deprecated = false end - if dictbuilder == nil then dictbuilder = false end - - if legacy == nil then legacy = 0 end - - if not compression then - dictbuilder = false - deprecated = false - end - - if not decompression then - legacy = 0 - deprecated = false - end - - project 'zstd' - kind 'StaticLib' - language 'C' - - files { - dir .. 'zstd.h', - dir .. 'common/**.c', - dir .. 'common/**.h' - } - - if compression then - files { - dir .. 'compress/**.c', - dir .. 'compress/**.h' - } - end - - if decompression then - files { - dir .. 'decompress/**.c', - dir .. 'decompress/**.h' - } - end - - if dictbuilder then - files { - dir .. 'dictBuilder/**.c', - dir .. 'dictBuilder/**.h' - } - end - - if deprecated then - files { - dir .. 'deprecated/**.c', - dir .. 'deprecated/**.h' - } - end - - if legacy ~= 0 then - if legacy >= 8 then - files { - dir .. 'legacy/zstd_v0' .. (legacy - 7) .. '.*' - } - end - includedirs { - dir .. 'legacy' - } - end - - includedirs { - dir, - dir .. 'common' - } - - defines { - 'XXH_NAMESPACE=ZSTD_', - 'ZSTD_LEGACY_SUPPORT=' .. 
legacy - } -end diff --git a/contrib/pzstd/BUCK b/contrib/pzstd/BUCK deleted file mode 100644 index d04eeedd8a1a3..0000000000000 --- a/contrib/pzstd/BUCK +++ /dev/null @@ -1,72 +0,0 @@ -cxx_library( - name='libpzstd', - visibility=['PUBLIC'], - header_namespace='', - exported_headers=[ - 'ErrorHolder.h', - 'Logging.h', - 'Pzstd.h', - ], - headers=[ - 'SkippableFrame.h', - ], - srcs=[ - 'Pzstd.cpp', - 'SkippableFrame.cpp', - ], - deps=[ - ':options', - '//contrib/pzstd/utils:utils', - '//lib:mem', - '//lib:zstd', - ], -) - -cxx_library( - name='options', - visibility=['PUBLIC'], - header_namespace='', - exported_headers=['Options.h'], - srcs=['Options.cpp'], - deps=[ - '//contrib/pzstd/utils:scope_guard', - '//lib:zstd', - '//programs:util', - ], -) - -cxx_binary( - name='pzstd', - visibility=['PUBLIC'], - srcs=['main.cpp'], - deps=[ - ':libpzstd', - ':options', - ], -) - -# Must run "make googletest" first -cxx_library( - name='gtest', - srcs=glob([ - 'googletest/googletest/src/gtest-all.cc', - 'googletest/googlemock/src/gmock-all.cc', - 'googletest/googlemock/src/gmock_main.cc', - ]), - header_namespace='', - exported_headers=subdir_glob([ - ('googletest/googletest/include', '**/*.h'), - ('googletest/googlemock/include', '**/*.h'), - ]), - headers=subdir_glob([ - ('googletest/googletest', 'src/*.cc'), - ('googletest/googletest', 'src/*.h'), - ('googletest/googlemock', 'src/*.cc'), - ('googletest/googlemock', 'src/*.h'), - ]), - platform_linker_flags=[ - ('android', []), - ('', ['-lpthread']), - ], - visibility=['PUBLIC'], -) diff --git a/contrib/pzstd/ErrorHolder.h b/contrib/pzstd/ErrorHolder.h deleted file mode 100644 index 829651c5961e5..0000000000000 --- a/contrib/pzstd/ErrorHolder.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. 
- * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include <atomic> -#include <cassert> -#include <stdexcept> -#include <string> - -namespace pzstd { - -// Coordinates graceful shutdown of the pzstd pipeline -class ErrorHolder { - std::atomic<bool> error_; - std::string message_; - - public: - ErrorHolder() : error_(false) {} - - bool hasError() noexcept { - return error_.load(); - } - - void setError(std::string message) noexcept { - // Given multiple possibly concurrent calls, exactly one will ever succeed. - bool expected = false; - if (error_.compare_exchange_strong(expected, true)) { - message_ = std::move(message); - } - } - - bool check(bool predicate, std::string message) noexcept { - if (!predicate) { - setError(std::move(message)); - } - return !hasError(); - } - - std::string getError() noexcept { - error_.store(false); - return std::move(message_); - } - - ~ErrorHolder() { - assert(!hasError()); - } -}; -} diff --git a/contrib/pzstd/Logging.h b/contrib/pzstd/Logging.h deleted file mode 100644 index 16a63932c0a3d..0000000000000 --- a/contrib/pzstd/Logging.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#pragma once - -#include <cstdio> -#include <mutex> - -namespace pzstd { - -constexpr int ERROR = 1; -constexpr int INFO = 2; -constexpr int DEBUG = 3; -constexpr int VERBOSE = 4; - -class Logger { - std::mutex mutex_; - FILE* out_; - const int level_; - - using Clock = std::chrono::system_clock; - Clock::time_point lastUpdate_; - std::chrono::milliseconds refreshRate_; - - public: - explicit Logger(int level, FILE* out = stderr) - : out_(out), level_(level), lastUpdate_(Clock::now()), - refreshRate_(150) {} - - - bool logsAt(int level) { - return level <= level_; - } - - template <typename... Args> - void operator()(int level, const char *fmt, Args... args) { - if (level > level_) { - return; - } - std::lock_guard<std::mutex> lock(mutex_); - std::fprintf(out_, fmt, args...); - } - - template <typename... Args> - void update(int level, const char *fmt, Args... args) { - if (level > level_) { - return; - } - std::lock_guard<std::mutex> lock(mutex_); - auto now = Clock::now(); - if (now - lastUpdate_ > refreshRate_) { - lastUpdate_ = now; - std::fprintf(out_, "\r"); - std::fprintf(out_, fmt, args...); - } - } - - void clear(int level) { - if (level > level_) { - return; - } - std::lock_guard<std::mutex> lock(mutex_); - std::fprintf(out_, "\r%79s\r", ""); - } -}; - -} diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile deleted file mode 100644 index 8d2b1932e91c3..0000000000000 --- a/contrib/pzstd/Makefile +++ /dev/null @@ -1,271 +0,0 @@ -# ################################################################ -# Copyright (c) 2016-present, Facebook, Inc. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). 
-# ################################################################ - -# Standard variables for installation -DESTDIR ?= -PREFIX ?= /usr/local -BINDIR := $(DESTDIR)$(PREFIX)/bin - -ZSTDDIR = ../../lib -PROGDIR = ../../programs - -# External program to use to run tests, e.g. qemu or valgrind -TESTPROG ?= -# Flags to pass to the tests -TESTFLAGS ?= - -# We use gcc/clang to generate the header dependencies of files -DEPFLAGS = -MMD -MP -MF $*.Td -POSTCOMPILE = mv -f $*.Td $*.d - -# CFLAGS, CXXFLAGS, CPPFLAGS, and LDFLAGS are for the users to override -CFLAGS ?= -O3 -Wall -Wextra -CXXFLAGS ?= -O3 -Wall -Wextra -pedantic -CPPFLAGS ?= -LDFLAGS ?= - -# Include flags -PZSTD_INC = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I. -GTEST_INC = -isystem googletest/googletest/include - -PZSTD_CPPFLAGS = $(PZSTD_INC) -PZSTD_CCXXFLAGS = -PZSTD_CFLAGS = $(PZSTD_CCXXFLAGS) -PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) -std=c++11 -PZSTD_LDFLAGS = -EXTRA_FLAGS = -ALL_CFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CFLAGS) $(PZSTD_CFLAGS) -ALL_CXXFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CXXFLAGS) $(PZSTD_CXXFLAGS) -ALL_LDFLAGS = $(EXTRA_FLAGS) $(CXXFLAGS) $(LDFLAGS) $(PZSTD_LDFLAGS) - - -# gtest libraries need to go before "-lpthread" because they depend on it. 
-GTEST_LIB = -L googletest/build/googlemock/gtest -LIBS = - -# Compilation commands -LD_COMMAND = $(CXX) $^ $(ALL_LDFLAGS) $(LIBS) -pthread -o $@ -CC_COMMAND = $(CC) $(DEPFLAGS) $(ALL_CFLAGS) -c $< -o $@ -CXX_COMMAND = $(CXX) $(DEPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@ - -# Get a list of all zstd files so we rebuild the static library when we need to -ZSTDCOMMON_FILES := $(wildcard $(ZSTDDIR)/common/*.c) \ - $(wildcard $(ZSTDDIR)/common/*.h) -ZSTDCOMP_FILES := $(wildcard $(ZSTDDIR)/compress/*.c) \ - $(wildcard $(ZSTDDIR)/compress/*.h) -ZSTDDECOMP_FILES := $(wildcard $(ZSTDDIR)/decompress/*.c) \ - $(wildcard $(ZSTDDIR)/decompress/*.h) -ZSTDPROG_FILES := $(wildcard $(PROGDIR)/*.c) \ - $(wildcard $(PROGDIR)/*.h) -ZSTD_FILES := $(wildcard $(ZSTDDIR)/*.h) \ - $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) \ - $(ZSTDPROG_FILES) - -# List all the pzstd source files so we can determine their dependencies -PZSTD_SRCS := $(wildcard *.cpp) -PZSTD_TESTS := $(wildcard test/*.cpp) -UTILS_TESTS := $(wildcard utils/test/*.cpp) -ALL_SRCS := $(PZSTD_SRCS) $(PZSTD_TESTS) $(UTILS_TESTS) - - -# Define *.exe as extension for Windows systems -ifneq (,$(filter Windows%,$(OS))) -EXT =.exe -else -EXT = -endif - -# Standard targets -.PHONY: default -default: all - -.PHONY: test-pzstd -test-pzstd: TESTFLAGS=--gtest_filter=-*ExtremelyLarge* -test-pzstd: clean googletest pzstd tests check - -.PHONY: test-pzstd32 -test-pzstd32: clean googletest32 all32 check - -.PHONY: test-pzstd-tsan -test-pzstd-tsan: LDFLAGS=-fuse-ld=gold -test-pzstd-tsan: TESTFLAGS=--gtest_filter=-*ExtremelyLarge* -test-pzstd-tsan: clean googletest tsan check - -.PHONY: test-pzstd-asan -test-pzstd-asan: LDFLAGS=-fuse-ld=gold -test-pzstd-asan: TESTFLAGS=--gtest_filter=-*ExtremelyLarge* -test-pzstd-asan: clean asan check - -.PHONY: check -check: - $(TESTPROG) ./utils/test/BufferTest$(EXT) $(TESTFLAGS) - $(TESTPROG) ./utils/test/RangeTest$(EXT) $(TESTFLAGS) - $(TESTPROG) ./utils/test/ResourcePoolTest$(EXT) $(TESTFLAGS) - 
$(TESTPROG) ./utils/test/ScopeGuardTest$(EXT) $(TESTFLAGS) - $(TESTPROG) ./utils/test/ThreadPoolTest$(EXT) $(TESTFLAGS) - $(TESTPROG) ./utils/test/WorkQueueTest$(EXT) $(TESTFLAGS) - $(TESTPROG) ./test/OptionsTest$(EXT) $(TESTFLAGS) - $(TESTPROG) ./test/PzstdTest$(EXT) $(TESTFLAGS) - -.PHONY: install -install: PZSTD_CPPFLAGS += -DNDEBUG -install: pzstd$(EXT) - install -d -m 755 $(BINDIR)/ - install -m 755 pzstd$(EXT) $(BINDIR)/pzstd$(EXT) - -.PHONY: uninstall -uninstall: - $(RM) $(BINDIR)/pzstd$(EXT) - -# Targets for many different builds -.PHONY: all -all: PZSTD_CPPFLAGS += -DNDEBUG -all: pzstd$(EXT) - -.PHONY: debug -debug: EXTRA_FLAGS += -g -debug: pzstd$(EXT) tests roundtrip - -.PHONY: tsan -tsan: PZSTD_CCXXFLAGS += -fsanitize=thread -fPIC -tsan: PZSTD_LDFLAGS += -fsanitize=thread -tsan: debug - -.PHONY: asan -asan: EXTRA_FLAGS += -fsanitize=address -asan: debug - -.PHONY: ubsan -ubsan: EXTRA_FLAGS += -fsanitize=undefined -ubsan: debug - -.PHONY: all32 -all32: EXTRA_FLAGS += -m32 -all32: all tests roundtrip - -.PHONY: debug32 -debug32: EXTRA_FLAGS += -m32 -debug32: debug - -.PHONY: asan32 -asan32: EXTRA_FLAGS += -m32 -asan32: asan - -.PHONY: tsan32 -tsan32: EXTRA_FLAGS += -m32 -tsan32: tsan - -.PHONY: ubsan32 -ubsan32: EXTRA_FLAGS += -m32 -ubsan32: ubsan - -# Run long round trip tests -.PHONY: roundtripcheck -roundtripcheck: roundtrip check - $(TESTPROG) ./test/RoundTripTest$(EXT) $(TESTFLAGS) - -# Build the main binary -pzstd$(EXT): main.o $(PROGDIR)/util.o Options.o Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a - $(LD_COMMAND) - -# Target that depends on all the tests -.PHONY: tests -tests: EXTRA_FLAGS += -Wno-deprecated-declarations -tests: $(patsubst %,%$(EXT),$(basename $(PZSTD_TESTS) $(UTILS_TESTS))) - -# Build the round trip tests -.PHONY: roundtrip -roundtrip: EXTRA_FLAGS += -Wno-deprecated-declarations -roundtrip: test/RoundTripTest$(EXT) - -# Use the static library that zstd builds for simplicity and -# so we get the compiler options correct 
-$(ZSTDDIR)/libzstd.a: $(ZSTD_FILES) - CFLAGS="$(ALL_CFLAGS)" LDFLAGS="$(ALL_LDFLAGS)" $(MAKE) -C $(ZSTDDIR) libzstd.a - -# Rules to build the tests -test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o \ - $(PROGDIR)/util.o Options.o \ - Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a - $(LD_COMMAND) - -test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB) -test/%Test$(EXT): LIBS += -lgtest -lgtest_main -test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o \ - $(PROGDIR)/util.o Options.o Pzstd.o \ - SkippableFrame.o $(ZSTDDIR)/libzstd.a - $(LD_COMMAND) - -utils/test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB) -utils/test/%Test$(EXT): LIBS += -lgtest -lgtest_main -utils/test/%Test$(EXT): utils/test/%Test.o - $(LD_COMMAND) - - -GTEST_CMAKEFLAGS = - -# Install googletest -.PHONY: googletest -googletest: PZSTD_CCXXFLAGS += -fPIC -googletest: - @$(RM) -rf googletest - @git clone https://github.com/google/googletest - @mkdir -p googletest/build - @cd googletest/build && cmake $(GTEST_CMAKEFLAGS) -DCMAKE_CXX_FLAGS="$(ALL_CXXFLAGS)" .. 
&& $(MAKE) - -.PHONY: googletest32 -googletest32: PZSTD_CCXXFLAGS += -m32 -googletest32: googletest - -.PHONY: googletest-mingw64 -googletest-mingw64: GTEST_CMAKEFLAGS += -G "MSYS Makefiles" -googletest-mingw64: googletest - -.PHONY: clean -clean: - $(RM) -f *.o pzstd$(EXT) *.Td *.d - $(RM) -f test/*.o test/*Test$(EXT) test/*.Td test/*.d - $(RM) -f utils/test/*.o utils/test/*Test$(EXT) utils/test/*.Td utils/test/*.d - $(RM) -f $(PROGDIR)/*.o $(PROGDIR)/*.Td $(PROGDIR)/*.d - $(MAKE) -C $(ZSTDDIR) clean - @echo Cleaning completed - - -# Cancel implicit rules -%.o: %.c -%.o: %.cpp - -# Object file rules -%.o: %.c - $(CC_COMMAND) - $(POSTCOMPILE) - -$(PROGDIR)/%.o: $(PROGDIR)/%.c - $(CC_COMMAND) - $(POSTCOMPILE) - -%.o: %.cpp - $(CXX_COMMAND) - $(POSTCOMPILE) - -test/%.o: PZSTD_CPPFLAGS += $(GTEST_INC) -test/%.o: test/%.cpp - $(CXX_COMMAND) - $(POSTCOMPILE) - -utils/test/%.o: PZSTD_CPPFLAGS += $(GTEST_INC) -utils/test/%.o: utils/test/%.cpp - $(CXX_COMMAND) - $(POSTCOMPILE) - -# Dependency file stuff -.PRECIOUS: %.d test/%.d utils/test/%.d - -# Include rules that specify header file dependencies --include $(patsubst %,%.d,$(basename $(ALL_SRCS))) diff --git a/contrib/pzstd/Options.cpp b/contrib/pzstd/Options.cpp deleted file mode 100644 index 2123f8894c3e7..0000000000000 --- a/contrib/pzstd/Options.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#include "Options.h" -#include "util.h" -#include "utils/ScopeGuard.h" - -#include <algorithm> -#include <cassert> -#include <cstdio> -#include <cstring> -#include <iterator> -#include <thread> -#include <vector> - - -namespace pzstd { - -namespace { -unsigned defaultNumThreads() { -#ifdef PZSTD_NUM_THREADS - return PZSTD_NUM_THREADS; -#else - return std::thread::hardware_concurrency(); -#endif -} - -unsigned parseUnsigned(const char **arg) { - unsigned result = 0; - while (**arg >= '0' && **arg <= '9') { - result *= 10; - result += **arg - '0'; - ++(*arg); - } - return result; -} - -const char *getArgument(const char *options, const char **argv, int &i, - int argc) { - if (options[1] != 0) { - return options + 1; - } - ++i; - if (i == argc) { - std::fprintf(stderr, "Option -%c requires an argument, but none provided\n", - *options); - return nullptr; - } - return argv[i]; -} - -const std::string kZstdExtension = ".zst"; -constexpr char kStdIn[] = "-"; -constexpr char kStdOut[] = "-"; -constexpr unsigned kDefaultCompressionLevel = 3; -constexpr unsigned kMaxNonUltraCompressionLevel = 19; - -#ifdef _WIN32 -const char nullOutput[] = "nul"; -#else -const char nullOutput[] = "/dev/null"; -#endif - -void notSupported(const char *option) { - std::fprintf(stderr, "Operation not supported: %s\n", option); -} - -void usage() { - std::fprintf(stderr, "Usage:\n"); - std::fprintf(stderr, " pzstd [args] [FILE(s)]\n"); - std::fprintf(stderr, "Parallel ZSTD options:\n"); - std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n"); - - std::fprintf(stderr, "ZSTD options:\n"); - std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel); - std::fprintf(stderr, " -d, --decompress : decompression\n"); - std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n"); - std::fprintf(stderr, " -f, --force : overwrite output without 
prompting, (de)compress links\n"); - std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n"); - std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n"); - std::fprintf(stderr, " -h, --help : display help and exit\n"); - std::fprintf(stderr, " -V, --version : display version number and exit\n"); - std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n"); - std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n"); - std::fprintf(stderr, " -c, --stdout : force write to standard output, even if it is the console\n"); -#ifdef UTIL_HAS_CREATEFILELIST - std::fprintf(stderr, " -r : operate recursively on directories\n"); -#endif - std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel()); - std::fprintf(stderr, " -C, --check : integrity check (default)\n"); - std::fprintf(stderr, " --no-check : no integrity check\n"); - std::fprintf(stderr, " -t, --test : test compressed file integrity\n"); - std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n"); -} -} // anonymous namespace - -Options::Options() - : numThreads(defaultNumThreads()), maxWindowLog(23), - compressionLevel(kDefaultCompressionLevel), decompress(false), - overwrite(false), keepSource(true), writeMode(WriteMode::Auto), - checksum(true), verbosity(2) {} - -Options::Status Options::parse(int argc, const char **argv) { - bool test = false; - bool recursive = false; - bool ultra = false; - bool forceStdout = false; - bool followLinks = false; - // Local copy of input files, which are pointers into argv. 
- std::vector<const char *> localInputFiles; - for (int i = 1; i < argc; ++i) { - const char *arg = argv[i]; - // Protect against empty arguments - if (arg[0] == 0) { - continue; - } - // Everything after "--" is an input file - if (!std::strcmp(arg, "--")) { - ++i; - std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles)); - break; - } - // Long arguments that don't have a short option - { - bool isLongOption = true; - if (!std::strcmp(arg, "--rm")) { - keepSource = false; - } else if (!std::strcmp(arg, "--ultra")) { - ultra = true; - maxWindowLog = 0; - } else if (!std::strcmp(arg, "--no-check")) { - checksum = false; - } else if (!std::strcmp(arg, "--sparse")) { - writeMode = WriteMode::Sparse; - notSupported("Sparse mode"); - return Status::Failure; - } else if (!std::strcmp(arg, "--no-sparse")) { - writeMode = WriteMode::Regular; - notSupported("Sparse mode"); - return Status::Failure; - } else if (!std::strcmp(arg, "--dictID")) { - notSupported(arg); - return Status::Failure; - } else if (!std::strcmp(arg, "--no-dictID")) { - notSupported(arg); - return Status::Failure; - } else { - isLongOption = false; - } - if (isLongOption) { - continue; - } - } - // Arguments with a short option simply set their short option. 
- const char *options = nullptr; - if (!std::strcmp(arg, "--processes")) { - options = "p"; - } else if (!std::strcmp(arg, "--version")) { - options = "V"; - } else if (!std::strcmp(arg, "--help")) { - options = "h"; - } else if (!std::strcmp(arg, "--decompress")) { - options = "d"; - } else if (!std::strcmp(arg, "--force")) { - options = "f"; - } else if (!std::strcmp(arg, "--stdout")) { - options = "c"; - } else if (!std::strcmp(arg, "--keep")) { - options = "k"; - } else if (!std::strcmp(arg, "--verbose")) { - options = "v"; - } else if (!std::strcmp(arg, "--quiet")) { - options = "q"; - } else if (!std::strcmp(arg, "--check")) { - options = "C"; - } else if (!std::strcmp(arg, "--test")) { - options = "t"; - } else if (arg[0] == '-' && arg[1] != 0) { - options = arg + 1; - } else { - localInputFiles.emplace_back(arg); - continue; - } - assert(options != nullptr); - - bool finished = false; - while (!finished && *options != 0) { - // Parse the compression level - if (*options >= '0' && *options <= '9') { - compressionLevel = parseUnsigned(&options); - continue; - } - - switch (*options) { - case 'h': - case 'H': - usage(); - return Status::Message; - case 'V': - std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING); - return Status::Message; - case 'p': { - finished = true; - const char *optionArgument = getArgument(options, argv, i, argc); - if (optionArgument == nullptr) { - return Status::Failure; - } - if (*optionArgument < '0' || *optionArgument > '9') { - std::fprintf(stderr, "Option -p expects a number, but %s provided\n", - optionArgument); - return Status::Failure; - } - numThreads = parseUnsigned(&optionArgument); - if (*optionArgument != 0) { - std::fprintf(stderr, - "Option -p expects a number, but %u%s provided\n", - numThreads, optionArgument); - return Status::Failure; - } - break; - } - case 'o': { - finished = true; - const char *optionArgument = getArgument(options, argv, i, argc); - if (optionArgument == nullptr) { - return 
Status::Failure; - } - outputFile = optionArgument; - break; - } - case 'C': - checksum = true; - break; - case 'k': - keepSource = true; - break; - case 'd': - decompress = true; - break; - case 'f': - overwrite = true; - forceStdout = true; - followLinks = true; - break; - case 't': - test = true; - decompress = true; - break; -#ifdef UTIL_HAS_CREATEFILELIST - case 'r': - recursive = true; - break; -#endif - case 'c': - outputFile = kStdOut; - forceStdout = true; - break; - case 'v': - ++verbosity; - break; - case 'q': - --verbosity; - // Ignore them for now - break; - // Unsupported options from Zstd - case 'D': - case 's': - notSupported("Zstd dictionaries."); - return Status::Failure; - case 'b': - case 'e': - case 'i': - case 'B': - notSupported("Zstd benchmarking options."); - return Status::Failure; - default: - std::fprintf(stderr, "Invalid argument: %s\n", arg); - return Status::Failure; - } - if (!finished) { - ++options; - } - } // while (*options != 0); - } // for (int i = 1; i < argc; ++i); - - // Set options for test mode - if (test) { - outputFile = nullOutput; - keepSource = true; - } - - // Input file defaults to standard input if not provided. 
- if (localInputFiles.empty()) { - localInputFiles.emplace_back(kStdIn); - } - - // Check validity of input files - if (localInputFiles.size() > 1) { - const auto it = std::find(localInputFiles.begin(), localInputFiles.end(), - std::string{kStdIn}); - if (it != localInputFiles.end()) { - std::fprintf( - stderr, - "Cannot specify standard input when handling multiple files\n"); - return Status::Failure; - } - } - if (localInputFiles.size() > 1 || recursive) { - if (!outputFile.empty() && outputFile != nullOutput) { - std::fprintf( - stderr, - "Cannot specify an output file when handling multiple inputs\n"); - return Status::Failure; - } - } - - g_utilDisplayLevel = verbosity; - // Remove local input files that are symbolic links - if (!followLinks) { - std::remove_if(localInputFiles.begin(), localInputFiles.end(), - [&](const char *path) { - bool isLink = UTIL_isLink(path); - if (isLink && verbosity >= 2) { - std::fprintf( - stderr, - "Warning : %s is symbolic link, ignoring\n", - path); - } - return isLink; - }); - } - - // Translate input files/directories into files to (de)compress - if (recursive) { - char *scratchBuffer = nullptr; - unsigned numFiles = 0; - const char **files = - UTIL_createFileList(localInputFiles.data(), localInputFiles.size(), - &scratchBuffer, &numFiles, followLinks); - if (files == nullptr) { - std::fprintf(stderr, "Error traversing directories\n"); - return Status::Failure; - } - auto guard = - makeScopeGuard([&] { UTIL_freeFileList(files, scratchBuffer); }); - if (numFiles == 0) { - std::fprintf(stderr, "No files found\n"); - return Status::Failure; - } - inputFiles.resize(numFiles); - std::copy(files, files + numFiles, inputFiles.begin()); - } else { - inputFiles.resize(localInputFiles.size()); - std::copy(localInputFiles.begin(), localInputFiles.end(), - inputFiles.begin()); - } - localInputFiles.clear(); - assert(!inputFiles.empty()); - - // If reading from standard input, default to standard output - if (inputFiles[0] == kStdIn && 
outputFile.empty()) { - assert(inputFiles.size() == 1); - outputFile = "-"; - } - - if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) { - assert(inputFiles.size() == 1); - std::fprintf(stderr, "Cannot read input from interactive console\n"); - return Status::Failure; - } - if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) { - std::fprintf(stderr, "Will not write to console stdout unless -c or -f is " - "specified and decompressing\n"); - return Status::Failure; - } - - // Check compression level - { - unsigned maxCLevel = - ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel; - if (compressionLevel > maxCLevel || compressionLevel == 0) { - std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel); - return Status::Failure; - } - } - - // Check that numThreads is set - if (numThreads == 0) { - std::fprintf(stderr, "Invalid arguments: # of threads not specified " - "and unable to determine hardware concurrency.\n"); - return Status::Failure; - } - - // Modify verbosity - // If we are piping input and output, turn off interaction - if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) { - verbosity = 1; - } - // If we are in multi-file mode, turn off interaction - if (inputFiles.size() > 1 && verbosity == 2) { - verbosity = 1; - } - - return Status::Success; -} - -std::string Options::getOutputFile(const std::string &inputFile) const { - if (!outputFile.empty()) { - return outputFile; - } - // Attempt to add/remove zstd extension from the input file - if (decompress) { - int stemSize = inputFile.size() - kZstdExtension.size(); - if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) { - return inputFile.substr(0, stemSize); - } else { - return ""; - } - } else { - return inputFile + kZstdExtension; - } -} -} diff --git a/contrib/pzstd/Options.h b/contrib/pzstd/Options.h deleted file mode 100644 index f4f2aaa499cb9..0000000000000 --- a/contrib/pzstd/Options.h +++ /dev/null @@ -1,68 +0,0 @@ -/* 
- * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -#undef ZSTD_STATIC_LINKING_ONLY - -#include <cstdint> -#include <string> -#include <vector> - -namespace pzstd { - -struct Options { - enum class WriteMode { Regular, Auto, Sparse }; - - unsigned numThreads; - unsigned maxWindowLog; - unsigned compressionLevel; - bool decompress; - std::vector<std::string> inputFiles; - std::string outputFile; - bool overwrite; - bool keepSource; - WriteMode writeMode; - bool checksum; - int verbosity; - - enum class Status { - Success, // Successfully parsed options - Failure, // Failure to parse options - Message // Options specified to print a message (e.g. "-h") - }; - - Options(); - Options(unsigned numThreads, unsigned maxWindowLog, unsigned compressionLevel, - bool decompress, std::vector<std::string> inputFiles, - std::string outputFile, bool overwrite, bool keepSource, - WriteMode writeMode, bool checksum, int verbosity) - : numThreads(numThreads), maxWindowLog(maxWindowLog), - compressionLevel(compressionLevel), decompress(decompress), - inputFiles(std::move(inputFiles)), outputFile(std::move(outputFile)), - overwrite(overwrite), keepSource(keepSource), writeMode(writeMode), - checksum(checksum), verbosity(verbosity) {} - - Status parse(int argc, const char **argv); - - ZSTD_parameters determineParameters() const { - ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, 0); - params.fParams.contentSizeFlag = 0; - params.fParams.checksumFlag = checksum; - if (maxWindowLog != 0 && params.cParams.windowLog > maxWindowLog) { - params.cParams.windowLog = maxWindowLog; - params.cParams = ZSTD_adjustCParams(params.cParams, 0, 0); - } - return 
params; - } - - std::string getOutputFile(const std::string &inputFile) const; -}; -} diff --git a/contrib/pzstd/Pzstd.cpp b/contrib/pzstd/Pzstd.cpp deleted file mode 100644 index 652187c3bd0e4..0000000000000 --- a/contrib/pzstd/Pzstd.cpp +++ /dev/null @@ -1,611 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "platform.h" /* Large Files support, SET_BINARY_MODE */ -#include "Pzstd.h" -#include "SkippableFrame.h" -#include "utils/FileSystem.h" -#include "utils/Range.h" -#include "utils/ScopeGuard.h" -#include "utils/ThreadPool.h" -#include "utils/WorkQueue.h" - -#include <chrono> -#include <cinttypes> -#include <cstddef> -#include <cstdio> -#include <memory> -#include <string> - - -namespace pzstd { - -namespace { -#ifdef _WIN32 -const std::string nullOutput = "nul"; -#else -const std::string nullOutput = "/dev/null"; -#endif -} - -using std::size_t; - -static std::uintmax_t fileSizeOrZero(const std::string &file) { - if (file == "-") { - return 0; - } - std::error_code ec; - auto size = file_size(file, ec); - if (ec) { - size = 0; - } - return size; -} - -static std::uint64_t handleOneInput(const Options &options, - const std::string &inputFile, - FILE* inputFd, - const std::string &outputFile, - FILE* outputFd, - SharedState& state) { - auto inputSize = fileSizeOrZero(inputFile); - // WorkQueue outlives ThreadPool so in the case of error we are certain - // we don't accidentally try to call push() on it after it is destroyed - WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1}; - std::uint64_t bytesRead; - std::uint64_t bytesWritten; - { - // Initialize the (de)compression thread pool with numThreads - ThreadPool executor(options.numThreads); - // Run the reader 
thread on an extra thread - ThreadPool readExecutor(1); - if (!options.decompress) { - // Add a job that reads the input and starts all the compression jobs - readExecutor.add( - [&state, &outs, &executor, inputFd, inputSize, &options, &bytesRead] { - bytesRead = asyncCompressChunks( - state, - outs, - executor, - inputFd, - inputSize, - options.numThreads, - options.determineParameters()); - }); - // Start writing - bytesWritten = writeFile(state, outs, outputFd, options.decompress); - } else { - // Add a job that reads the input and starts all the decompression jobs - readExecutor.add([&state, &outs, &executor, inputFd, &bytesRead] { - bytesRead = asyncDecompressFrames(state, outs, executor, inputFd); - }); - // Start writing - bytesWritten = writeFile(state, outs, outputFd, options.decompress); - } - } - if (!state.errorHolder.hasError()) { - std::string inputFileName = inputFile == "-" ? "stdin" : inputFile; - std::string outputFileName = outputFile == "-" ? "stdout" : outputFile; - if (!options.decompress) { - double ratio = static_cast<double>(bytesWritten) / - static_cast<double>(bytesRead + !bytesRead); - state.log(INFO, "%-20s :%6.2f%% (%6" PRIu64 " => %6" PRIu64 - " bytes, %s)\n", - inputFileName.c_str(), ratio * 100, bytesRead, bytesWritten, - outputFileName.c_str()); - } else { - state.log(INFO, "%-20s: %" PRIu64 " bytes \n", - inputFileName.c_str(),bytesWritten); - } - } - return bytesWritten; -} - -static FILE *openInputFile(const std::string &inputFile, - ErrorHolder &errorHolder) { - if (inputFile == "-") { - SET_BINARY_MODE(stdin); - return stdin; - } - // Check if input file is a directory - { - std::error_code ec; - if (is_directory(inputFile, ec)) { - errorHolder.setError("Output file is a directory -- ignored"); - return nullptr; - } - } - auto inputFd = std::fopen(inputFile.c_str(), "rb"); - if (!errorHolder.check(inputFd != nullptr, "Failed to open input file")) { - return nullptr; - } - return inputFd; -} - -static FILE *openOutputFile(const 
Options &options, - const std::string &outputFile, - SharedState& state) { - if (outputFile == "-") { - SET_BINARY_MODE(stdout); - return stdout; - } - // Check if the output file exists and then open it - if (!options.overwrite && outputFile != nullOutput) { - auto outputFd = std::fopen(outputFile.c_str(), "rb"); - if (outputFd != nullptr) { - std::fclose(outputFd); - if (!state.log.logsAt(INFO)) { - state.errorHolder.setError("Output file exists"); - return nullptr; - } - state.log( - INFO, - "pzstd: %s already exists; do you wish to overwrite (y/n) ? ", - outputFile.c_str()); - int c = getchar(); - if (c != 'y' && c != 'Y') { - state.errorHolder.setError("Not overwritten"); - return nullptr; - } - } - } - auto outputFd = std::fopen(outputFile.c_str(), "wb"); - if (!state.errorHolder.check( - outputFd != nullptr, "Failed to open output file")) { - return nullptr; - } - return outputFd; -} - -int pzstdMain(const Options &options) { - int returnCode = 0; - SharedState state(options); - for (const auto& input : options.inputFiles) { - // Setup the shared state - auto printErrorGuard = makeScopeGuard([&] { - if (state.errorHolder.hasError()) { - returnCode = 1; - state.log(ERROR, "pzstd: %s: %s.\n", input.c_str(), - state.errorHolder.getError().c_str()); - } - }); - // Open the input file - auto inputFd = openInputFile(input, state.errorHolder); - if (inputFd == nullptr) { - continue; - } - auto closeInputGuard = makeScopeGuard([&] { std::fclose(inputFd); }); - // Open the output file - auto outputFile = options.getOutputFile(input); - if (!state.errorHolder.check(outputFile != "", - "Input file does not have extension .zst")) { - continue; - } - auto outputFd = openOutputFile(options, outputFile, state); - if (outputFd == nullptr) { - continue; - } - auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); }); - // (de)compress the file - handleOneInput(options, input, inputFd, outputFile, outputFd, state); - if (state.errorHolder.hasError()) { - 
continue; - } - // Delete the input file if necessary - if (!options.keepSource) { - // Be sure that we are done and have written everything before we delete - if (!state.errorHolder.check(std::fclose(inputFd) == 0, - "Failed to close input file")) { - continue; - } - closeInputGuard.dismiss(); - if (!state.errorHolder.check(std::fclose(outputFd) == 0, - "Failed to close output file")) { - continue; - } - closeOutputGuard.dismiss(); - if (std::remove(input.c_str()) != 0) { - state.errorHolder.setError("Failed to remove input file"); - continue; - } - } - } - // Returns 1 if any of the files failed to (de)compress. - return returnCode; -} - -/// Construct a `ZSTD_inBuffer` that points to the data in `buffer`. -static ZSTD_inBuffer makeZstdInBuffer(const Buffer& buffer) { - return ZSTD_inBuffer{buffer.data(), buffer.size(), 0}; -} - -/** - * Advance `buffer` and `inBuffer` by the amount of data read, as indicated by - * `inBuffer.pos`. - */ -void advance(Buffer& buffer, ZSTD_inBuffer& inBuffer) { - auto pos = inBuffer.pos; - inBuffer.src = static_cast<const unsigned char*>(inBuffer.src) + pos; - inBuffer.size -= pos; - inBuffer.pos = 0; - return buffer.advance(pos); -} - -/// Construct a `ZSTD_outBuffer` that points to the data in `buffer`. -static ZSTD_outBuffer makeZstdOutBuffer(Buffer& buffer) { - return ZSTD_outBuffer{buffer.data(), buffer.size(), 0}; -} - -/** - * Split `buffer` and advance `outBuffer` by the amount of data written, as - * indicated by `outBuffer.pos`. - */ -Buffer split(Buffer& buffer, ZSTD_outBuffer& outBuffer) { - auto pos = outBuffer.pos; - outBuffer.dst = static_cast<unsigned char*>(outBuffer.dst) + pos; - outBuffer.size -= pos; - outBuffer.pos = 0; - return buffer.splitAt(pos); -} - -/** - * Stream chunks of input from `in`, compress it, and stream it out to `out`. 
- * - * @param state The shared state - * @param in Queue that we `pop()` input buffers from - * @param out Queue that we `push()` compressed output buffers to - * @param maxInputSize An upper bound on the size of the input - */ -static void compress( - SharedState& state, - std::shared_ptr<BufferWorkQueue> in, - std::shared_ptr<BufferWorkQueue> out, - size_t maxInputSize) { - auto& errorHolder = state.errorHolder; - auto guard = makeScopeGuard([&] { out->finish(); }); - // Initialize the CCtx - auto ctx = state.cStreamPool->get(); - if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_CStream")) { - return; - } - { - auto err = ZSTD_resetCStream(ctx.get(), 0); - if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) { - return; - } - } - - // Allocate space for the result - auto outBuffer = Buffer(ZSTD_compressBound(maxInputSize)); - auto zstdOutBuffer = makeZstdOutBuffer(outBuffer); - { - Buffer inBuffer; - // Read a buffer in from the input queue - while (in->pop(inBuffer) && !errorHolder.hasError()) { - auto zstdInBuffer = makeZstdInBuffer(inBuffer); - // Compress the whole buffer and send it to the output queue - while (!inBuffer.empty() && !errorHolder.hasError()) { - if (!errorHolder.check( - !outBuffer.empty(), "ZSTD_compressBound() was too small")) { - return; - } - // Compress - auto err = - ZSTD_compressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer); - if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) { - return; - } - // Split the compressed data off outBuffer and pass to the output queue - out->push(split(outBuffer, zstdOutBuffer)); - // Forget about the data we already compressed - advance(inBuffer, zstdInBuffer); - } - } - } - // Write the epilog - size_t bytesLeft; - do { - if (!errorHolder.check( - !outBuffer.empty(), "ZSTD_compressBound() was too small")) { - return; - } - bytesLeft = ZSTD_endStream(ctx.get(), &zstdOutBuffer); - if (!errorHolder.check( - !ZSTD_isError(bytesLeft), 
ZSTD_getErrorName(bytesLeft))) { - return; - } - out->push(split(outBuffer, zstdOutBuffer)); - } while (bytesLeft != 0 && !errorHolder.hasError()); -} - -/** - * Calculates how large each independently compressed frame should be. - * - * @param size The size of the source if known, 0 otherwise - * @param numThreads The number of threads available to run compression jobs on - * @param params The zstd parameters to be used for compression - */ -static size_t calculateStep( - std::uintmax_t size, - size_t numThreads, - const ZSTD_parameters &params) { - (void)size; - (void)numThreads; - return size_t{1} << (params.cParams.windowLog + 2); -} - -namespace { -enum class FileStatus { Continue, Done, Error }; -/// Determines the status of the file descriptor `fd`. -FileStatus fileStatus(FILE* fd) { - if (std::feof(fd)) { - return FileStatus::Done; - } else if (std::ferror(fd)) { - return FileStatus::Error; - } - return FileStatus::Continue; -} -} // anonymous namespace - -/** - * Reads `size` data in chunks of `chunkSize` and puts it into `queue`. - * Will read less if an error or EOF occurs. - * Returns the status of the file after all of the reads have occurred. 
- */ -static FileStatus -readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd, - std::uint64_t *totalBytesRead) { - Buffer buffer(size); - while (!buffer.empty()) { - auto bytesRead = - std::fread(buffer.data(), 1, std::min(chunkSize, buffer.size()), fd); - *totalBytesRead += bytesRead; - queue.push(buffer.splitAt(bytesRead)); - auto status = fileStatus(fd); - if (status != FileStatus::Continue) { - return status; - } - } - return FileStatus::Continue; -} - -std::uint64_t asyncCompressChunks( - SharedState& state, - WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks, - ThreadPool& executor, - FILE* fd, - std::uintmax_t size, - size_t numThreads, - ZSTD_parameters params) { - auto chunksGuard = makeScopeGuard([&] { chunks.finish(); }); - std::uint64_t bytesRead = 0; - - // Break the input up into chunks of size `step` and compress each chunk - // independently. - size_t step = calculateStep(size, numThreads, params); - state.log(DEBUG, "Chosen frame size: %zu\n", step); - auto status = FileStatus::Continue; - while (status == FileStatus::Continue && !state.errorHolder.hasError()) { - // Make a new input queue that we will put the chunk's input data into. - auto in = std::make_shared<BufferWorkQueue>(); - auto inGuard = makeScopeGuard([&] { in->finish(); }); - // Make a new output queue that compress will put the compressed data into. - auto out = std::make_shared<BufferWorkQueue>(); - // Start compression in the thread pool - executor.add([&state, in, out, step] { - return compress( - state, std::move(in), std::move(out), step); - }); - // Pass the output queue to the writer thread. 
- chunks.push(std::move(out)); - state.log(VERBOSE, "%s\n", "Starting a new frame"); - // Fill the input queue for the compression job we just started - status = readData(*in, ZSTD_CStreamInSize(), step, fd, &bytesRead); - } - state.errorHolder.check(status != FileStatus::Error, "Error reading input"); - return bytesRead; -} - -/** - * Decompress a frame, whose data is streamed into `in`, and stream the output - * to `out`. - * - * @param state The shared state - * @param in Queue that we `pop()` input buffers from. It contains - * exactly one compressed frame. - * @param out Queue that we `push()` decompressed output buffers to - */ -static void decompress( - SharedState& state, - std::shared_ptr<BufferWorkQueue> in, - std::shared_ptr<BufferWorkQueue> out) { - auto& errorHolder = state.errorHolder; - auto guard = makeScopeGuard([&] { out->finish(); }); - // Initialize the DCtx - auto ctx = state.dStreamPool->get(); - if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_DStream")) { - return; - } - { - auto err = ZSTD_resetDStream(ctx.get()); - if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) { - return; - } - } - - const size_t outSize = ZSTD_DStreamOutSize(); - Buffer inBuffer; - size_t returnCode = 0; - // Read a buffer in from the input queue - while (in->pop(inBuffer) && !errorHolder.hasError()) { - auto zstdInBuffer = makeZstdInBuffer(inBuffer); - // Decompress the whole buffer and send it to the output queue - while (!inBuffer.empty() && !errorHolder.hasError()) { - // Allocate a buffer with at least outSize bytes. 
- Buffer outBuffer(outSize); - auto zstdOutBuffer = makeZstdOutBuffer(outBuffer); - // Decompress - returnCode = - ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer); - if (!errorHolder.check( - !ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) { - return; - } - // Pass the buffer with the decompressed data to the output queue - out->push(split(outBuffer, zstdOutBuffer)); - // Advance past the input we already read - advance(inBuffer, zstdInBuffer); - if (returnCode == 0) { - // The frame is over, prepare to (maybe) start a new frame - ZSTD_initDStream(ctx.get()); - } - } - } - if (!errorHolder.check(returnCode <= 1, "Incomplete block")) { - return; - } - // We've given ZSTD_decompressStream all of our data, but there may still - // be data to read. - while (returnCode == 1) { - // Allocate a buffer with at least outSize bytes. - Buffer outBuffer(outSize); - auto zstdOutBuffer = makeZstdOutBuffer(outBuffer); - // Pass in no input. - ZSTD_inBuffer zstdInBuffer{nullptr, 0, 0}; - // Decompress - returnCode = - ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer); - if (!errorHolder.check( - !ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) { - return; - } - // Pass the buffer with the decompressed data to the output queue - out->push(split(outBuffer, zstdOutBuffer)); - } -} - -std::uint64_t asyncDecompressFrames( - SharedState& state, - WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames, - ThreadPool& executor, - FILE* fd) { - auto framesGuard = makeScopeGuard([&] { frames.finish(); }); - std::uint64_t totalBytesRead = 0; - - // Split the source up into its component frames. - // If we find our recognized skippable frame we know the next frames size - // which means that we can decompress each standard frame in independently. - // Otherwise, we will decompress using only one decompression task. 
- const size_t chunkSize = ZSTD_DStreamInSize(); - auto status = FileStatus::Continue; - while (status == FileStatus::Continue && !state.errorHolder.hasError()) { - // Make a new input queue that we will put the frames's bytes into. - auto in = std::make_shared<BufferWorkQueue>(); - auto inGuard = makeScopeGuard([&] { in->finish(); }); - // Make a output queue that decompress will put the decompressed data into - auto out = std::make_shared<BufferWorkQueue>(); - - size_t frameSize; - { - // Calculate the size of the next frame. - // frameSize is 0 if the frame info can't be decoded. - Buffer buffer(SkippableFrame::kSize); - auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd); - totalBytesRead += bytesRead; - status = fileStatus(fd); - if (bytesRead == 0 && status != FileStatus::Continue) { - break; - } - buffer.subtract(buffer.size() - bytesRead); - frameSize = SkippableFrame::tryRead(buffer.range()); - in->push(std::move(buffer)); - } - if (frameSize == 0) { - // We hit a non SkippableFrame, so this will be the last job. 
- // Make sure that we don't use too much memory - in->setMaxSize(64); - out->setMaxSize(64); - } - // Start decompression in the thread pool - executor.add([&state, in, out] { - return decompress(state, std::move(in), std::move(out)); - }); - // Pass the output queue to the writer thread - frames.push(std::move(out)); - if (frameSize == 0) { - // We hit a non SkippableFrame ==> not compressed by pzstd or corrupted - // Pass the rest of the source to this decompression task - state.log(VERBOSE, "%s\n", - "Input not in pzstd format, falling back to serial decompression"); - while (status == FileStatus::Continue && !state.errorHolder.hasError()) { - status = readData(*in, chunkSize, chunkSize, fd, &totalBytesRead); - } - break; - } - state.log(VERBOSE, "Decompressing a frame of size %zu", frameSize); - // Fill the input queue for the decompression job we just started - status = readData(*in, chunkSize, frameSize, fd, &totalBytesRead); - } - state.errorHolder.check(status != FileStatus::Error, "Error reading input"); - return totalBytesRead; -} - -/// Write `data` to `fd`, returns true iff success. -static bool writeData(ByteRange data, FILE* fd) { - while (!data.empty()) { - data.advance(std::fwrite(data.begin(), 1, data.size(), fd)); - if (std::ferror(fd)) { - return false; - } - } - return true; -} - -std::uint64_t writeFile( - SharedState& state, - WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs, - FILE* outputFd, - bool decompress) { - auto& errorHolder = state.errorHolder; - auto lineClearGuard = makeScopeGuard([&state] { - state.log.clear(INFO); - }); - std::uint64_t bytesWritten = 0; - std::shared_ptr<BufferWorkQueue> out; - // Grab the output queue for each decompression job (in order). - while (outs.pop(out)) { - if (errorHolder.hasError()) { - continue; - } - if (!decompress) { - // If we are compressing and want to write skippable frames we can't - // start writing before compression is done because we need to know the - // compressed size. 
- // Wait for the compressed size to be available and write skippable frame - SkippableFrame frame(out->size()); - if (!writeData(frame.data(), outputFd)) { - errorHolder.setError("Failed to write output"); - return bytesWritten; - } - bytesWritten += frame.kSize; - } - // For each chunk of the frame: Pop it from the queue and write it - Buffer buffer; - while (out->pop(buffer) && !errorHolder.hasError()) { - if (!writeData(buffer.range(), outputFd)) { - errorHolder.setError("Failed to write output"); - return bytesWritten; - } - bytesWritten += buffer.size(); - state.log.update(INFO, "Written: %u MB ", - static_cast<std::uint32_t>(bytesWritten >> 20)); - } - } - return bytesWritten; -} -} diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h deleted file mode 100644 index 79d1fcca26537..0000000000000 --- a/contrib/pzstd/Pzstd.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include "ErrorHolder.h" -#include "Logging.h" -#include "Options.h" -#include "utils/Buffer.h" -#include "utils/Range.h" -#include "utils/ResourcePool.h" -#include "utils/ThreadPool.h" -#include "utils/WorkQueue.h" -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -#undef ZSTD_STATIC_LINKING_ONLY - -#include <cstddef> -#include <cstdint> -#include <memory> - -namespace pzstd { -/** - * Runs pzstd with `options` and returns the number of bytes written. - * An error occurred if `errorHandler.hasError()`. - * - * @param options The pzstd options to use for (de)compression - * @returns 0 upon success and non-zero on failure. 
- */ -int pzstdMain(const Options& options); - -class SharedState { - public: - SharedState(const Options& options) : log(options.verbosity) { - if (!options.decompress) { - auto parameters = options.determineParameters(); - cStreamPool.reset(new ResourcePool<ZSTD_CStream>{ - [this, parameters]() -> ZSTD_CStream* { - this->log(VERBOSE, "%s\n", "Creating new ZSTD_CStream"); - auto zcs = ZSTD_createCStream(); - if (zcs) { - auto err = ZSTD_initCStream_advanced( - zcs, nullptr, 0, parameters, 0); - if (ZSTD_isError(err)) { - ZSTD_freeCStream(zcs); - return nullptr; - } - } - return zcs; - }, - [](ZSTD_CStream *zcs) { - ZSTD_freeCStream(zcs); - }}); - } else { - dStreamPool.reset(new ResourcePool<ZSTD_DStream>{ - [this]() -> ZSTD_DStream* { - this->log(VERBOSE, "%s\n", "Creating new ZSTD_DStream"); - auto zds = ZSTD_createDStream(); - if (zds) { - auto err = ZSTD_initDStream(zds); - if (ZSTD_isError(err)) { - ZSTD_freeDStream(zds); - return nullptr; - } - } - return zds; - }, - [](ZSTD_DStream *zds) { - ZSTD_freeDStream(zds); - }}); - } - } - - ~SharedState() { - // The resource pools have references to this, so destroy them first. - cStreamPool.reset(); - dStreamPool.reset(); - } - - Logger log; - ErrorHolder errorHolder; - std::unique_ptr<ResourcePool<ZSTD_CStream>> cStreamPool; - std::unique_ptr<ResourcePool<ZSTD_DStream>> dStreamPool; -}; - -/** - * Streams input from `fd`, breaks input up into chunks, and compresses each - * chunk independently. Output of each chunk gets streamed to a queue, and - * the output queues get put into `chunks` in order. 
- * - * @param state The shared state - * @param chunks Each compression jobs output queue gets `pushed()` here - * as soon as it is available - * @param executor The thread pool to run compression jobs in - * @param fd The input file descriptor - * @param size The size of the input file if known, 0 otherwise - * @param numThreads The number of threads in the thread pool - * @param parameters The zstd parameters to use for compression - * @returns The number of bytes read from the file - */ -std::uint64_t asyncCompressChunks( - SharedState& state, - WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks, - ThreadPool& executor, - FILE* fd, - std::uintmax_t size, - std::size_t numThreads, - ZSTD_parameters parameters); - -/** - * Streams input from `fd`. If pzstd headers are available it breaks the input - * up into independent frames. It sends each frame to an independent - * decompression job. Output of each frame gets streamed to a queue, and - * the output queues get put into `frames` in order. - * - * @param state The shared state - * @param frames Each decompression jobs output queue gets `pushed()` here - * as soon as it is available - * @param executor The thread pool to run compression jobs in - * @param fd The input file descriptor - * @returns The number of bytes read from the file - */ -std::uint64_t asyncDecompressFrames( - SharedState& state, - WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames, - ThreadPool& executor, - FILE* fd); - -/** - * Streams input in from each queue in `outs` in order, and writes the data to - * `outputFd`. - * - * @param state The shared state - * @param outs A queue of output queues, one for each - * (de)compression job. - * @param outputFd The file descriptor to write to - * @param decompress Are we decompressing? 
- * @returns The number of bytes written - */ -std::uint64_t writeFile( - SharedState& state, - WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs, - FILE* outputFd, - bool decompress); -} diff --git a/contrib/pzstd/README.md b/contrib/pzstd/README.md deleted file mode 100644 index 84d945815838f..0000000000000 --- a/contrib/pzstd/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Parallel Zstandard (PZstandard) - -Parallel Zstandard is a Pigz-like tool for Zstandard. -It provides Zstandard format compatible compression and decompression that is able to utilize multiple cores. -It breaks the input up into equal sized chunks and compresses each chunk independently into a Zstandard frame. -It then concatenates the frames together to produce the final compressed output. -Pzstandard will write a 12 byte header for each frame that is a skippable frame in the Zstandard format, which tells PZstandard the size of the next compressed frame. -PZstandard supports parallel decompression of files compressed with PZstandard. -When decompressing files compressed with Zstandard, PZstandard does IO in one thread, and decompression in another. - -## Usage - -PZstandard supports the same command line interface as Zstandard, but also provides the `-p` option to specify the number of threads. -Dictionary mode is not currently supported. - -Basic usage - - pzstd input-file -o output-file -p num-threads -# # Compression - pzstd -d input-file -o output-file -p num-threads # Decompression - -PZstandard also supports piping and fifo pipes - - cat input-file | pzstd -p num-threads -# -c > /dev/null - -For more options - - pzstd --help - -PZstandard tries to pick a smart default number of threads if not specified (displayed in `pzstd --help`). -If this number is not suitable, during compilation you can define `PZSTD_NUM_THREADS` to the number of threads you prefer. 
- -## Benchmarks - -As a reference, PZstandard and Pigz were compared on an Intel Core i7 @ 3.1 GHz, each using 4 threads, with the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia). - -Compression Speed vs Ratio with 4 Threads | Decompression Speed with 4 Threads -------------------------------------------|----------------------------------- - |  - -The test procedure was to run each of the following commands 2 times for each compression level, and take the minimum time. - - time pzstd -# -p 4 -c silesia.tar > silesia.tar.zst - time pzstd -d -p 4 -c silesia.tar.zst > /dev/null - - time pigz -# -p 4 -k -c silesia.tar > silesia.tar.gz - time pigz -d -p 4 -k -c silesia.tar.gz > /dev/null - -PZstandard was tested using compression levels 1-19, and Pigz was tested using compression levels 1-9. -Pigz cannot do parallel decompression, it simply does each of reading, decompression, and writing on separate threads. - -## Tests - -Tests require that you have [gtest](https://github.com/google/googletest) installed. -Set `GTEST_INC` and `GTEST_LIB` in `Makefile` to specify the location of the gtest headers and libraries. -Alternatively, run `make googletest`, which will clone googletest and build it. -Run `make tests && make check` to run tests. diff --git a/contrib/pzstd/SkippableFrame.cpp b/contrib/pzstd/SkippableFrame.cpp deleted file mode 100644 index 769866dfc8157..0000000000000 --- a/contrib/pzstd/SkippableFrame.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#include "SkippableFrame.h" -#include "mem.h" -#include "utils/Range.h" - -#include <cstdio> - -using namespace pzstd; - -SkippableFrame::SkippableFrame(std::uint32_t size) : frameSize_(size) { - MEM_writeLE32(data_.data(), kSkippableFrameMagicNumber); - MEM_writeLE32(data_.data() + 4, kFrameContentsSize); - MEM_writeLE32(data_.data() + 8, frameSize_); -} - -/* static */ std::size_t SkippableFrame::tryRead(ByteRange bytes) { - if (bytes.size() < SkippableFrame::kSize || - MEM_readLE32(bytes.begin()) != kSkippableFrameMagicNumber || - MEM_readLE32(bytes.begin() + 4) != kFrameContentsSize) { - return 0; - } - return MEM_readLE32(bytes.begin() + 8); -} diff --git a/contrib/pzstd/SkippableFrame.h b/contrib/pzstd/SkippableFrame.h deleted file mode 100644 index 60deed0405bed..0000000000000 --- a/contrib/pzstd/SkippableFrame.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include "utils/Range.h" - -#include <array> -#include <cstddef> -#include <cstdint> -#include <cstdio> - -namespace pzstd { -/** - * We put a skippable frame before each frame. - * It contains a skippable frame magic number, the size of the skippable frame, - * and the size of the next frame. - * Each skippable frame is exactly 12 bytes in little endian format. - * The first 8 bytes are for compatibility with the ZSTD format. - * If we have N threads, the output will look like - * - * [0x184D2A50|4|size1] [frame1 of size size1] - * [0x184D2A50|4|size2] [frame2 of size size2] - * ... - * [0x184D2A50|4|sizeN] [frameN of size sizeN] - * - * Each sizeX is 4 bytes. - * - * These skippable frames should allow us to skip through the compressed file - * and only load at most N pages. 
- */ -class SkippableFrame { - public: - static constexpr std::size_t kSize = 12; - - private: - std::uint32_t frameSize_; - std::array<std::uint8_t, kSize> data_; - static constexpr std::uint32_t kSkippableFrameMagicNumber = 0x184D2A50; - // Could be improved if the size fits in less bytes - static constexpr std::uint32_t kFrameContentsSize = kSize - 8; - - public: - // Write the skippable frame to data_ in LE format. - explicit SkippableFrame(std::uint32_t size); - - // Read the skippable frame from bytes in LE format. - static std::size_t tryRead(ByteRange bytes); - - ByteRange data() const { - return {data_.data(), data_.size()}; - } - - // Size of the next frame. - std::size_t frameSize() const { - return frameSize_; - } -}; -} diff --git a/contrib/pzstd/images/Cspeed.png b/contrib/pzstd/images/Cspeed.png Binary files differdeleted file mode 100644 index aca4f663ea2e9..0000000000000 --- a/contrib/pzstd/images/Cspeed.png +++ /dev/null diff --git a/contrib/pzstd/images/Dspeed.png b/contrib/pzstd/images/Dspeed.png Binary files differdeleted file mode 100644 index e48881bcd05b7..0000000000000 --- a/contrib/pzstd/images/Dspeed.png +++ /dev/null diff --git a/contrib/pzstd/main.cpp b/contrib/pzstd/main.cpp deleted file mode 100644 index b93f043b16b12..0000000000000 --- a/contrib/pzstd/main.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#include "ErrorHolder.h" -#include "Options.h" -#include "Pzstd.h" - -using namespace pzstd; - -int main(int argc, const char** argv) { - Options options; - switch (options.parse(argc, argv)) { - case Options::Status::Failure: - return 1; - case Options::Status::Message: - return 0; - default: - break; - } - - return pzstdMain(options); -} diff --git a/contrib/pzstd/test/BUCK b/contrib/pzstd/test/BUCK deleted file mode 100644 index 6d3fdd3c269b4..0000000000000 --- a/contrib/pzstd/test/BUCK +++ /dev/null @@ -1,37 +0,0 @@ -cxx_test( - name='options_test', - srcs=['OptionsTest.cpp'], - deps=['//contrib/pzstd:options'], -) - -cxx_test( - name='pzstd_test', - srcs=['PzstdTest.cpp'], - deps=[ - ':round_trip', - '//contrib/pzstd:libpzstd', - '//contrib/pzstd/utils:scope_guard', - '//programs:datagen', - ], -) - -cxx_binary( - name='round_trip_test', - srcs=['RoundTripTest.cpp'], - deps=[ - ':round_trip', - '//contrib/pzstd/utils:scope_guard', - '//programs:datagen', - ] -) - -cxx_library( - name='round_trip', - header_namespace='test', - exported_headers=['RoundTrip.h'], - deps=[ - '//contrib/pzstd:libpzstd', - '//contrib/pzstd:options', - '//contrib/pzstd/utils:scope_guard', - ] -) diff --git a/contrib/pzstd/test/OptionsTest.cpp b/contrib/pzstd/test/OptionsTest.cpp deleted file mode 100644 index e601148255d41..0000000000000 --- a/contrib/pzstd/test/OptionsTest.cpp +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#include "Options.h" - -#include <array> -#include <gtest/gtest.h> - -using namespace pzstd; - -namespace pzstd { -bool operator==(const Options &lhs, const Options &rhs) { - return lhs.numThreads == rhs.numThreads && - lhs.maxWindowLog == rhs.maxWindowLog && - lhs.compressionLevel == rhs.compressionLevel && - lhs.decompress == rhs.decompress && lhs.inputFiles == rhs.inputFiles && - lhs.outputFile == rhs.outputFile && lhs.overwrite == rhs.overwrite && - lhs.keepSource == rhs.keepSource && lhs.writeMode == rhs.writeMode && - lhs.checksum == rhs.checksum && lhs.verbosity == rhs.verbosity; -} - -std::ostream &operator<<(std::ostream &out, const Options &opt) { - out << "{"; - { - out << "\n\t" - << "numThreads: " << opt.numThreads; - out << ",\n\t" - << "maxWindowLog: " << opt.maxWindowLog; - out << ",\n\t" - << "compressionLevel: " << opt.compressionLevel; - out << ",\n\t" - << "decompress: " << opt.decompress; - out << ",\n\t" - << "inputFiles: {"; - { - bool first = true; - for (const auto &file : opt.inputFiles) { - if (!first) { - out << ","; - } - first = false; - out << "\n\t\t" << file; - } - } - out << "\n\t}"; - out << ",\n\t" - << "outputFile: " << opt.outputFile; - out << ",\n\t" - << "overwrite: " << opt.overwrite; - out << ",\n\t" - << "keepSource: " << opt.keepSource; - out << ",\n\t" - << "writeMode: " << static_cast<int>(opt.writeMode); - out << ",\n\t" - << "checksum: " << opt.checksum; - out << ",\n\t" - << "verbosity: " << opt.verbosity; - } - out << "\n}"; - return out; -} -} - -namespace { -#ifdef _WIN32 -const char nullOutput[] = "nul"; -#else -const char nullOutput[] = "/dev/null"; -#endif - -constexpr auto autoMode = Options::WriteMode::Auto; -} // anonymous namespace - -#define EXPECT_SUCCESS(...) EXPECT_EQ(Options::Status::Success, __VA_ARGS__) -#define EXPECT_FAILURE(...) EXPECT_EQ(Options::Status::Failure, __VA_ARGS__) -#define EXPECT_MESSAGE(...) EXPECT_EQ(Options::Status::Message, __VA_ARGS__) - -template <typename... 
Args> -std::array<const char *, sizeof...(Args) + 1> makeArray(Args... args) { - return {{nullptr, args...}}; -} - -TEST(Options, ValidInputs) { - { - Options options; - auto args = makeArray("--processes", "5", "-o", "x", "y", "-f"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {5, 23, 3, false, {"y"}, "x", - true, true, autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("-p", "1", "input", "-19"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {1, 23, 19, false, {"input"}, "", - false, true, autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = - makeArray("--ultra", "-22", "-p", "1", "-o", "x", "-d", "x.zst", "-f"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {1, 0, 22, true, {"x.zst"}, "x", - true, true, autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("--processes", "100", "hello.zst", "--decompress", - "--force"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {100, 23, 3, true, {"hello.zst"}, "", true, - true, autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("x", "-dp", "1", "-c"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {1, 23, 3, true, {"x"}, "-", - false, true, autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("x", "-dp", "1", "--stdout"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {1, 23, 3, true, {"x"}, "-", - false, true, autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("-p", "1", "x", "-5", "-fo", "-", "--ultra", "-d"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {1, 0, 5, true, {"x"}, "-", - true, true, 
autoMode, true, 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("silesia.tar", "-o", "silesia.tar.pzstd", "-p", "2"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {2, - 23, - 3, - false, - {"silesia.tar"}, - "silesia.tar.pzstd", - false, - true, - autoMode, - true, - 2}; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("x", "-p", "1"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "-p", "1"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - } -} - -TEST(Options, GetOutputFile) { - { - Options options; - auto args = makeArray("x"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ("x.zst", options.getOutputFile(options.inputFiles[0])); - } - { - Options options; - auto args = makeArray("x", "y", "-o", nullOutput); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0])); - } - { - Options options; - auto args = makeArray("x.zst", "-do", nullOutput); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0])); - } - { - Options options; - auto args = makeArray("x.zst", "-d"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ("x", options.getOutputFile(options.inputFiles[0])); - } - { - Options options; - auto args = makeArray("xzst", "-d"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ("", options.getOutputFile(options.inputFiles[0])); - } - { - Options options; - auto args = makeArray("xzst", "-doxx"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ("xx", options.getOutputFile(options.inputFiles[0])); - } -} - -TEST(Options, MultipleFiles) { - { - Options options; - auto args = makeArray("x", "y", "z"); - EXPECT_SUCCESS(options.parse(args.size(), 
args.data())); - Options expected; - expected.inputFiles = {"x", "y", "z"}; - expected.verbosity = 1; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("x", "y", "z", "-o", nullOutput); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected; - expected.inputFiles = {"x", "y", "z"}; - expected.outputFile = nullOutput; - expected.verbosity = 1; - EXPECT_EQ(expected, options); - } - { - Options options; - auto args = makeArray("x", "y", "-o-"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "y", "-o", "file"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("-qqvd12qp4", "-f", "x", "--", "--rm", "-c"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - Options expected = {4, 23, 12, true, {"x", "--rm", "-c"}, - "", true, true, autoMode, true, - 0}; - EXPECT_EQ(expected, options); - } -} - -TEST(Options, NumThreads) { - { - Options options; - auto args = makeArray("x", "-dfo", "-"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "-p", "0", "-fo", "-"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("-f", "-p", "-o", "-"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } -} - -TEST(Options, BadCompressionLevel) { - { - Options options; - auto args = makeArray("x", "-20"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "--ultra", "-23"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "--1"); // negative 1? 
- EXPECT_FAILURE(options.parse(args.size(), args.data())); - } -} - -TEST(Options, InvalidOption) { - { - Options options; - auto args = makeArray("x", "-x"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } -} - -TEST(Options, BadOutputFile) { - { - Options options; - auto args = makeArray("notzst", "-d", "-p", "1"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ("", options.getOutputFile(options.inputFiles.front())); - } -} - -TEST(Options, BadOptionsWithArguments) { - { - Options options; - auto args = makeArray("x", "-pf"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "-p", "10f"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "-p"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "-o"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("x", "-o"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } -} - -TEST(Options, KeepSource) { - { - Options options; - auto args = makeArray("x", "--rm", "-k"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.keepSource); - } - { - Options options; - auto args = makeArray("x", "--rm", "--keep"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.keepSource); - } - { - Options options; - auto args = makeArray("x"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.keepSource); - } - { - Options options; - auto args = makeArray("x", "--rm"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(false, options.keepSource); - } -} - -TEST(Options, Verbosity) { - { - Options options; - auto args = makeArray("x"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(2, 
options.verbosity); - } - { - Options options; - auto args = makeArray("--quiet", "-qq", "x"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(-1, options.verbosity); - } - { - Options options; - auto args = makeArray("x", "y"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(1, options.verbosity); - } - { - Options options; - auto args = makeArray("--", "x", "y"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(1, options.verbosity); - } - { - Options options; - auto args = makeArray("-qv", "x", "y"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(1, options.verbosity); - } - { - Options options; - auto args = makeArray("-v", "x", "y"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(3, options.verbosity); - } - { - Options options; - auto args = makeArray("-v", "x"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(3, options.verbosity); - } -} - -TEST(Options, TestMode) { - { - Options options; - auto args = makeArray("x", "-t"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.keepSource); - EXPECT_EQ(true, options.decompress); - EXPECT_EQ(nullOutput, options.outputFile); - } - { - Options options; - auto args = makeArray("x", "--test", "--rm", "-ohello"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.keepSource); - EXPECT_EQ(true, options.decompress); - EXPECT_EQ(nullOutput, options.outputFile); - } -} - -TEST(Options, Checksum) { - { - Options options; - auto args = makeArray("x.zst", "--no-check", "-Cd"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.checksum); - } - { - Options options; - auto args = makeArray("x"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.checksum); - } - { - Options options; - auto args = makeArray("x", "--no-check", "--check"); - 
EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(true, options.checksum); - } - { - Options options; - auto args = makeArray("x", "--no-check"); - EXPECT_SUCCESS(options.parse(args.size(), args.data())); - EXPECT_EQ(false, options.checksum); - } -} - -TEST(Options, InputFiles) { - { - Options options; - auto args = makeArray("-cd"); - options.parse(args.size(), args.data()); - EXPECT_EQ(1, options.inputFiles.size()); - EXPECT_EQ("-", options.inputFiles[0]); - EXPECT_EQ("-", options.outputFile); - } - { - Options options; - auto args = makeArray(); - options.parse(args.size(), args.data()); - EXPECT_EQ(1, options.inputFiles.size()); - EXPECT_EQ("-", options.inputFiles[0]); - EXPECT_EQ("-", options.outputFile); - } - { - Options options; - auto args = makeArray("-d"); - options.parse(args.size(), args.data()); - EXPECT_EQ(1, options.inputFiles.size()); - EXPECT_EQ("-", options.inputFiles[0]); - EXPECT_EQ("-", options.outputFile); - } - { - Options options; - auto args = makeArray("x", "-"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } -} - -TEST(Options, InvalidOptions) { - { - Options options; - auto args = makeArray("-ibasdf"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("- "); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("-n15"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("-0", "x"); - EXPECT_FAILURE(options.parse(args.size(), args.data())); - } -} - -TEST(Options, Extras) { - { - Options options; - auto args = makeArray("-h"); - EXPECT_MESSAGE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("-H"); - EXPECT_MESSAGE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("-V"); - EXPECT_MESSAGE(options.parse(args.size(), args.data())); - } - { - Options 
options; - auto args = makeArray("--help"); - EXPECT_MESSAGE(options.parse(args.size(), args.data())); - } - { - Options options; - auto args = makeArray("--version"); - EXPECT_MESSAGE(options.parse(args.size(), args.data())); - } -} diff --git a/contrib/pzstd/test/PzstdTest.cpp b/contrib/pzstd/test/PzstdTest.cpp deleted file mode 100644 index 5c7d663108050..0000000000000 --- a/contrib/pzstd/test/PzstdTest.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "Pzstd.h" -extern "C" { -#include "datagen.h" -} -#include "test/RoundTrip.h" -#include "utils/ScopeGuard.h" - -#include <cstddef> -#include <cstdio> -#include <gtest/gtest.h> -#include <memory> -#include <random> - -using namespace std; -using namespace pzstd; - -TEST(Pzstd, SmallSizes) { - unsigned seed = std::random_device{}(); - std::fprintf(stderr, "Pzstd.SmallSizes seed: %u\n", seed); - std::mt19937 gen(seed); - - for (unsigned len = 1; len < 256; ++len) { - if (len % 16 == 0) { - std::fprintf(stderr, "%u / 16\n", len / 16); - } - std::string inputFile = std::tmpnam(nullptr); - auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); }); - { - static uint8_t buf[256]; - RDG_genBuffer(buf, len, 0.5, 0.0, gen()); - auto fd = std::fopen(inputFile.c_str(), "wb"); - auto written = std::fwrite(buf, 1, len, fd); - std::fclose(fd); - ASSERT_EQ(written, len); - } - for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) { - for (unsigned level = 1; level <= 4; level *= 4) { - auto errorGuard = makeScopeGuard([&] { - std::fprintf(stderr, "# threads: %u\n", numThreads); - std::fprintf(stderr, "compression level: %u\n", level); - }); - Options options; - options.overwrite = true; - options.inputFiles = 
{inputFile}; - options.numThreads = numThreads; - options.compressionLevel = level; - options.verbosity = 1; - ASSERT_TRUE(roundTrip(options)); - errorGuard.dismiss(); - } - } - } -} - -TEST(Pzstd, LargeSizes) { - unsigned seed = std::random_device{}(); - std::fprintf(stderr, "Pzstd.LargeSizes seed: %u\n", seed); - std::mt19937 gen(seed); - - for (unsigned len = 1 << 20; len <= (1 << 24); len *= 2) { - std::string inputFile = std::tmpnam(nullptr); - auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); }); - { - std::unique_ptr<uint8_t[]> buf(new uint8_t[len]); - RDG_genBuffer(buf.get(), len, 0.5, 0.0, gen()); - auto fd = std::fopen(inputFile.c_str(), "wb"); - auto written = std::fwrite(buf.get(), 1, len, fd); - std::fclose(fd); - ASSERT_EQ(written, len); - } - for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) { - for (unsigned level = 1; level <= 4; level *= 4) { - auto errorGuard = makeScopeGuard([&] { - std::fprintf(stderr, "# threads: %u\n", numThreads); - std::fprintf(stderr, "compression level: %u\n", level); - }); - Options options; - options.overwrite = true; - options.inputFiles = {inputFile}; - options.numThreads = std::min(numThreads, options.numThreads); - options.compressionLevel = level; - options.verbosity = 1; - ASSERT_TRUE(roundTrip(options)); - errorGuard.dismiss(); - } - } - } -} - -TEST(Pzstd, DISABLED_ExtremelyLargeSize) { - unsigned seed = std::random_device{}(); - std::fprintf(stderr, "Pzstd.ExtremelyLargeSize seed: %u\n", seed); - std::mt19937 gen(seed); - - std::string inputFile = std::tmpnam(nullptr); - auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); }); - - { - // Write 4GB + 64 MB - constexpr size_t kLength = 1 << 26; - std::unique_ptr<uint8_t[]> buf(new uint8_t[kLength]); - auto fd = std::fopen(inputFile.c_str(), "wb"); - auto closeGuard = makeScopeGuard([&] { std::fclose(fd); }); - for (size_t i = 0; i < (1 << 6) + 1; ++i) { - RDG_genBuffer(buf.get(), kLength, 0.5, 0.0, gen()); - auto 
written = std::fwrite(buf.get(), 1, kLength, fd); - if (written != kLength) { - std::fprintf(stderr, "Failed to write file, skipping test\n"); - return; - } - } - } - - Options options; - options.overwrite = true; - options.inputFiles = {inputFile}; - options.compressionLevel = 1; - if (options.numThreads == 0) { - options.numThreads = 1; - } - ASSERT_TRUE(roundTrip(options)); -} - -TEST(Pzstd, ExtremelyCompressible) { - std::string inputFile = std::tmpnam(nullptr); - auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); }); - { - std::unique_ptr<uint8_t[]> buf(new uint8_t[10000]); - std::memset(buf.get(), 'a', 10000); - auto fd = std::fopen(inputFile.c_str(), "wb"); - auto written = std::fwrite(buf.get(), 1, 10000, fd); - std::fclose(fd); - ASSERT_EQ(written, 10000); - } - Options options; - options.overwrite = true; - options.inputFiles = {inputFile}; - options.numThreads = 1; - options.compressionLevel = 1; - ASSERT_TRUE(roundTrip(options)); -} diff --git a/contrib/pzstd/test/RoundTrip.h b/contrib/pzstd/test/RoundTrip.h deleted file mode 100644 index c6364ecb4227e..0000000000000 --- a/contrib/pzstd/test/RoundTrip.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#pragma once - -#include "Options.h" -#include "Pzstd.h" -#include "utils/ScopeGuard.h" - -#include <cstdio> -#include <string> -#include <cstdint> -#include <memory> - -namespace pzstd { - -inline bool check(std::string source, std::string decompressed) { - std::unique_ptr<std::uint8_t[]> sBuf(new std::uint8_t[1024]); - std::unique_ptr<std::uint8_t[]> dBuf(new std::uint8_t[1024]); - - auto sFd = std::fopen(source.c_str(), "rb"); - auto dFd = std::fopen(decompressed.c_str(), "rb"); - auto guard = makeScopeGuard([&] { - std::fclose(sFd); - std::fclose(dFd); - }); - - size_t sRead, dRead; - - do { - sRead = std::fread(sBuf.get(), 1, 1024, sFd); - dRead = std::fread(dBuf.get(), 1, 1024, dFd); - if (std::ferror(sFd) || std::ferror(dFd)) { - return false; - } - if (sRead != dRead) { - return false; - } - - for (size_t i = 0; i < sRead; ++i) { - if (sBuf.get()[i] != dBuf.get()[i]) { - return false; - } - } - } while (sRead == 1024); - if (!std::feof(sFd) || !std::feof(dFd)) { - return false; - } - return true; -} - -inline bool roundTrip(Options& options) { - if (options.inputFiles.size() != 1) { - return false; - } - std::string source = options.inputFiles.front(); - std::string compressedFile = std::tmpnam(nullptr); - std::string decompressedFile = std::tmpnam(nullptr); - auto guard = makeScopeGuard([&] { - std::remove(compressedFile.c_str()); - std::remove(decompressedFile.c_str()); - }); - - { - options.outputFile = compressedFile; - options.decompress = false; - if (pzstdMain(options) != 0) { - return false; - } - } - { - options.decompress = true; - options.inputFiles.front() = compressedFile; - options.outputFile = decompressedFile; - if (pzstdMain(options) != 0) { - return false; - } - } - return check(source, decompressedFile); -} -} diff --git a/contrib/pzstd/test/RoundTripTest.cpp b/contrib/pzstd/test/RoundTripTest.cpp deleted file mode 100644 index 36af0673ae6ae..0000000000000 --- a/contrib/pzstd/test/RoundTripTest.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* 
- * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -extern "C" { -#include "datagen.h" -} -#include "Options.h" -#include "test/RoundTrip.h" -#include "utils/ScopeGuard.h" - -#include <cstddef> -#include <cstdio> -#include <cstdlib> -#include <memory> -#include <random> - -using namespace std; -using namespace pzstd; - -namespace { -string -writeData(size_t size, double matchProba, double litProba, unsigned seed) { - std::unique_ptr<uint8_t[]> buf(new uint8_t[size]); - RDG_genBuffer(buf.get(), size, matchProba, litProba, seed); - string file = tmpnam(nullptr); - auto fd = std::fopen(file.c_str(), "wb"); - auto guard = makeScopeGuard([&] { std::fclose(fd); }); - auto bytesWritten = std::fwrite(buf.get(), 1, size, fd); - if (bytesWritten != size) { - std::abort(); - } - return file; -} - -template <typename Generator> -string generateInputFile(Generator& gen) { - // Use inputs ranging from 1 Byte to 2^16 Bytes - std::uniform_int_distribution<size_t> size{1, 1 << 16}; - std::uniform_real_distribution<> prob{0, 1}; - return writeData(size(gen), prob(gen), prob(gen), gen()); -} - -template <typename Generator> -Options generateOptions(Generator& gen, const string& inputFile) { - Options options; - options.inputFiles = {inputFile}; - options.overwrite = true; - - std::uniform_int_distribution<unsigned> numThreads{1, 32}; - std::uniform_int_distribution<unsigned> compressionLevel{1, 10}; - - options.numThreads = numThreads(gen); - options.compressionLevel = compressionLevel(gen); - - return options; -} -} - -int main() { - std::mt19937 gen(std::random_device{}()); - - auto newlineGuard = makeScopeGuard([] { std::fprintf(stderr, "\n"); }); - for (unsigned i = 0; i < 10000; ++i) { - if (i % 100 == 0) { - 
std::fprintf(stderr, "Progress: %u%%\r", i / 100); - } - auto inputFile = generateInputFile(gen); - auto inputGuard = makeScopeGuard([&] { std::remove(inputFile.c_str()); }); - for (unsigned i = 0; i < 10; ++i) { - auto options = generateOptions(gen, inputFile); - if (!roundTrip(options)) { - std::fprintf(stderr, "numThreads: %u\n", options.numThreads); - std::fprintf(stderr, "level: %u\n", options.compressionLevel); - std::fprintf(stderr, "decompress? %u\n", (unsigned)options.decompress); - std::fprintf(stderr, "file: %s\n", inputFile.c_str()); - return 1; - } - } - } - return 0; -} diff --git a/contrib/pzstd/utils/BUCK b/contrib/pzstd/utils/BUCK deleted file mode 100644 index e757f412070bf..0000000000000 --- a/contrib/pzstd/utils/BUCK +++ /dev/null @@ -1,75 +0,0 @@ -cxx_library( - name='buffer', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['Buffer.h'], - deps=[':range'], -) - -cxx_library( - name='file_system', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['FileSystem.h'], - deps=[':range'], -) - -cxx_library( - name='likely', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['Likely.h'], -) - -cxx_library( - name='range', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['Range.h'], - deps=[':likely'], -) - -cxx_library( - name='resource_pool', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['ResourcePool.h'], -) - -cxx_library( - name='scope_guard', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['ScopeGuard.h'], -) - -cxx_library( - name='thread_pool', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['ThreadPool.h'], - deps=[':work_queue'], -) - -cxx_library( - name='work_queue', - visibility=['PUBLIC'], - header_namespace='utils', - exported_headers=['WorkQueue.h'], - deps=[':buffer'], -) - -cxx_library( - name='utils', - visibility=['PUBLIC'], - deps=[ - ':buffer', - ':file_system', - 
':likely', - ':range', - ':resource_pool', - ':scope_guard', - ':thread_pool', - ':work_queue', - ], -) diff --git a/contrib/pzstd/utils/Buffer.h b/contrib/pzstd/utils/Buffer.h deleted file mode 100644 index f69c3b4d9f7ab..0000000000000 --- a/contrib/pzstd/utils/Buffer.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include "utils/Range.h" - -#include <array> -#include <cstddef> -#include <memory> - -namespace pzstd { - -/** - * A `Buffer` has a pointer to a shared buffer, and a range of the buffer that - * it owns. - * The idea is that you can allocate one buffer, and write chunks into it - * and break off those chunks. - * The underlying buffer is reference counted, and will be destroyed when all - * `Buffer`s that reference it are destroyed. - */ -class Buffer { - std::shared_ptr<unsigned char> buffer_; - MutableByteRange range_; - - static void delete_buffer(unsigned char* buffer) { - delete[] buffer; - } - - public: - /// Construct an empty buffer that owns no data. - explicit Buffer() {} - - /// Construct a `Buffer` that owns a new underlying buffer of size `size`. - explicit Buffer(std::size_t size) - : buffer_(new unsigned char[size], delete_buffer), - range_(buffer_.get(), buffer_.get() + size) {} - - explicit Buffer(std::shared_ptr<unsigned char> buffer, MutableByteRange data) - : buffer_(buffer), range_(data) {} - - Buffer(Buffer&&) = default; - Buffer& operator=(Buffer&&) & = default; - - /** - * Splits the data into two pieces: [begin, begin + n), [begin + n, end). - * Their data both points into the same underlying buffer. - * Modifies the original `Buffer` to point to only [begin + n, end). - * - * @param n The offset to split at. 
- * @returns A buffer that owns the data [begin, begin + n). - */ - Buffer splitAt(std::size_t n) { - auto firstPiece = range_.subpiece(0, n); - range_.advance(n); - return Buffer(buffer_, firstPiece); - } - - /// Modifies the buffer to point to the range [begin + n, end). - void advance(std::size_t n) { - range_.advance(n); - } - - /// Modifies the buffer to point to the range [begin, end - n). - void subtract(std::size_t n) { - range_.subtract(n); - } - - /// Returns a read only `Range` pointing to the `Buffer`s data. - ByteRange range() const { - return range_; - } - /// Returns a mutable `Range` pointing to the `Buffer`s data. - MutableByteRange range() { - return range_; - } - - const unsigned char* data() const { - return range_.data(); - } - - unsigned char* data() { - return range_.data(); - } - - std::size_t size() const { - return range_.size(); - } - - bool empty() const { - return range_.empty(); - } -}; -} diff --git a/contrib/pzstd/utils/FileSystem.h b/contrib/pzstd/utils/FileSystem.h deleted file mode 100644 index 3cfbe86e507ed..0000000000000 --- a/contrib/pzstd/utils/FileSystem.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include "utils/Range.h" - -#include <sys/stat.h> -#include <cerrno> -#include <cstdint> -#include <system_error> - -// A small subset of `std::filesystem`. -// `std::filesystem` should be a drop in replacement. -// See http://en.cppreference.com/w/cpp/filesystem for documentation. - -namespace pzstd { - -// using file_status = ... 
causes gcc to emit a false positive warning -#if defined(_MSC_VER) -typedef struct ::_stat64 file_status; -#else -typedef struct ::stat file_status; -#endif - -/// http://en.cppreference.com/w/cpp/filesystem/status -inline file_status status(StringPiece path, std::error_code& ec) noexcept { - file_status status; -#if defined(_MSC_VER) - const auto error = ::_stat64(path.data(), &status); -#else - const auto error = ::stat(path.data(), &status); -#endif - if (error) { - ec.assign(errno, std::generic_category()); - } else { - ec.clear(); - } - return status; -} - -/// http://en.cppreference.com/w/cpp/filesystem/is_regular_file -inline bool is_regular_file(file_status status) noexcept { -#if defined(S_ISREG) - return S_ISREG(status.st_mode); -#elif !defined(S_ISREG) && defined(S_IFMT) && defined(S_IFREG) - return (status.st_mode & S_IFMT) == S_IFREG; -#else - static_assert(false, "No POSIX stat() support."); -#endif -} - -/// http://en.cppreference.com/w/cpp/filesystem/is_regular_file -inline bool is_regular_file(StringPiece path, std::error_code& ec) noexcept { - return is_regular_file(status(path, ec)); -} - -/// http://en.cppreference.com/w/cpp/filesystem/is_directory -inline bool is_directory(file_status status) noexcept { -#if defined(S_ISDIR) - return S_ISDIR(status.st_mode); -#elif !defined(S_ISDIR) && defined(S_IFMT) && defined(S_IFDIR) - return (status.st_mode & S_IFMT) == S_IFDIR; -#else - static_assert(false, "NO POSIX stat() support."); -#endif -} - -/// http://en.cppreference.com/w/cpp/filesystem/is_directory -inline bool is_directory(StringPiece path, std::error_code& ec) noexcept { - return is_directory(status(path, ec)); -} - -/// http://en.cppreference.com/w/cpp/filesystem/file_size -inline std::uintmax_t file_size( - StringPiece path, - std::error_code& ec) noexcept { - auto stat = status(path, ec); - if (ec) { - return -1; - } - if (!is_regular_file(stat)) { - ec.assign(ENOTSUP, std::generic_category()); - return -1; - } - ec.clear(); - return 
stat.st_size; -} -} diff --git a/contrib/pzstd/utils/Likely.h b/contrib/pzstd/utils/Likely.h deleted file mode 100644 index 7cea8da2771f0..0000000000000 --- a/contrib/pzstd/utils/Likely.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -/** - * Compiler hints to indicate the fast path of an "if" branch: whether - * the if condition is likely to be true or false. - * - * @author Tudor Bosman (tudorb@fb.com) - */ - -#pragma once - -#undef LIKELY -#undef UNLIKELY - -#if defined(__GNUC__) && __GNUC__ >= 4 -#define LIKELY(x) (__builtin_expect((x), 1)) -#define UNLIKELY(x) (__builtin_expect((x), 0)) -#else -#define LIKELY(x) (x) -#define UNLIKELY(x) (x) -#endif diff --git a/contrib/pzstd/utils/Range.h b/contrib/pzstd/utils/Range.h deleted file mode 100644 index fedb5d786c686..0000000000000 --- a/contrib/pzstd/utils/Range.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -/** - * A subset of `folly/Range.h`. - * All code copied verbatim modulo formatting - */ -#pragma once - -#include "utils/Likely.h" - -#include <cstddef> -#include <cstring> -#include <stdexcept> -#include <string> -#include <type_traits> - -namespace pzstd { - -namespace detail { -/* - *Use IsCharPointer<T>::type to enable const char* or char*. - *Use IsCharPointer<T>::const_type to enable only const char*. 
-*/ -template <class T> -struct IsCharPointer {}; - -template <> -struct IsCharPointer<char*> { - typedef int type; -}; - -template <> -struct IsCharPointer<const char*> { - typedef int const_type; - typedef int type; -}; - -} // namespace detail - -template <typename Iter> -class Range { - Iter b_; - Iter e_; - - public: - using size_type = std::size_t; - using iterator = Iter; - using const_iterator = Iter; - using value_type = typename std::remove_reference< - typename std::iterator_traits<Iter>::reference>::type; - using reference = typename std::iterator_traits<Iter>::reference; - - constexpr Range() : b_(), e_() {} - constexpr Range(Iter begin, Iter end) : b_(begin), e_(end) {} - - constexpr Range(Iter begin, size_type size) : b_(begin), e_(begin + size) {} - - template <class T = Iter, typename detail::IsCharPointer<T>::type = 0> - /* implicit */ Range(Iter str) : b_(str), e_(str + std::strlen(str)) {} - - template <class T = Iter, typename detail::IsCharPointer<T>::const_type = 0> - /* implicit */ Range(const std::string& str) - : b_(str.data()), e_(b_ + str.size()) {} - - // Allow implicit conversion from Range<From> to Range<To> if From is - // implicitly convertible to To. 
- template < - class OtherIter, - typename std::enable_if< - (!std::is_same<Iter, OtherIter>::value && - std::is_convertible<OtherIter, Iter>::value), - int>::type = 0> - constexpr /* implicit */ Range(const Range<OtherIter>& other) - : b_(other.begin()), e_(other.end()) {} - - Range(const Range&) = default; - Range(Range&&) = default; - - Range& operator=(const Range&) & = default; - Range& operator=(Range&&) & = default; - - constexpr size_type size() const { - return e_ - b_; - } - bool empty() const { - return b_ == e_; - } - Iter data() const { - return b_; - } - Iter begin() const { - return b_; - } - Iter end() const { - return e_; - } - - void advance(size_type n) { - if (UNLIKELY(n > size())) { - throw std::out_of_range("index out of range"); - } - b_ += n; - } - - void subtract(size_type n) { - if (UNLIKELY(n > size())) { - throw std::out_of_range("index out of range"); - } - e_ -= n; - } - - Range subpiece(size_type first, size_type length = std::string::npos) const { - if (UNLIKELY(first > size())) { - throw std::out_of_range("index out of range"); - } - - return Range(b_ + first, std::min(length, size() - first)); - } -}; - -using ByteRange = Range<const unsigned char*>; -using MutableByteRange = Range<unsigned char*>; -using StringPiece = Range<const char*>; -} diff --git a/contrib/pzstd/utils/ResourcePool.h b/contrib/pzstd/utils/ResourcePool.h deleted file mode 100644 index 8dfcdd765909c..0000000000000 --- a/contrib/pzstd/utils/ResourcePool.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#pragma once - -#include <cassert> -#include <functional> -#include <memory> -#include <mutex> -#include <vector> - -namespace pzstd { - -/** - * An unbounded pool of resources. - * A `ResourcePool<T>` requires a factory function that takes allocates `T*` and - * a free function that frees a `T*`. - * Calling `ResourcePool::get()` will give you a new `ResourcePool::UniquePtr` - * to a `T`, and when it goes out of scope the resource will be returned to the - * pool. - * The `ResourcePool<T>` *must* survive longer than any resources it hands out. - * Remember that `ResourcePool<T>` hands out mutable `T`s, so make sure to clean - * up the resource before or after every use. - */ -template <typename T> -class ResourcePool { - public: - class Deleter; - using Factory = std::function<T*()>; - using Free = std::function<void(T*)>; - using UniquePtr = std::unique_ptr<T, Deleter>; - - private: - std::mutex mutex_; - Factory factory_; - Free free_; - std::vector<T*> resources_; - unsigned inUse_; - - public: - /** - * Creates a `ResourcePool`. - * - * @param factory The function to use to create new resources. - * @param free The function to use to free resources created by `factory`. - */ - ResourcePool(Factory factory, Free free) - : factory_(std::move(factory)), free_(std::move(free)), inUse_(0) {} - - /** - * @returns A unique pointer to a resource. The resource is null iff - * there are no available resources and `factory()` returns null. 
- */ - UniquePtr get() { - std::lock_guard<std::mutex> lock(mutex_); - if (!resources_.empty()) { - UniquePtr resource{resources_.back(), Deleter{*this}}; - resources_.pop_back(); - ++inUse_; - return resource; - } - UniquePtr resource{factory_(), Deleter{*this}}; - ++inUse_; - return resource; - } - - ~ResourcePool() noexcept { - assert(inUse_ == 0); - for (const auto resource : resources_) { - free_(resource); - } - } - - class Deleter { - ResourcePool *pool_; - public: - explicit Deleter(ResourcePool &pool) : pool_(&pool) {} - - void operator() (T *resource) { - std::lock_guard<std::mutex> lock(pool_->mutex_); - // Make sure we don't put null resources into the pool - if (resource) { - pool_->resources_.push_back(resource); - } - assert(pool_->inUse_ > 0); - --pool_->inUse_; - } - }; -}; - -} diff --git a/contrib/pzstd/utils/ScopeGuard.h b/contrib/pzstd/utils/ScopeGuard.h deleted file mode 100644 index 31768f43d22c8..0000000000000 --- a/contrib/pzstd/utils/ScopeGuard.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include <utility> - -namespace pzstd { - -/** - * Dismissable scope guard. - * `Function` must be callable and take no parameters. - * Unless `dissmiss()` is called, the callable is executed upon destruction of - * `ScopeGuard`. 
- * - * Example: - * - * auto guard = makeScopeGuard([&] { cleanup(); }); - */ -template <typename Function> -class ScopeGuard { - Function function; - bool dismissed; - - public: - explicit ScopeGuard(Function&& function) - : function(std::move(function)), dismissed(false) {} - - void dismiss() { - dismissed = true; - } - - ~ScopeGuard() noexcept { - if (!dismissed) { - function(); - } - } -}; - -/// Creates a scope guard from `function`. -template <typename Function> -ScopeGuard<Function> makeScopeGuard(Function&& function) { - return ScopeGuard<Function>(std::forward<Function>(function)); -} -} diff --git a/contrib/pzstd/utils/ThreadPool.h b/contrib/pzstd/utils/ThreadPool.h deleted file mode 100644 index 8ece8e0da4eba..0000000000000 --- a/contrib/pzstd/utils/ThreadPool.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include "utils/WorkQueue.h" - -#include <cstddef> -#include <functional> -#include <thread> -#include <vector> - -namespace pzstd { -/// A simple thread pool that pulls tasks off its queue in FIFO order. -class ThreadPool { - std::vector<std::thread> threads_; - - WorkQueue<std::function<void()>> tasks_; - - public: - /// Constructs a thread pool with `numThreads` threads. - explicit ThreadPool(std::size_t numThreads) { - threads_.reserve(numThreads); - for (std::size_t i = 0; i < numThreads; ++i) { - threads_.emplace_back([this] { - std::function<void()> task; - while (tasks_.pop(task)) { - task(); - } - }); - } - } - - /// Finishes all tasks currently in the queue. - ~ThreadPool() { - tasks_.finish(); - for (auto& thread : threads_) { - thread.join(); - } - } - - /** - * Adds `task` to the queue of tasks to execute. 
Since `task` is a - * `std::function<>`, it cannot be a move only type. So any lambda passed must - * not capture move only types (like `std::unique_ptr`). - * - * @param task The task to execute. - */ - void add(std::function<void()> task) { - tasks_.push(std::move(task)); - } -}; -} diff --git a/contrib/pzstd/utils/WorkQueue.h b/contrib/pzstd/utils/WorkQueue.h deleted file mode 100644 index 1d14d922c6480..0000000000000 --- a/contrib/pzstd/utils/WorkQueue.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#pragma once - -#include "utils/Buffer.h" - -#include <atomic> -#include <cassert> -#include <cstddef> -#include <condition_variable> -#include <cstddef> -#include <functional> -#include <mutex> -#include <queue> - -namespace pzstd { - -/// Unbounded thread-safe work queue. -template <typename T> -class WorkQueue { - // Protects all member variable access - std::mutex mutex_; - std::condition_variable readerCv_; - std::condition_variable writerCv_; - std::condition_variable finishCv_; - - std::queue<T> queue_; - bool done_; - std::size_t maxSize_; - - // Must have lock to call this function - bool full() const { - if (maxSize_ == 0) { - return false; - } - return queue_.size() >= maxSize_; - } - - public: - /** - * Constructs an empty work queue with an optional max size. - * If `maxSize == 0` the queue size is unbounded. - * - * @param maxSize The maximum allowed size of the work queue. - */ - WorkQueue(std::size_t maxSize = 0) : done_(false), maxSize_(maxSize) {} - - /** - * Push an item onto the work queue. Notify a single thread that work is - * available. If `finish()` has been called, do nothing and return false. 
- * If `push()` returns false, then `item` has not been moved from. - * - * @param item Item to push onto the queue. - * @returns True upon success, false if `finish()` has been called. An - * item was pushed iff `push()` returns true. - */ - bool push(T&& item) { - { - std::unique_lock<std::mutex> lock(mutex_); - while (full() && !done_) { - writerCv_.wait(lock); - } - if (done_) { - return false; - } - queue_.push(std::move(item)); - } - readerCv_.notify_one(); - return true; - } - - /** - * Attempts to pop an item off the work queue. It will block until data is - * available or `finish()` has been called. - * - * @param[out] item If `pop` returns `true`, it contains the popped item. - * If `pop` returns `false`, it is unmodified. - * @returns True upon success. False if the queue is empty and - * `finish()` has been called. - */ - bool pop(T& item) { - { - std::unique_lock<std::mutex> lock(mutex_); - while (queue_.empty() && !done_) { - readerCv_.wait(lock); - } - if (queue_.empty()) { - assert(done_); - return false; - } - item = std::move(queue_.front()); - queue_.pop(); - } - writerCv_.notify_one(); - return true; - } - - /** - * Sets the maximum queue size. If `maxSize == 0` then it is unbounded. - * - * @param maxSize The new maximum queue size. - */ - void setMaxSize(std::size_t maxSize) { - { - std::lock_guard<std::mutex> lock(mutex_); - maxSize_ = maxSize; - } - writerCv_.notify_all(); - } - - /** - * Promise that `push()` won't be called again, so once the queue is empty - * there will never any more work. - */ - void finish() { - { - std::lock_guard<std::mutex> lock(mutex_); - assert(!done_); - done_ = true; - } - readerCv_.notify_all(); - writerCv_.notify_all(); - finishCv_.notify_all(); - } - - /// Blocks until `finish()` has been called (but the queue may not be empty). 
- void waitUntilFinished() { - std::unique_lock<std::mutex> lock(mutex_); - while (!done_) { - finishCv_.wait(lock); - } - } -}; - -/// Work queue for `Buffer`s that knows the total number of bytes in the queue. -class BufferWorkQueue { - WorkQueue<Buffer> queue_; - std::atomic<std::size_t> size_; - - public: - BufferWorkQueue(std::size_t maxSize = 0) : queue_(maxSize), size_(0) {} - - void push(Buffer buffer) { - size_.fetch_add(buffer.size()); - queue_.push(std::move(buffer)); - } - - bool pop(Buffer& buffer) { - bool result = queue_.pop(buffer); - if (result) { - size_.fetch_sub(buffer.size()); - } - return result; - } - - void setMaxSize(std::size_t maxSize) { - queue_.setMaxSize(maxSize); - } - - void finish() { - queue_.finish(); - } - - /** - * Blocks until `finish()` has been called. - * - * @returns The total number of bytes of all the `Buffer`s currently in the - * queue. - */ - std::size_t size() { - queue_.waitUntilFinished(); - return size_.load(); - } -}; -} diff --git a/contrib/pzstd/utils/test/BUCK b/contrib/pzstd/utils/test/BUCK deleted file mode 100644 index a5113cab6b0e5..0000000000000 --- a/contrib/pzstd/utils/test/BUCK +++ /dev/null @@ -1,35 +0,0 @@ -cxx_test( - name='buffer_test', - srcs=['BufferTest.cpp'], - deps=['//contrib/pzstd/utils:buffer'], -) - -cxx_test( - name='range_test', - srcs=['RangeTest.cpp'], - deps=['//contrib/pzstd/utils:range'], -) - -cxx_test( - name='resource_pool_test', - srcs=['ResourcePoolTest.cpp'], - deps=['//contrib/pzstd/utils:resource_pool'], -) - -cxx_test( - name='scope_guard_test', - srcs=['ScopeGuardTest.cpp'], - deps=['//contrib/pzstd/utils:scope_guard'], -) - -cxx_test( - name='thread_pool_test', - srcs=['ThreadPoolTest.cpp'], - deps=['//contrib/pzstd/utils:thread_pool'], -) - -cxx_test( - name='work_queue_test', - srcs=['RangeTest.cpp'], - deps=['//contrib/pzstd/utils:work_queue'], -) diff --git a/contrib/pzstd/utils/test/BufferTest.cpp b/contrib/pzstd/utils/test/BufferTest.cpp deleted file mode 100644 
index fbba74e82628f..0000000000000 --- a/contrib/pzstd/utils/test/BufferTest.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "utils/Buffer.h" -#include "utils/Range.h" - -#include <gtest/gtest.h> -#include <memory> - -using namespace pzstd; - -namespace { -void deleter(const unsigned char* buf) { - delete[] buf; -} -} - -TEST(Buffer, Constructors) { - Buffer empty; - EXPECT_TRUE(empty.empty()); - EXPECT_EQ(0, empty.size()); - - Buffer sized(5); - EXPECT_FALSE(sized.empty()); - EXPECT_EQ(5, sized.size()); - - Buffer moved(std::move(sized)); - EXPECT_FALSE(sized.empty()); - EXPECT_EQ(5, sized.size()); - - Buffer assigned; - assigned = std::move(moved); - EXPECT_FALSE(sized.empty()); - EXPECT_EQ(5, sized.size()); -} - -TEST(Buffer, BufferManagement) { - std::shared_ptr<unsigned char> buf(new unsigned char[10], deleter); - { - Buffer acquired(buf, MutableByteRange(buf.get(), buf.get() + 10)); - EXPECT_EQ(2, buf.use_count()); - Buffer moved(std::move(acquired)); - EXPECT_EQ(2, buf.use_count()); - Buffer assigned; - assigned = std::move(moved); - EXPECT_EQ(2, buf.use_count()); - - Buffer split = assigned.splitAt(5); - EXPECT_EQ(3, buf.use_count()); - - split.advance(1); - assigned.subtract(1); - EXPECT_EQ(3, buf.use_count()); - } - EXPECT_EQ(1, buf.use_count()); -} - -TEST(Buffer, Modifiers) { - Buffer buf(10); - { - unsigned char i = 0; - for (auto& byte : buf.range()) { - byte = i++; - } - } - - auto prefix = buf.splitAt(2); - - ASSERT_EQ(2, prefix.size()); - EXPECT_EQ(0, *prefix.data()); - - ASSERT_EQ(8, buf.size()); - EXPECT_EQ(2, *buf.data()); - - buf.advance(2); - EXPECT_EQ(4, *buf.data()); - - EXPECT_EQ(9, *(buf.range().end() - 1)); - - buf.subtract(2); - 
EXPECT_EQ(7, *(buf.range().end() - 1)); - - EXPECT_EQ(4, buf.size()); -} diff --git a/contrib/pzstd/utils/test/RangeTest.cpp b/contrib/pzstd/utils/test/RangeTest.cpp deleted file mode 100644 index 755b50fa6e80d..0000000000000 --- a/contrib/pzstd/utils/test/RangeTest.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "utils/Range.h" - -#include <gtest/gtest.h> -#include <string> - -using namespace pzstd; - -// Range is directly copied from folly. -// Just some sanity tests to make sure everything seems to work. - -TEST(Range, Constructors) { - StringPiece empty; - EXPECT_TRUE(empty.empty()); - EXPECT_EQ(0, empty.size()); - - std::string str = "hello"; - { - Range<std::string::const_iterator> piece(str.begin(), str.end()); - EXPECT_EQ(5, piece.size()); - EXPECT_EQ('h', *piece.data()); - EXPECT_EQ('o', *(piece.end() - 1)); - } - - { - StringPiece piece(str.data(), str.size()); - EXPECT_EQ(5, piece.size()); - EXPECT_EQ('h', *piece.data()); - EXPECT_EQ('o', *(piece.end() - 1)); - } - - { - StringPiece piece(str); - EXPECT_EQ(5, piece.size()); - EXPECT_EQ('h', *piece.data()); - EXPECT_EQ('o', *(piece.end() - 1)); - } - - { - StringPiece piece(str.c_str()); - EXPECT_EQ(5, piece.size()); - EXPECT_EQ('h', *piece.data()); - EXPECT_EQ('o', *(piece.end() - 1)); - } -} - -TEST(Range, Modifiers) { - StringPiece range("hello world"); - ASSERT_EQ(11, range.size()); - - { - auto hello = range.subpiece(0, 5); - EXPECT_EQ(5, hello.size()); - EXPECT_EQ('h', *hello.data()); - EXPECT_EQ('o', *(hello.end() - 1)); - } - { - auto hello = range; - hello.subtract(6); - EXPECT_EQ(5, hello.size()); - EXPECT_EQ('h', *hello.data()); - EXPECT_EQ('o', *(hello.end() - 1)); - } - { - auto world = 
range; - world.advance(6); - EXPECT_EQ(5, world.size()); - EXPECT_EQ('w', *world.data()); - EXPECT_EQ('d', *(world.end() - 1)); - } - - std::string expected = "hello world"; - EXPECT_EQ(expected, std::string(range.begin(), range.end())); - EXPECT_EQ(expected, std::string(range.data(), range.size())); -} diff --git a/contrib/pzstd/utils/test/ResourcePoolTest.cpp b/contrib/pzstd/utils/test/ResourcePoolTest.cpp deleted file mode 100644 index 6fe145180be91..0000000000000 --- a/contrib/pzstd/utils/test/ResourcePoolTest.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "utils/ResourcePool.h" - -#include <gtest/gtest.h> -#include <atomic> -#include <thread> - -using namespace pzstd; - -TEST(ResourcePool, FullTest) { - unsigned numCreated = 0; - unsigned numDeleted = 0; - { - ResourcePool<int> pool( - [&numCreated] { ++numCreated; return new int{5}; }, - [&numDeleted](int *x) { ++numDeleted; delete x; }); - - { - auto i = pool.get(); - EXPECT_EQ(5, *i); - *i = 6; - } - { - auto i = pool.get(); - EXPECT_EQ(6, *i); - auto j = pool.get(); - EXPECT_EQ(5, *j); - *j = 7; - } - { - auto i = pool.get(); - EXPECT_EQ(6, *i); - auto j = pool.get(); - EXPECT_EQ(7, *j); - } - } - EXPECT_EQ(2, numCreated); - EXPECT_EQ(numCreated, numDeleted); -} - -TEST(ResourcePool, ThreadSafe) { - std::atomic<unsigned> numCreated{0}; - std::atomic<unsigned> numDeleted{0}; - { - ResourcePool<int> pool( - [&numCreated] { ++numCreated; return new int{0}; }, - [&numDeleted](int *x) { ++numDeleted; delete x; }); - auto push = [&pool] { - for (int i = 0; i < 100; ++i) { - auto x = pool.get(); - ++*x; - } - }; - std::thread t1{push}; - std::thread t2{push}; - t1.join(); - t2.join(); - - auto x = 
pool.get(); - auto y = pool.get(); - EXPECT_EQ(200, *x + *y); - } - EXPECT_GE(2, numCreated); - EXPECT_EQ(numCreated, numDeleted); -} diff --git a/contrib/pzstd/utils/test/ScopeGuardTest.cpp b/contrib/pzstd/utils/test/ScopeGuardTest.cpp deleted file mode 100644 index 7bc624da79b2c..0000000000000 --- a/contrib/pzstd/utils/test/ScopeGuardTest.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ -#include "utils/ScopeGuard.h" - -#include <gtest/gtest.h> - -using namespace pzstd; - -TEST(ScopeGuard, Dismiss) { - { - auto guard = makeScopeGuard([&] { EXPECT_TRUE(false); }); - guard.dismiss(); - } -} - -TEST(ScopeGuard, Executes) { - bool executed = false; - { - auto guard = makeScopeGuard([&] { executed = true; }); - } - EXPECT_TRUE(executed); -} diff --git a/contrib/pzstd/utils/test/ThreadPoolTest.cpp b/contrib/pzstd/utils/test/ThreadPoolTest.cpp deleted file mode 100644 index 703fd4c9ca174..0000000000000 --- a/contrib/pzstd/utils/test/ThreadPoolTest.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#include "utils/ThreadPool.h" - -#include <gtest/gtest.h> -#include <atomic> -#include <iostream> -#include <thread> -#include <vector> - -using namespace pzstd; - -TEST(ThreadPool, Ordering) { - std::vector<int> results; - - { - ThreadPool executor(1); - for (int i = 0; i < 10; ++i) { - executor.add([ &results, i ] { results.push_back(i); }); - } - } - - for (int i = 0; i < 10; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(ThreadPool, AllJobsFinished) { - std::atomic<unsigned> numFinished{0}; - std::atomic<bool> start{false}; - { - std::cerr << "Creating executor" << std::endl; - ThreadPool executor(5); - for (int i = 0; i < 10; ++i) { - executor.add([ &numFinished, &start ] { - while (!start.load()) { - std::this_thread::yield(); - } - ++numFinished; - }); - } - std::cerr << "Starting" << std::endl; - start.store(true); - std::cerr << "Finishing" << std::endl; - } - EXPECT_EQ(10, numFinished.load()); -} - -TEST(ThreadPool, AddJobWhileJoining) { - std::atomic<bool> done{false}; - { - ThreadPool executor(1); - executor.add([&executor, &done] { - while (!done.load()) { - std::this_thread::yield(); - } - // Sleep for a second to be sure that we are joining - std::this_thread::sleep_for(std::chrono::seconds(1)); - executor.add([] { - EXPECT_TRUE(false); - }); - }); - done.store(true); - } -} diff --git a/contrib/pzstd/utils/test/WorkQueueTest.cpp b/contrib/pzstd/utils/test/WorkQueueTest.cpp deleted file mode 100644 index 14cf77304f21f..0000000000000 --- a/contrib/pzstd/utils/test/WorkQueueTest.cpp +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2016-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ -#include "utils/Buffer.h" -#include "utils/WorkQueue.h" - -#include <gtest/gtest.h> -#include <iostream> -#include <memory> -#include <mutex> -#include <thread> -#include <vector> - -using namespace pzstd; - -namespace { -struct Popper { - WorkQueue<int>* queue; - int* results; - std::mutex* mutex; - - void operator()() { - int result; - while (queue->pop(result)) { - std::lock_guard<std::mutex> lock(*mutex); - results[result] = result; - } - } -}; -} - -TEST(WorkQueue, SingleThreaded) { - WorkQueue<int> queue; - int result; - - queue.push(5); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(5, result); - - queue.push(1); - queue.push(2); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(1, result); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(2, result); - - queue.push(1); - queue.push(2); - queue.finish(); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(1, result); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(2, result); - EXPECT_FALSE(queue.pop(result)); - - queue.waitUntilFinished(); -} - -TEST(WorkQueue, SPSC) { - WorkQueue<int> queue; - const int max = 100; - - for (int i = 0; i < 10; ++i) { - queue.push(int{i}); - } - - std::thread thread([ &queue, max ] { - int result; - for (int i = 0;; ++i) { - if (!queue.pop(result)) { - EXPECT_EQ(i, max); - break; - } - EXPECT_EQ(i, result); - } - }); - - std::this_thread::yield(); - for (int i = 10; i < max; ++i) { - queue.push(int{i}); - } - queue.finish(); - - thread.join(); -} - -TEST(WorkQueue, SPMC) { - WorkQueue<int> queue; - std::vector<int> results(50, -1); - std::mutex mutex; - std::vector<std::thread> threads; - for (int i = 0; i < 5; ++i) { - threads.emplace_back(Popper{&queue, results.data(), &mutex}); - } - - for (int i = 0; i < 50; ++i) { - queue.push(int{i}); - } - queue.finish(); - - for (auto& thread : threads) { - thread.join(); - } - - for (int i = 0; i < 50; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(WorkQueue, MPMC) { - WorkQueue<int> queue; - std::vector<int> results(100, -1); - std::mutex 
mutex; - std::vector<std::thread> popperThreads; - for (int i = 0; i < 4; ++i) { - popperThreads.emplace_back(Popper{&queue, results.data(), &mutex}); - } - - std::vector<std::thread> pusherThreads; - for (int i = 0; i < 2; ++i) { - auto min = i * 50; - auto max = (i + 1) * 50; - pusherThreads.emplace_back( - [ &queue, min, max ] { - for (int i = min; i < max; ++i) { - queue.push(int{i}); - } - }); - } - - for (auto& thread : pusherThreads) { - thread.join(); - } - queue.finish(); - - for (auto& thread : popperThreads) { - thread.join(); - } - - for (int i = 0; i < 100; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(WorkQueue, BoundedSizeWorks) { - WorkQueue<int> queue(1); - int result; - queue.push(5); - queue.pop(result); - queue.push(5); - queue.pop(result); - queue.push(5); - queue.finish(); - queue.pop(result); - EXPECT_EQ(5, result); -} - -TEST(WorkQueue, BoundedSizePushAfterFinish) { - WorkQueue<int> queue(1); - int result; - queue.push(5); - std::thread pusher([&queue] { - queue.push(6); - }); - // Dirtily try and make sure that pusher has run. - std::this_thread::sleep_for(std::chrono::seconds(1)); - queue.finish(); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(5, result); - EXPECT_FALSE(queue.pop(result)); - - pusher.join(); -} - -TEST(WorkQueue, SetMaxSize) { - WorkQueue<int> queue(2); - int result; - queue.push(5); - queue.push(6); - queue.setMaxSize(1); - std::thread pusher([&queue] { - queue.push(7); - }); - // Dirtily try and make sure that pusher has run. 
- std::this_thread::sleep_for(std::chrono::seconds(1)); - queue.finish(); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(5, result); - EXPECT_TRUE(queue.pop(result)); - EXPECT_EQ(6, result); - EXPECT_FALSE(queue.pop(result)); - - pusher.join(); -} - -TEST(WorkQueue, BoundedSizeMPMC) { - WorkQueue<int> queue(10); - std::vector<int> results(200, -1); - std::mutex mutex; - std::cerr << "Creating popperThreads" << std::endl; - std::vector<std::thread> popperThreads; - for (int i = 0; i < 4; ++i) { - popperThreads.emplace_back(Popper{&queue, results.data(), &mutex}); - } - - std::cerr << "Creating pusherThreads" << std::endl; - std::vector<std::thread> pusherThreads; - for (int i = 0; i < 2; ++i) { - auto min = i * 100; - auto max = (i + 1) * 100; - pusherThreads.emplace_back( - [ &queue, min, max ] { - for (int i = min; i < max; ++i) { - queue.push(int{i}); - } - }); - } - - std::cerr << "Joining pusherThreads" << std::endl; - for (auto& thread : pusherThreads) { - thread.join(); - } - std::cerr << "Finishing queue" << std::endl; - queue.finish(); - - std::cerr << "Joining popperThreads" << std::endl; - for (auto& thread : popperThreads) { - thread.join(); - } - - std::cerr << "Inspecting results" << std::endl; - for (int i = 0; i < 200; ++i) { - EXPECT_EQ(i, results[i]); - } -} - -TEST(WorkQueue, FailedPush) { - WorkQueue<std::unique_ptr<int>> queue; - std::unique_ptr<int> x(new int{5}); - EXPECT_TRUE(queue.push(std::move(x))); - EXPECT_EQ(nullptr, x); - queue.finish(); - x.reset(new int{6}); - EXPECT_FALSE(queue.push(std::move(x))); - EXPECT_NE(nullptr, x); - EXPECT_EQ(6, *x); -} - -TEST(BufferWorkQueue, SizeCalculatedCorrectly) { - { - BufferWorkQueue queue; - queue.finish(); - EXPECT_EQ(0, queue.size()); - } - { - BufferWorkQueue queue; - queue.push(Buffer(10)); - queue.finish(); - EXPECT_EQ(10, queue.size()); - } - { - BufferWorkQueue queue; - queue.push(Buffer(10)); - queue.push(Buffer(5)); - queue.finish(); - EXPECT_EQ(15, queue.size()); - } - { - BufferWorkQueue 
queue; - queue.push(Buffer(10)); - queue.push(Buffer(5)); - queue.finish(); - Buffer buffer; - queue.pop(buffer); - EXPECT_EQ(5, queue.size()); - } -} diff --git a/contrib/seekable_format/examples/Makefile b/contrib/seekable_format/examples/Makefile deleted file mode 100644 index 543780f75d34e..0000000000000 --- a/contrib/seekable_format/examples/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -# ################################################################ -# Copyright (c) 2017-present, Facebook, Inc. -# All rights reserved. -# -# This source code is licensed under both the BSD-style license (found in the -# LICENSE file in the root directory of this source tree) and the GPLv2 (found -# in the COPYING file in the root directory of this source tree). -# ################################################################ - -# This Makefile presumes libzstd is built, using `make` in / or /lib/ - -ZSTDLIB_PATH = ../../../lib -ZSTDLIB_NAME = libzstd.a -ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME) - -CPPFLAGS += -I../ -I../../../lib -I../../../lib/common - -CFLAGS ?= -O3 -CFLAGS += -g - -SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB) - -.PHONY: default all clean test - -default: all - -all: seekable_compression seekable_decompression seekable_decompression_mem \ - parallel_processing - -$(ZSTDLIB): - make -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME) - -seekable_compression : seekable_compression.c $(SEEKABLE_OBJS) - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -seekable_decompression : seekable_decompression.c $(SEEKABLE_OBJS) - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -seekable_decompression_mem : seekable_decompression_mem.c $(SEEKABLE_OBJS) - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -parallel_processing : parallel_processing.c $(SEEKABLE_OBJS) - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread - -parallel_compression : parallel_compression.c $(SEEKABLE_OBJS) - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread - -clean: - 
@rm -f core *.o tmp* result* *.zst \ - seekable_compression seekable_decompression \ - seekable_decompression_mem \ - parallel_processing parallel_compression - @echo Cleaning completed diff --git a/contrib/seekable_format/examples/parallel_compression.c b/contrib/seekable_format/examples/parallel_compression.c deleted file mode 100644 index 69644d2b3c800..0000000000000 --- a/contrib/seekable_format/examples/parallel_compression.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -#include <stdlib.h> // malloc, free, exit, atoi -#include <stdio.h> // fprintf, perror, feof, fopen, etc. -#include <string.h> // strlen, memset, strcat -#define ZSTD_STATIC_LINKING_ONLY -#include <zstd.h> // presumes zstd library is installed -#include <zstd_errors.h> -#if defined(WIN32) || defined(_WIN32) -# include <windows.h> -# define SLEEP(x) Sleep(x) -#else -# include <unistd.h> -# define SLEEP(x) usleep(x * 1000) -#endif - -#define XXH_NAMESPACE ZSTD_ -#include "xxhash.h" - -#include "pool.h" // use zstd thread pool for demo - -#include "zstd_seekable.h" - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc:"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - 
perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - -static void fseek_orDie(FILE* file, long int offset, int origin) -{ - if (!fseek(file, offset, origin)) { - if (!fflush(file)) return; - } - /* error */ - perror("fseek"); - exit(7); -} - -static long int ftell_orDie(FILE* file) -{ - long int off = ftell(file); - if (off != -1) return off; - /* error */ - perror("ftell"); - exit(8); -} - -struct job { - const void* src; - size_t srcSize; - void* dst; - size_t dstSize; - - unsigned checksum; - - int compressionLevel; - int done; -}; - -static void compressFrame(void* opaque) -{ - struct job* job = opaque; - - job->checksum = XXH64(job->src, job->srcSize, 0); - - size_t ret = ZSTD_compress(job->dst, job->dstSize, job->src, job->srcSize, job->compressionLevel); - if (ZSTD_isError(ret)) { - fprintf(stderr, "ZSTD_compress() error : %s \n", ZSTD_getErrorName(ret)); - exit(20); - } - - job->dstSize = ret; - job->done = 1; -} - -static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize, int nbThreads) -{ - POOL_ctx* pool = POOL_create(nbThreads, nbThreads); - if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); } - - FILE* const fin = fopen_orDie(fname, "rb"); - FILE* const fout = fopen_orDie(outName, "wb"); - - if (ZSTD_compressBound(frameSize) > 0xFFFFFFFFU) { fprintf(stderr, "Frame size too large \n"); exit(10); } - unsigned dstSize = ZSTD_compressBound(frameSize); - - - fseek_orDie(fin, 0, SEEK_END); - long int length = ftell_orDie(fin); - fseek_orDie(fin, 0, SEEK_SET); - - size_t numFrames = (length + frameSize - 1) / frameSize; - - struct 
job* jobs = malloc_orDie(sizeof(struct job) * numFrames); - - size_t i; - for(i = 0; i < numFrames; i++) { - void* in = malloc_orDie(frameSize); - void* out = malloc_orDie(dstSize); - - size_t inSize = fread_orDie(in, frameSize, fin); - - jobs[i].src = in; - jobs[i].srcSize = inSize; - jobs[i].dst = out; - jobs[i].dstSize = dstSize; - jobs[i].compressionLevel = cLevel; - jobs[i].done = 0; - POOL_add(pool, compressFrame, &jobs[i]); - } - - ZSTD_frameLog* fl = ZSTD_seekable_createFrameLog(1); - if (fl == NULL) { fprintf(stderr, "ZSTD_seekable_createFrameLog() failed \n"); exit(11); } - for (i = 0; i < numFrames; i++) { - while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */ - fwrite_orDie(jobs[i].dst, jobs[i].dstSize, fout); - free((void*)jobs[i].src); - free(jobs[i].dst); - - size_t ret = ZSTD_seekable_logFrame(fl, jobs[i].dstSize, jobs[i].srcSize, jobs[i].checksum); - if (ZSTD_isError(ret)) { fprintf(stderr, "ZSTD_seekable_logFrame() error : %s \n", ZSTD_getErrorName(ret)); } - } - - { unsigned char seekTableBuff[1024]; - ZSTD_outBuffer out = {seekTableBuff, 1024, 0}; - while (ZSTD_seekable_writeSeekTable(fl, &out) != 0) { - fwrite_orDie(seekTableBuff, out.pos, fout); - out.pos = 0; - } - fwrite_orDie(seekTableBuff, out.pos, fout); - } - - ZSTD_seekable_freeFrameLog(fl); - free(jobs); - fclose_orDie(fout); - fclose_orDie(fin); -} - -static const char* createOutFilename_orDie(const char* filename) -{ - size_t const inL = strlen(filename); - size_t const outL = inL + 5; - void* outSpace = malloc_orDie(outL); - memset(outSpace, 0, outL); - strcat(outSpace, filename); - strcat(outSpace, ".zst"); - return (const char*)outSpace; -} - -int main(int argc, const char** argv) { - const char* const exeName = argv[0]; - if (argc!=4) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE FRAME_SIZE NB_THREADS\n", exeName); - return 1; - } - - { const char* const inFileName = argv[1]; - unsigned const frameSize = (unsigned)atoi(argv[2]); - 
int const nbThreads = atoi(argv[3]); - - const char* const outFileName = createOutFilename_orDie(inFileName); - compressFile_orDie(inFileName, outFileName, 5, frameSize, nbThreads); - } - - return 0; -} diff --git a/contrib/seekable_format/examples/parallel_processing.c b/contrib/seekable_format/examples/parallel_processing.c deleted file mode 100644 index 36226b49fd3c1..0000000000000 --- a/contrib/seekable_format/examples/parallel_processing.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -/* - * A simple demo that sums up all the bytes in the file in parallel using - * seekable decompression and the zstd thread pool - */ - -#include <stdlib.h> // malloc, exit -#include <stdio.h> // fprintf, perror, feof -#include <string.h> // strerror -#include <errno.h> // errno -#define ZSTD_STATIC_LINKING_ONLY -#include <zstd.h> // presumes zstd library is installed -#include <zstd_errors.h> -#if defined(WIN32) || defined(_WIN32) -# include <windows.h> -# define SLEEP(x) Sleep(x) -#else -# include <unistd.h> -# define SLEEP(x) usleep(x * 1000) -#endif - -#include "pool.h" // use zstd thread pool for demo - -#include "zstd_seekable.h" - -#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc"); - exit(1); -} - -static void* realloc_orDie(void* ptr, size_t size) -{ - ptr = realloc(ptr, size); - if (ptr) return ptr; - /* error */ - perror("realloc"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - -static void fseek_orDie(FILE* file, long int offset, int origin) { - if (!fseek(file, offset, origin)) { - if (!fflush(file)) return; - } - /* error */ - perror("fseek"); - exit(7); -} - -struct sum_job { - const char* fname; - unsigned long long sum; - unsigned frameNb; - int done; -}; - -static void sumFrame(void* opaque) -{ - struct sum_job* job = (struct sum_job*)opaque; - job->done = 0; - - FILE* const fin = fopen_orDie(job->fname, "rb"); - - ZSTD_seekable* const seekable = ZSTD_seekable_create(); - if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); } - - size_t const initResult = ZSTD_seekable_initFile(seekable, fin); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", 
ZSTD_getErrorName(initResult)); exit(11); } - - size_t const frameSize = ZSTD_seekable_getFrameDecompressedSize(seekable, job->frameNb); - unsigned char* data = malloc_orDie(frameSize); - - size_t result = ZSTD_seekable_decompressFrame(seekable, data, frameSize, job->frameNb); - if (ZSTD_isError(result)) { fprintf(stderr, "ZSTD_seekable_decompressFrame() error : %s \n", ZSTD_getErrorName(result)); exit(12); } - - unsigned long long sum = 0; - size_t i; - for (i = 0; i < frameSize; i++) { - sum += data[i]; - } - job->sum = sum; - job->done = 1; - - fclose(fin); - ZSTD_seekable_free(seekable); - free(data); -} - -static void sumFile_orDie(const char* fname, int nbThreads) -{ - POOL_ctx* pool = POOL_create(nbThreads, nbThreads); - if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); } - - FILE* const fin = fopen_orDie(fname, "rb"); - - ZSTD_seekable* const seekable = ZSTD_seekable_create(); - if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); } - - size_t const initResult = ZSTD_seekable_initFile(seekable, fin); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - - unsigned const numFrames = ZSTD_seekable_getNumFrames(seekable); - struct sum_job* jobs = (struct sum_job*)malloc(numFrames * sizeof(struct sum_job)); - - unsigned fnb; - for (fnb = 0; fnb < numFrames; fnb++) { - jobs[fnb] = (struct sum_job){ fname, 0, fnb, 0 }; - POOL_add(pool, sumFrame, &jobs[fnb]); - } - - unsigned long long total = 0; - - for (fnb = 0; fnb < numFrames; fnb++) { - while (!jobs[fnb].done) SLEEP(5); /* wake up every 5 milliseconds to check */ - total += jobs[fnb].sum; - } - - printf("Sum: %llu\n", total); - - POOL_free(pool); - ZSTD_seekable_free(seekable); - fclose(fin); - free(jobs); -} - - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc!=3) { - fprintf(stderr, "wrong arguments\n"); - fprintf(stderr, 
"usage:\n"); - fprintf(stderr, "%s FILE NB_THREADS\n", exeName); - return 1; - } - - { - const char* const inFilename = argv[1]; - int const nbThreads = atoi(argv[2]); - sumFile_orDie(inFilename, nbThreads); - } - - return 0; -} diff --git a/contrib/seekable_format/examples/seekable_compression.c b/contrib/seekable_format/examples/seekable_compression.c deleted file mode 100644 index 9a331a89531e5..0000000000000 --- a/contrib/seekable_format/examples/seekable_compression.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -#include <stdlib.h> // malloc, free, exit, atoi -#include <stdio.h> // fprintf, perror, feof, fopen, etc. -#include <string.h> // strlen, memset, strcat -#define ZSTD_STATIC_LINKING_ONLY -#include <zstd.h> // presumes zstd library is installed - -#include "zstd_seekable.h" - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc:"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* 
error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - -static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize) -{ - FILE* const fin = fopen_orDie(fname, "rb"); - FILE* const fout = fopen_orDie(outName, "wb"); - size_t const buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */ - void* const buffIn = malloc_orDie(buffInSize); - size_t const buffOutSize = ZSTD_CStreamOutSize(); /* can always flush a full block */ - void* const buffOut = malloc_orDie(buffOutSize); - - ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream(); - if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); } - size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - - size_t read, toRead = buffInSize; - while( (read = fread_orDie(buffIn, toRead, fin)) ) { - ZSTD_inBuffer input = { buffIn, read, 0 }; - while (input.pos < input.size) { - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - toRead = ZSTD_seekable_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */ - if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_seekable_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); } - if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/ - fwrite_orDie(buffOut, output.pos, fout); - } - } - - while (1) { - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - size_t const remainingToFlush = ZSTD_seekable_endStream(cstream, &output); /* close stream */ - if (ZSTD_isError(remainingToFlush)) { fprintf(stderr, "ZSTD_seekable_endStream() error : %s \n", 
ZSTD_getErrorName(remainingToFlush)); exit(13); } - fwrite_orDie(buffOut, output.pos, fout); - if (!remainingToFlush) break; - } - - ZSTD_seekable_freeCStream(cstream); - fclose_orDie(fout); - fclose_orDie(fin); - free(buffIn); - free(buffOut); -} - -static char* createOutFilename_orDie(const char* filename) -{ - size_t const inL = strlen(filename); - size_t const outL = inL + 5; - void* outSpace = malloc_orDie(outL); - memset(outSpace, 0, outL); - strcat(outSpace, filename); - strcat(outSpace, ".zst"); - return (char*)outSpace; -} - -int main(int argc, const char** argv) { - const char* const exeName = argv[0]; - if (argc!=3) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE FRAME_SIZE\n", exeName); - return 1; - } - - { const char* const inFileName = argv[1]; - unsigned const frameSize = (unsigned)atoi(argv[2]); - - char* const outFileName = createOutFilename_orDie(inFileName); - compressFile_orDie(inFileName, outFileName, 5, frameSize); - free(outFileName); - } - - return 0; -} diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c deleted file mode 100644 index 7050e0fa5c641..0000000000000 --- a/contrib/seekable_format/examples/seekable_decompression.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - - -#include <stdlib.h> // malloc, exit -#include <stdio.h> // fprintf, perror, feof -#include <string.h> // strerror -#include <errno.h> // errno -#define ZSTD_STATIC_LINKING_ONLY -#include <zstd.h> // presumes zstd library is installed -#include <zstd_errors.h> - -#include "zstd_seekable.h" - -#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc"); - exit(1); -} - -static void* realloc_orDie(void* ptr, size_t size) -{ - ptr = realloc(ptr, size); - if (ptr) return ptr; - /* error */ - perror("realloc"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - -static void fseek_orDie(FILE* file, long int offset, int origin) { - if (!fseek(file, offset, origin)) { - if (!fflush(file)) return; - } - /* error */ - perror("fseek"); - exit(7); -} - - -static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset) -{ - FILE* const fin = fopen_orDie(fname, "rb"); - FILE* const fout = stdout; - size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. 
*/ - void* const buffOut = malloc_orDie(buffOutSize); - - ZSTD_seekable* const seekable = ZSTD_seekable_create(); - if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); } - - size_t const initResult = ZSTD_seekable_initFile(seekable, fin); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - - while (startOffset < endOffset) { - size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset); - - if (ZSTD_isError(result)) { - fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", - ZSTD_getErrorName(result)); - exit(12); - } - fwrite_orDie(buffOut, result, fout); - startOffset += result; - } - - ZSTD_seekable_free(seekable); - fclose_orDie(fin); - fclose_orDie(fout); - free(buffOut); -} - - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc!=4) { - fprintf(stderr, "wrong arguments\n"); - fprintf(stderr, "usage:\n"); - fprintf(stderr, "%s FILE START END\n", exeName); - return 1; - } - - { - const char* const inFilename = argv[1]; - off_t const startOffset = atoll(argv[2]); - off_t const endOffset = atoll(argv[3]); - decompressFile_orDie(inFilename, startOffset, endOffset); - } - - return 0; -} diff --git a/contrib/seekable_format/examples/seekable_decompression_mem.c b/contrib/seekable_format/examples/seekable_decompression_mem.c deleted file mode 100644 index c36d2221f97e7..0000000000000 --- a/contrib/seekable_format/examples/seekable_decompression_mem.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). 
- */ - - -#include <stdlib.h> // malloc, exit -#include <stdio.h> // fprintf, perror, feof -#include <string.h> // strerror -#include <errno.h> // errno -#define ZSTD_STATIC_LINKING_ONLY -#include <zstd.h> // presumes zstd library is installed -#include <zstd_errors.h> - -#include "zstd_seekable.h" - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -#define MAX_FILE_SIZE (8 * 1024 * 1024) - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc"); - exit(1); -} - -static void* realloc_orDie(void* ptr, size_t size) -{ - ptr = realloc(ptr, size); - if (ptr) return ptr; - /* error */ - perror("realloc"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - -static void fseek_orDie(FILE* file, long int offset, int origin) { - if (!fseek(file, offset, origin)) { - if (!fflush(file)) return; - } - /* error */ - perror("fseek"); - exit(7); -} - - -static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset) -{ - FILE* const fin = fopen_orDie(fname, "rb"); - FILE* const fout = stdout; - // Just for demo purposes, 
assume file is <= MAX_FILE_SIZE - void* const buffIn = malloc_orDie(MAX_FILE_SIZE); - size_t const inSize = fread_orDie(buffIn, MAX_FILE_SIZE, fin); - size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */ - void* const buffOut = malloc_orDie(buffOutSize); - - ZSTD_seekable* const seekable = ZSTD_seekable_create(); - if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); } - - size_t const initResult = ZSTD_seekable_initBuff(seekable, buffIn, inSize); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - - while (startOffset < endOffset) { - size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset); - - if (ZSTD_isError(result)) { - fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", - ZSTD_getErrorName(result)); - exit(12); - } - fwrite_orDie(buffOut, result, fout); - startOffset += result; - } - - ZSTD_seekable_free(seekable); - fclose_orDie(fin); - fclose_orDie(fout); - free(buffIn); - free(buffOut); -} - - -int main(int argc, const char** argv) -{ - const char* const exeName = argv[0]; - - if (argc!=4) { - fprintf(stderr, "wrong arguments\n"); - fprintf(stderr, "usage:\n"); - fprintf(stderr, "%s FILE START END\n", exeName); - return 1; - } - - { - const char* const inFilename = argv[1]; - off_t const startOffset = atoll(argv[2]); - off_t const endOffset = atoll(argv[3]); - decompressFile_orDie(inFilename, startOffset, endOffset); - } - - return 0; -} diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h deleted file mode 100644 index 7ffd1ba0a72b5..0000000000000 --- a/contrib/seekable_format/zstd_seekable.h +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef SEEKABLE_H -#define SEEKABLE_H - -#if defined (__cplusplus) -extern "C" { -#endif - -#include <stdio.h> 
-#include "zstd.h" /* ZSTDLIB_API */ - - -#define ZSTD_seekTableFooterSize 9 - -#define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1 - -#define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U - -/* Limit the maximum size to avoid any potential issues storing the compressed size */ -#define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U - -/*-**************************************************************************** -* Seekable Format -* -* The seekable format splits the compressed data into a series of "frames", -* each compressed individually so that decompression of a section in the -* middle of an archive only requires zstd to decompress at most a frame's -* worth of extra data, instead of the entire archive. -******************************************************************************/ - -typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; -typedef struct ZSTD_seekable_s ZSTD_seekable; - -/*-**************************************************************************** -* Seekable compression - HowTo -* A ZSTD_seekable_CStream object is required to track the streaming operation. -* Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/ -* release resources. -* -* Streaming objects are reusable to avoid allocation and deallocation; -* to start a new compression operation, call ZSTD_seekable_initCStream() on the -* compressor. -* -* Data streamed to the seekable compressor will automatically be split into -* frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()), -* or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is -* called or when the default maximum frame size (2GB) is reached. -* -* Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object -* for a new compression operation. -* `maxFrameSize` indicates the size at which to automatically start a new -* seekable frame. `maxFrameSize == 0` implies the default maximum size. 
-* `checksumFlag` indicates whether or not the seek table should include frame -* checksums on the uncompressed data for verification. -* @return : a size hint for input to provide for compression, or an error code -* checkable with ZSTD_isError() -* -* Use ZSTD_seekable_compressStream() repeatedly to consume the input stream. -* The function will automatically update both `pos` fields. -* Note that it may not consume the entire input, in which case `pos < size`, -* and it's up to the caller to present the remaining data again. -* @return : a size hint, preferred nb of bytes to use as input for next -* function call or an error code, which can be tested using -* ZSTD_isError(). -* Note 1 : it's just a hint to help latency a little; any other -* value will work fine. -* -* At any time, call ZSTD_seekable_endFrame() to end the current frame and -* start a new one. -* -* ZSTD_seekable_endStream() will end the current frame, and then write the seek -* table so that decompressors can efficiently find compressed frames. -* ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush -* all the necessary data to `output`. In this case, it should be called again -* until all remaining data is flushed out and 0 is returned. 
-******************************************************************************/ - -/*===== Seekable compressor management =====*/ -ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void); -ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs); - -/*===== Seekable compression functions =====*/ -ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize); -ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); -ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); - -/*= Raw seek table API - * These functions allow for the seek table to be constructed directly. - * This table can then be appended to a file of concatenated frames. - * This allows the frames to be compressed independently, even in parallel, - * and compiled together afterward into a seekable archive. - * - * Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking - * structure. - * - * Call ZSTD_seekable_logFrame() once for each frame in the archive. - * checksum is optional, and will not be used if checksumFlag was 0 when the - * frame log was created. If present, it should be the least significant 32 - * bits of the XXH64 hash of the uncompressed data. - * - * Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table. - * If the entire table was written, the return value will be 0. Otherwise, - * it will be equal to the number of bytes left to write. 
*/ -typedef struct ZSTD_frameLog_s ZSTD_frameLog; -ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag); -ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl); -ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum); -ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output); - -/*-**************************************************************************** -* Seekable decompression - HowTo -* A ZSTD_seekable object is required to track the seek table. -* -* Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the -* seek table provided in the input. -* There are three modes for ZSTD_seekable_init: -* - ZSTD_seekable_initBuff() : An in-memory API. The data contained in -* `src` should be the entire seekable file, including the seek table. -* `src` should be kept alive and unmodified until the ZSTD_seekable object -* is freed or reset. -* - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and -* fseek will be used to access the required data for building the seek -* table and doing decompression operations. `src` should not be closed -* or modified until the ZSTD_seekable object is freed or reset. -* - ZSTD_seekable_initAdvanced() : A general API allowing the client to -* provide its own read and seek callbacks. -* + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`. -* Premature EOF should be treated as an error. -* + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`, -* where origin is either SEEK_SET (beginning of -* file), or SEEK_END (end of file). -* Both functions should return a non-negative value in case of success, and a -* negative value in case of failure. If implementing using this API and -* stdio, be careful with files larger than 4GB and fseek. All of these -* functions return an error code checkable with ZSTD_isError(). 
-* -* Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed -* offset `offset`. ZSTD_seekable_decompress may have to decompress the entire -* prefix of the frame before the desired data if it has not already processed -* this section. If ZSTD_seekable_decompress is called multiple times for a -* consecutive range of data, it will efficiently retain the decompressor object -* and avoid redecompressing frame prefixes. The return value is the number of -* bytes decompressed, or an error code checkable with ZSTD_isError(). -* -* The seek table access functions can be used to obtain the data contained -* in the seek table. If frameIndex is larger than the value returned by -* ZSTD_seekable_getNumFrames(), they will return error codes checkable with -* ZSTD_isError(). Note that since the offset access functions return -* unsigned long long instead of size_t, in this case they will instead return -* the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE. -******************************************************************************/ - -/*===== Seekable decompressor management =====*/ -ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); -ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); - -/*===== Seekable decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); -ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset); -ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); - -#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) -/*===== Seek Table access functions =====*/ -ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs); -ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API unsigned long 
long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); -ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset); - -/*===== Seekable advanced I/O API =====*/ -typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); -typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); -typedef struct { - void* opaque; - ZSTD_seekable_read* read; - ZSTD_seekable_seek* seek; -} ZSTD_seekable_customFile; -ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src); - -#if defined (__cplusplus) -} -#endif - -#endif diff --git a/contrib/seekable_format/zstd_seekable_compression_format.md b/contrib/seekable_format/zstd_seekable_compression_format.md deleted file mode 100644 index bf3080f7bbed2..0000000000000 --- a/contrib/seekable_format/zstd_seekable_compression_format.md +++ /dev/null @@ -1,116 +0,0 @@ -# Zstandard Seekable Format - -### Notices - -Copyright (c) 2017-present Facebook, Inc. - -Permission is granted to copy and distribute this document -for any purpose and without charge, -including translations into other languages -and incorporation into compilations, -provided that the copyright notice and this notice are preserved, -and that any substantive changes or deletions from the original -are clearly marked. -Distribution of this document is unlimited. - -### Version -0.1.0 (11/04/17) - -## Introduction -This document defines a format for compressed data to be stored so that subranges of the data can be efficiently decompressed without requiring the entire document to be decompressed. 
-This is done by splitting up the input data into frames, -each of which is compressed independently, -and so can be decompressed independently. -Decompression then takes advantage of a provided 'seek table', which allows the decompressor to immediately jump to the desired data. This is done in a way that is compatible with the original Zstandard format by placing the seek table in a Zstandard skippable frame. - -### Overall conventions -In this document: -- square brackets, i.e. `[` and `]`, are used to indicate optional fields or parameters. -- the naming convention for identifiers is `Mixed_Case_With_Underscores` -- All numeric fields are little-endian unless specified otherwise - -## Format - -The format consists of a number of frames (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table. - -### Seek Table Format -The structure of the seek table frame is as follows: - -|`Skippable_Magic_Number`|`Frame_Size`|`[Seek_Table_Entries]`|`Seek_Table_Footer`| -|------------------------|------------|----------------------|-------------------| -| 4 bytes | 4 bytes | 8-12 bytes each | 9 bytes | - -__`Skippable_Magic_Number`__ - -Value : 0x184D2A5E. -This is for compatibility with [Zstandard skippable frames]. -Since it is legal for other Zstandard skippable frames to use the same -magic number, it is not recommended for a decoder to recognize seek table frames -solely on the basis of this value. - -__`Frame_Size`__ - -The total size of the skippable frame, not including the `Skippable_Magic_Number` or `Frame_Size`. -This is for compatibility with [Zstandard skippable frames]. 
- -[Zstandard skippable frames]: https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#skippable-frames - -#### `Seek_Table_Footer` -The seek table footer format is as follows: - -|`Number_Of_Frames`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`| -|------------------|-----------------------|-----------------------| -| 4 bytes | 1 byte | 4 bytes | - -__`Seekable_Magic_Number`__ - -Value : 0x8F92EAB1. -This value must be the last bytes present in the compressed file so that decoders -can efficiently find it and determine if there is an actual seek table present. - -__`Number_Of_Frames`__ - -The number of stored frames in the data. - -__`Seek_Table_Descriptor`__ - -A bitfield describing the format of the seek table. - -| Bit number | Field name | -| ---------- | ---------- | -| 7 | `Checksum_Flag` | -| 6-2 | `Reserved_Bits` | -| 1-0 | `Unused_Bits` | - -While only `Checksum_Flag` currently exists, there are 7 other bits in this field that can be used for future changes to the format, -for example the addition of inline dictionaries. - -__`Checksum_Flag`__ - -If the checksum flag is set, each of the seek table entries contains a 4 byte checksum of the uncompressed data contained in its frame. - -`Reserved_Bits` are not currently used but may be used in the future for breaking changes, so a compliant decoder should ensure they are set to 0. `Unused_Bits` may be used in the future for non-breaking changes, so a compliant decoder should not interpret these bits. - -#### __`Seek_Table_Entries`__ - -`Seek_Table_Entries` consists of `Number_Of_Frames` (one for each frame in the data, not including the seek table frame) entries of the following form, in sequence: - -|`Compressed_Size`|`Decompressed_Size`|`[Checksum]`| -|-----------------|-------------------|------------| -| 4 bytes | 4 bytes | 4 bytes | - -__`Compressed_Size`__ - -The compressed size of the frame. 
-The cumulative sum of the `Compressed_Size` fields of frames `0` to `i` gives the offset in the compressed file of frame `i+1`. - -__`Decompressed_Size`__ - -The size of the decompressed data contained in the frame. For skippable or otherwise empty frames, this value is 0. - -__`Checksum`__ - -Only present if `Checksum_Flag` is set in the `Seek_Table_Descriptor`. Value : the least significant 32 bits of the XXH64 digest of the uncompressed data, stored in little-endian format. - -## Version Changes -- 0.1.0: initial version diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c deleted file mode 100644 index 5a75714fac5b0..0000000000000 --- a/contrib/seekable_format/zstdseek_compress.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - */ - -#include <stdlib.h> /* malloc, free */ -#include <limits.h> /* UINT_MAX */ -#include <assert.h> - -#define XXH_STATIC_LINKING_ONLY -#define XXH_NAMESPACE ZSTD_ -#include "xxhash.h" - -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -#include "zstd_errors.h" -#include "mem.h" -#include "zstd_seekable.h" - -#define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; } - -#undef ERROR -#define ERROR(name) ((size_t)-ZSTD_error_##name) - -#undef MIN -#undef MAX -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) - -typedef struct { - U32 cSize; - U32 dSize; - U32 checksum; -} framelogEntry_t; - -struct ZSTD_frameLog_s { - framelogEntry_t* entries; - U32 size; - U32 capacity; - - int checksumFlag; - - /* for use when streaming out the seek table */ - U32 seekTablePos; - U32 seekTableIndex; -} framelog_t; - -struct ZSTD_seekable_CStream_s { - ZSTD_CStream* cstream; - ZSTD_frameLog framelog; - - U32 frameCSize; - U32 frameDSize; - - XXH64_state_t xxhState; - - U32 maxFrameSize; - - int writingSeekTable; -}; - -size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl) -{ - /* allocate some initial space */ - size_t const FRAMELOG_STARTING_CAPACITY = 16; - fl->entries = (framelogEntry_t*)malloc( - sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY); - if (fl->entries == NULL) return ERROR(memory_allocation); - fl->capacity = FRAMELOG_STARTING_CAPACITY; - - return 0; -} - -size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl) -{ - if (fl != NULL) free(fl->entries); - return 0; -} - -ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag) -{ - ZSTD_frameLog* fl = malloc(sizeof(ZSTD_frameLog)); - if (fl == NULL) return NULL; - - if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) { - free(fl); - return NULL; - } - - fl->checksumFlag = checksumFlag; - fl->seekTablePos = 0; - fl->seekTableIndex = 0; - fl->size = 0; - - return fl; -} - -size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl) -{ - ZSTD_seekable_frameLog_freeVec(fl); - free(fl); - return 0; -} - -ZSTD_seekable_CStream* ZSTD_seekable_createCStream() -{ - ZSTD_seekable_CStream* zcs = malloc(sizeof(ZSTD_seekable_CStream)); - - if (zcs == NULL) return NULL; - - memset(zcs, 0, sizeof(*zcs)); - - zcs->cstream = ZSTD_createCStream(); - if (zcs->cstream == NULL) goto failed1; - - if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(&zcs->framelog))) goto failed2; - - return zcs; - -failed2: - ZSTD_freeCStream(zcs->cstream); -failed1: - free(zcs); - return NULL; -} - -size_t 
ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs) -{ - if (zcs == NULL) return 0; /* support free on null */ - ZSTD_freeCStream(zcs->cstream); - ZSTD_seekable_frameLog_freeVec(&zcs->framelog); - free(zcs); - - return 0; -} - -size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, - int compressionLevel, - int checksumFlag, - unsigned maxFrameSize) -{ - zcs->framelog.size = 0; - zcs->frameCSize = 0; - zcs->frameDSize = 0; - - /* make sure maxFrameSize has a reasonable value */ - if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) { - return ERROR(frameParameter_unsupported); - } - - zcs->maxFrameSize = maxFrameSize - ? maxFrameSize - : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE; - - zcs->framelog.checksumFlag = checksumFlag; - if (zcs->framelog.checksumFlag) { - XXH64_reset(&zcs->xxhState, 0); - } - - zcs->framelog.seekTablePos = 0; - zcs->framelog.seekTableIndex = 0; - zcs->writingSeekTable = 0; - - return ZSTD_initCStream(zcs->cstream, compressionLevel); -} - -size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, - unsigned compressedSize, - unsigned decompressedSize, - unsigned checksum) -{ - if (fl->size == ZSTD_SEEKABLE_MAXFRAMES) - return ERROR(frameIndex_tooLarge); - - /* grow the buffer if required */ - if (fl->size == fl->capacity) { - /* exponential size increase for constant amortized runtime */ - size_t const newCapacity = fl->capacity * 2; - framelogEntry_t* const newEntries = realloc(fl->entries, - sizeof(framelogEntry_t) * newCapacity); - - if (newEntries == NULL) return ERROR(memory_allocation); - - fl->entries = newEntries; - assert(newCapacity <= UINT_MAX); - fl->capacity = (U32)newCapacity; - } - - fl->entries[fl->size] = (framelogEntry_t){ - compressedSize, decompressedSize, checksum - }; - fl->size++; - - return 0; -} - -size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) -{ - size_t const prevOutPos = output->pos; - /* end the frame */ - size_t ret = ZSTD_endStream(zcs->cstream, output); - - 
zcs->frameCSize += output->pos - prevOutPos; - - /* need to flush before doing the rest */ - if (ret) return ret; - - /* frame done */ - - /* store the frame data for later */ - ret = ZSTD_seekable_logFrame( - &zcs->framelog, zcs->frameCSize, zcs->frameDSize, - zcs->framelog.checksumFlag - ? XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU - : 0); - if (ret) return ret; - - /* reset for the next frame */ - zcs->frameCSize = 0; - zcs->frameDSize = 0; - - ZSTD_resetCStream(zcs->cstream, 0); - if (zcs->framelog.checksumFlag) - XXH64_reset(&zcs->xxhState, 0); - - return 0; -} - -size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - const BYTE* const inBase = (const BYTE*) input->src + input->pos; - size_t inLen = input->size - input->pos; - - inLen = MIN(inLen, (size_t)(zcs->maxFrameSize - zcs->frameDSize)); - - /* if we haven't finished flushing the last frame, don't start writing a new one */ - if (inLen > 0) { - ZSTD_inBuffer inTmp = { inBase, inLen, 0 }; - size_t const prevOutPos = output->pos; - - size_t const ret = ZSTD_compressStream(zcs->cstream, output, &inTmp); - - if (zcs->framelog.checksumFlag) { - XXH64_update(&zcs->xxhState, inBase, inTmp.pos); - } - - zcs->frameCSize += output->pos - prevOutPos; - zcs->frameDSize += inTmp.pos; - - input->pos += inTmp.pos; - - if (ZSTD_isError(ret)) return ret; - } - - if (zcs->maxFrameSize == zcs->frameDSize) { - /* log the frame and start over */ - size_t const ret = ZSTD_seekable_endFrame(zcs, output); - if (ZSTD_isError(ret)) return ret; - - /* get the client ready for the next frame */ - return (size_t)zcs->maxFrameSize; - } - - return (size_t)(zcs->maxFrameSize - zcs->frameDSize); -} - -static inline size_t ZSTD_seekable_seekTableSize(const ZSTD_frameLog* fl) -{ - size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0); - size_t const seekTableLen = ZSTD_SKIPPABLEHEADERSIZE + - sizePerFrame * fl->size + - ZSTD_seekTableFooterSize; - - return seekTableLen; -} - 
-/* ZSTD_stwrite32() : writes `value` as a little-endian 32-bit word occupying bytes [offset, offset+4) of the seek table frame. - * Does nothing when fl->seekTablePos is already at or past offset+4; when the word was only partially written by an earlier call, continues from the first missing byte. - * @return : 0 when the word was already written or was written in full by this call; - * otherwise (fewer than 4 bytes written by this call) the number of seek table bytes still to be written */ static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl, - ZSTD_outBuffer* output, U32 const value, - U32 const offset) -{ - if (fl->seekTablePos < offset + 4) { - BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */ - size_t const lenWrite = - MIN(output->size - output->pos, offset + 4 - fl->seekTablePos); - MEM_writeLE32(tmp, value); - memcpy((BYTE*)output->dst + output->pos, - tmp + (fl->seekTablePos - offset), lenWrite); - output->pos += lenWrite; - fl->seekTablePos += lenWrite; - - if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos; - } - return 0; -} - -size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output) -{ - /* fl->seekTableIndex: index of the next seek table entry to write and - * fl->seekTablePos: number of bytes of the seek table frame written so far - * - * This function is written this way so that if it has to return early - * because of a small buffer, it can keep going where it left off: - * every ZSTD_stwrite32() call below is skipped once seekTablePos has moved past its word, - * and a nonzero result from it is returned to the caller immediately by CHECK_Z. - */ - - size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0); - size_t const seekTableLen = ZSTD_seekable_seekTableSize(fl); - - CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0)); - assert(seekTableLen <= (size_t)UINT_MAX); - CHECK_Z(ZSTD_stwrite32(fl, output, (U32)seekTableLen - ZSTD_SKIPPABLEHEADERSIZE, 4)); - - while (fl->seekTableIndex < fl->size) { - unsigned long long const start = ZSTD_SKIPPABLEHEADERSIZE + sizePerFrame * fl->seekTableIndex; - assert(start + 8 <= UINT_MAX); - CHECK_Z(ZSTD_stwrite32(fl, output, - fl->entries[fl->seekTableIndex].cSize, - (U32)start + 0)); - - CHECK_Z(ZSTD_stwrite32(fl, output, - fl->entries[fl->seekTableIndex].dSize, - (U32)start + 4)); - - if (fl->checksumFlag) { - CHECK_Z(ZSTD_stwrite32( - fl, output, fl->entries[fl->seekTableIndex].checksum, - (U32)start + 8)); - } - - fl->seekTableIndex++; - } - - assert(seekTableLen <= UINT_MAX); - CHECK_Z(ZSTD_stwrite32(fl, output, fl->size, - (U32)seekTableLen - ZSTD_seekTableFooterSize)); - - if (output->size - 
output->pos < 1) return seekTableLen - fl->seekTablePos; - if (fl->seekTablePos < seekTableLen - 4) { - BYTE sfd = 0; - sfd |= (fl->checksumFlag) << 7; - - ((BYTE*)output->dst)[output->pos] = sfd; - output->pos++; - fl->seekTablePos++; - } - - CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER, - (U32)seekTableLen - 4)); - - if (fl->seekTablePos != seekTableLen) return ERROR(GENERIC); - return 0; -} - -size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) -{ - if (!zcs->writingSeekTable && zcs->frameDSize) { - const size_t endFrame = ZSTD_seekable_endFrame(zcs, output); - if (ZSTD_isError(endFrame)) return endFrame; - /* return an accurate size hint */ - if (endFrame) return endFrame + ZSTD_seekable_seekTableSize(&zcs->framelog); - } - - zcs->writingSeekTable = 1; - - return ZSTD_seekable_writeSeekTable(&zcs->framelog, output); -} diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c deleted file mode 100644 index abfd1e9027175..0000000000000 --- a/contrib/seekable_format/zstdseek_decompress.c +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ - -/* ********************************************************* -* Turn on Large Files support (>4GB) for 32-bit Linux/Unix -***********************************************************/ -#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ -# if !defined(_FILE_OFFSET_BITS) -# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */ -# endif -# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ -# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ -# endif -# if defined(_AIX) || defined(__hpux) -# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ -# endif -#endif - -/* ************************************************************ -* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW -***************************************************************/ -#if defined(_MSC_VER) && _MSC_VER >= 1400 -# define LONG_SEEK _fseeki64 -#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ -# define LONG_SEEK fseeko -#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) -# define LONG_SEEK fseeko64 -#elif defined(_WIN32) && !defined(__DJGPP__) -# include <windows.h> - static int LONG_SEEK(FILE* file, __int64 offset, int origin) { - LARGE_INTEGER off; - DWORD method; - off.QuadPart = offset; - if (origin == SEEK_END) - method = FILE_END; - else if (origin == SEEK_CUR) - method = FILE_CURRENT; - else - method = FILE_BEGIN; - - if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) - return 0; - else - return -1; - } -#else -# define LONG_SEEK fseek -#endif - -#include <stdlib.h> /* malloc, free */ -#include <stdio.h> /* FILE* */ -#include <limits.h> /* UNIT_MAX */ -#include <assert.h> - -#define XXH_STATIC_LINKING_ONLY -#define XXH_NAMESPACE ZSTD_ -#include 
"xxhash.h" - -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -#include "zstd_errors.h" -#include "mem.h" -#include "zstd_seekable.h" - -#undef ERROR -#define ERROR(name) ((size_t)-ZSTD_error_##name) - -#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); } - -#undef MIN -#undef MAX -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -/* Special-case callbacks for FILE* and in-memory modes, so that we can treat - * them the same way as the advanced API */ -static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n) -{ - size_t const result = fread(buffer, 1, n, (FILE*)opaque); - if (result != n) { - return -1; - } - return 0; -} - -static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin) -{ - int const ret = LONG_SEEK((FILE*)opaque, offset, origin); - if (ret) return ret; - return fflush((FILE*)opaque); -} - -typedef struct { - const void *ptr; - size_t size; - size_t pos; -} buffWrapper_t; - -static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n) -{ - buffWrapper_t* buff = (buffWrapper_t*) opaque; - if (buff->pos + n > buff->size) return -1; - memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n); - buff->pos += n; - return 0; -} - -static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin) -{ - buffWrapper_t* const buff = (buffWrapper_t*) opaque; - unsigned long long newOffset; - switch (origin) { - case SEEK_SET: - newOffset = offset; - break; - case SEEK_CUR: - newOffset = (unsigned long long)buff->pos + offset; - break; - case SEEK_END: - newOffset = (unsigned long long)buff->size + offset; - break; - default: - assert(0); /* not possible */ - } - if (newOffset > buff->size) { - return -1; - } - buff->pos = newOffset; - return 0; -} - -typedef struct { - U64 cOffset; - U64 dOffset; - U32 checksum; -} seekEntry_t; - -typedef struct { - seekEntry_t* entries; - size_t tableLen; - - int checksumFlag; -} seekTable_t; - 
-#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX - -struct ZSTD_seekable_s { - ZSTD_DStream* dstream; - seekTable_t seekTable; - ZSTD_seekable_customFile src; - - U64 decompressedOffset; - U32 curFrame; - - BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */ - BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the - starts of chunks before we get to the - desired section */ - ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */ - buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */ - - XXH64_state_t xxhState; -}; - -ZSTD_seekable* ZSTD_seekable_create(void) -{ - ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable)); - - if (zs == NULL) return NULL; - - /* also initializes stage to zsds_init */ - memset(zs, 0, sizeof(*zs)); - - zs->dstream = ZSTD_createDStream(); - if (zs->dstream == NULL) { - free(zs); - return NULL; - } - - return zs; -} - -size_t ZSTD_seekable_free(ZSTD_seekable* zs) -{ - if (zs == NULL) return 0; /* support free on null */ - ZSTD_freeDStream(zs->dstream); - free(zs->seekTable.entries); - free(zs); - - return 0; -} - -/** ZSTD_seekable_offsetToFrameIndex() : - * Performs a binary search to find the last frame with a decompressed offset - * <= pos - * @return : the frame's index */ -unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos) -{ - U32 lo = 0; - U32 hi = (U32)zs->seekTable.tableLen; - assert(zs->seekTable.tableLen <= UINT_MAX); - - if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) { - return (U32)zs->seekTable.tableLen; - } - - while (lo + 1 < hi) { - U32 const mid = lo + ((hi - lo) >> 1); - if (zs->seekTable.entries[mid].dOffset <= pos) { - lo = mid; - } else { - hi = mid; - } - } - return lo; -} - -unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs) -{ - assert(zs->seekTable.tableLen <= UINT_MAX); - return (unsigned)zs->seekTable.tableLen; -} - -unsigned long long 
ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) -{ - if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; - return zs->seekTable.entries[frameIndex].cOffset; -} - -unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) -{ - if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; - return zs->seekTable.entries[frameIndex].dOffset; -} - -size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) -{ - if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); - return zs->seekTable.entries[frameIndex + 1].cOffset - - zs->seekTable.entries[frameIndex].cOffset; -} - -size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) -{ - if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); - return zs->seekTable.entries[frameIndex + 1].dOffset - - zs->seekTable.entries[frameIndex].dOffset; -} - -static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) -{ - int checksumFlag; - ZSTD_seekable_customFile src = zs->src; - /* read the footer, fixed size */ - CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END)); - CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize)); - - if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) { - return ERROR(prefix_unknown); - } - - { BYTE const sfd = zs->inBuff[4]; - checksumFlag = sfd >> 7; - - /* check reserved bits */ - if ((checksumFlag >> 2) & 0x1f) { - return ERROR(corruption_detected); - } - } - - { U32 const numFrames = MEM_readLE32(zs->inBuff); - U32 const sizePerEntry = 8 + (checksumFlag?4:0); - U32 const tableSize = sizePerEntry * numFrames; - U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE; - - U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */ - { - U32 const toRead = 
MIN(remaining, SEEKABLE_BUFF_SIZE); - - CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END)); - CHECK_IO(src.read(src.opaque, zs->inBuff, toRead)); - - remaining -= toRead; - } - - if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) { - return ERROR(prefix_unknown); - } - if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) { - return ERROR(prefix_unknown); - } - - { /* Allocate an extra entry at the end so that we can do size - * computations on the last element without special case */ - seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); - - U32 idx = 0; - U32 pos = 8; - - - U64 cOffset = 0; - U64 dOffset = 0; - - if (!entries) { - free(entries); - return ERROR(memory_allocation); - } - - /* compute cumulative positions */ - for (; idx < numFrames; idx++) { - if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) { - U32 const offset = SEEKABLE_BUFF_SIZE - pos; - U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset); - memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */ - CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead)); - remaining -= toRead; - pos = 0; - } - entries[idx].cOffset = cOffset; - entries[idx].dOffset = dOffset; - - cOffset += MEM_readLE32(zs->inBuff + pos); - pos += 4; - dOffset += MEM_readLE32(zs->inBuff + pos); - pos += 4; - if (checksumFlag) { - entries[idx].checksum = MEM_readLE32(zs->inBuff + pos); - pos += 4; - } - } - entries[numFrames].cOffset = cOffset; - entries[numFrames].dOffset = dOffset; - - zs->seekTable.entries = entries; - zs->seekTable.tableLen = numFrames; - zs->seekTable.checksumFlag = checksumFlag; - return 0; - } - } -} - -size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize) -{ - zs->buffWrapper = (buffWrapper_t){src, srcSize, 0}; - { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper, - &ZSTD_seekable_read_buff, - &ZSTD_seekable_seek_buff}; - return ZSTD_seekable_initAdvanced(zs, 
srcFile); } -} - -size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src) -{ - ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE, - &ZSTD_seekable_seek_FILE}; - return ZSTD_seekable_initAdvanced(zs, srcFile); -} - -size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src) -{ - zs->src = src; - - { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs); - if (ZSTD_isError(seekTableInit)) return seekTableInit; } - - zs->decompressedOffset = (U64)-1; - zs->curFrame = (U32)-1; - - { const size_t dstreamInit = ZSTD_initDStream(zs->dstream); - if (ZSTD_isError(dstreamInit)) return dstreamInit; } - return 0; -} - -size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset) -{ - U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset); - do { - /* check if we can continue from a previous decompress job */ - if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) { - zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset; - zs->curFrame = targetFrame; - - CHECK_IO(zs->src.seek(zs->src.opaque, - zs->seekTable.entries[targetFrame].cOffset, - SEEK_SET)); - zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0}; - XXH64_reset(&zs->xxhState, 0); - ZSTD_resetDStream(zs->dstream); - } - - while (zs->decompressedOffset < offset + len) { - size_t toRead; - ZSTD_outBuffer outTmp; - size_t prevOutPos; - if (zs->decompressedOffset < offset) { - /* dummy decompressions until we get to the target offset */ - outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0}; - } else { - outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset}; - } - - prevOutPos = outTmp.pos; - toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in); - if (ZSTD_isError(toRead)) { - return toRead; - } - - if (zs->seekTable.checksumFlag) { - XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos, - outTmp.pos - prevOutPos); - } - 
zs->decompressedOffset += outTmp.pos - prevOutPos; - - if (toRead == 0) { - /* frame complete */ - - /* verify checksum */ - if (zs->seekTable.checksumFlag && - (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) != - zs->seekTable.entries[targetFrame].checksum) { - return ERROR(corruption_detected); - } - - if (zs->decompressedOffset < offset + len) { - /* go back to the start and force a reset of the stream */ - targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset); - } - break; - } - - /* read in more data if we're done with this buffer */ - if (zs->in.pos == zs->in.size) { - toRead = MIN(toRead, SEEKABLE_BUFF_SIZE); - CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead)); - zs->in.size = toRead; - zs->in.pos = 0; - } - } - } while (zs->decompressedOffset != offset + len); - - return len; -} - -size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex) -{ - if (frameIndex >= zs->seekTable.tableLen) { - return ERROR(frameIndex_tooLarge); - } - - { - size_t const decompressedSize = - zs->seekTable.entries[frameIndex + 1].dOffset - - zs->seekTable.entries[frameIndex].dOffset; - if (dstSize < decompressedSize) { - return ERROR(dstSize_tooSmall); - } - return ZSTD_seekable_decompress( - zs, dst, decompressedSize, - zs->seekTable.entries[frameIndex].dOffset); - } -} diff --git a/contrib/snap/snapcraft.yaml b/contrib/snap/snapcraft.yaml deleted file mode 100644 index 0a77946ae0aae..0000000000000 --- a/contrib/snap/snapcraft.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: zstd -version: git -summary: Zstandard - Fast real-time compression algorithm -description: | - Zstandard, or zstd as short version, is a fast lossless compression - algorithm, targeting real-time compression scenarios at zlib-level and better - compression ratios. 
It's backed by a very fast entropy stage, provided by - Huff0 and FSE library - -grade: devel # must be 'stable' to release into candidate/stable channels -confinement: devmode # use 'strict' once you have the right plugs and slots - -apps: - zstd: - command: usr/local/bin/zstd - plugs: [home, removable-media] - zstdgrep: - command: usr/local/bin/zstdgrep - plugs: [home, removable-media] - zstdless: - command: usr/local/bin/zstdless - plugs: [home, removable-media] - -parts: - zstd: - source: . - plugin: make - build-packages: [g++] |