Diffstat (limited to 'programs/bench.c')
-rw-r--r--  programs/bench.c  101
1 file changed, 58 insertions(+), 43 deletions(-)
diff --git a/programs/bench.c b/programs/bench.c
index 5d2568f104f50..014a4fd41b1cf 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -22,7 +22,7 @@
* Compiler Warnings
****************************************/
#ifdef _MSC_VER
-# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
@@ -34,6 +34,7 @@
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
#include <stdio.h> /* fprintf, fopen */
+#include <assert.h> /* assert */
#include "mem.h"
#define ZSTD_STATIC_LINKING_ONLY
@@ -51,8 +52,9 @@
# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif
-#define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */
-#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
+#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
+#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10
#define KB *(1 <<10)
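
The timing constants are now parenthesized, and a nanosecond-resolution sibling (TIMELOOP_NANOSEC) is introduced because the measurement loops below switch from UTIL_clockSpanMicro() to UTIL_clockSpanNano(). Parenthesizing the macro bodies is defensive: once such a constant appears on the right of a division or inside a larger expression, an unparenthesized expansion binds the wrong way. A minimal, self-contained illustration (the macro names here are only for the example, not from the patch):

    #include <stdio.h>

    #define UNSAFE_MICROSEC  1*1000000ULL    /* old style: no parentheses */
    #define SAFE_MICROSEC   (1*1000000ULL)   /* new style: parenthesized  */

    int main(void)
    {
        unsigned long long const us = 3000000ULL;   /* 3 seconds, in microseconds */
        printf("%llu\n", us / UNSAFE_MICROSEC);     /* expands to us/1*1000000 -> 3000000000000 (wrong) */
        printf("%llu\n", us / SAFE_MICROSEC);       /* expands to us/(1*1000000) -> 3 (seconds, correct) */
        return 0;
    }
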
@@ -122,12 +124,12 @@ void BMK_setBlockSize(size_t blockSize)
void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); }
-static U32 g_nbThreads = 1;
-void BMK_setNbThreads(unsigned nbThreads) {
+static U32 g_nbWorkers = 0;
+void BMK_setNbWorkers(unsigned nbWorkers) {
#ifndef ZSTD_MULTITHREAD
- if (nbThreads > 1) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
+ if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
#endif
- g_nbThreads = nbThreads;
+ g_nbWorkers = nbWorkers;
}
static U32 g_realTime = 0;
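
The benchmark's thread count is renamed from nbThreads to nbWorkers to match the library parameter it feeds, and the default drops from 1 to 0: in this generation of the advanced API (available under ZSTD_STATIC_LINKING_ONLY), nbWorkers == 0 means compression runs synchronously in the calling thread, while nbWorkers >= 1 offloads it to that many worker threads; later zstd releases expose the same knob as ZSTD_c_nbWorkers. Further down, the init code also maps a request of 1 back to 0, since a single worker would only move the work onto one background thread. A minimal sketch of how the value reaches the compression context, mirroring the calls used later in this patch (context creation and cleanup added for completeness):

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) { /* handle allocation failure */ }

    /* 0 = single-threaded, synchronous; N >= 1 = N worker threads
     * (only effective when the library is built with ZSTD_MULTITHREAD) */
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, g_nbWorkers);
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, cLevel);

    /* ... compress with this context ... */
    ZSTD_freeCCtx(cctx);
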
@@ -212,6 +214,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
/* init */
if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */
+ if (g_nbWorkers==1) g_nbWorkers=0; /* prefer synchronous mode */
if (g_decodeOnly) { /* benchmark only decompression : source must be already compressed */
const char* srcPtr = (const char*)srcBuffer;
@@ -258,13 +261,19 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
} } }
/* warming up memory */
- RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
+ if (g_decodeOnly) {
+ memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
+ } else {
+ RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
+ }
/* Bench */
{ U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0);
UTIL_time_t coolTime;
- U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 1;
+ U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 1;
+ U32 nbDecodeLoops = (U32)((100 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */
+ U32 nbCompressionLoops = (U32)((2 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */
U64 totalCTime=0, totalDTime=0;
U32 cCompleted=g_decodeOnly, dCompleted=0;
# define NB_MARKS 4
@@ -283,19 +292,17 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
}
if (!g_decodeOnly) {
- UTIL_time_t clockStart;
/* Compression */
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */
- UTIL_sleepMilli(1); /* give processor time to other processes */
+ UTIL_sleepMilli(5); /* give processor time to other processes */
UTIL_waitForNextTick();
- clockStart = UTIL_getTime();
if (!cCompleted) { /* still some time to do compression tests */
- U64 const clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
U32 nbLoops = 0;
- ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
+ UTIL_time_t const clockStart = UTIL_getTime();
+ ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, g_nbWorkers);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
@@ -307,13 +314,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
}
ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
+ ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
- do {
+
+ if (!g_nbSeconds) nbCompressionLoops=1;
+ for (nbLoops=0; nbLoops<nbCompressionLoops; nbLoops++) {
U32 blockNb;
for (blockNb=0; blockNb<nbBlocks; blockNb++) {
#if 0 /* direct compression function, for occasional comparison */
@@ -342,12 +352,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
}
blockTable[blockNb].cSize = out.pos;
#endif
+ } }
+ { U64 const loopDuration = UTIL_clockSpanNano(clockStart);
+ if (loopDuration > 0) {
+ if (loopDuration < fastestC * nbCompressionLoops)
+ fastestC = loopDuration / nbCompressionLoops;
+ nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;
+ } else {
+ assert(nbCompressionLoops < 40000000); /* avoid overflow */
+ nbCompressionLoops *= 100;
}
- nbLoops++;
- } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
- { U64 const loopDuration = UTIL_clockSpanMicro(clockStart);
- if (loopDuration < fastestC*nbLoops)
- fastestC = loopDuration / nbLoops;
totalCTime += loopDuration;
cCompleted = (totalCTime >= maxTime); /* end compression tests */
} }
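
The compression measurement no longer polls the clock after every pass of a do/while loop; it runs a predetermined number of iterations (nbCompressionLoops), times the whole batch with UTIL_clockSpanNano(), and re-derives the count so the next batch takes roughly TIMELOOP_NANOSEC (one second). The starting counts set earlier are deliberately conservative: for the 10,000,000-byte synthetic input, nbCompressionLoops begins at (2 MB)/(srcSize+1)+1 = 1 and nbDecodeLoops at (100 MB)/(srcSize+1)+1 = 11. When a batch finishes faster than the clock can resolve (loopDuration == 0), the count is multiplied by 100, and the new assert() (hence the <assert.h> include above) guards that escalation against overflowing the 32-bit counter. The pattern in isolation, with a hypothetical timer and workload standing in for the real ones:

    /* Placeholders for the real monotonic timer and the measured workload. */
    extern unsigned long long elapsed_ns(void);   /* hypothetical: current time, in ns */
    extern void run_once(void);                   /* hypothetical: one benchmark pass  */

    #define TIMELOOP_NANOSEC (1*1000000000ULL)    /* 1 second, as in the patch */

    void calibrated_bench(void)
    {
        unsigned nbLoops = 1;                                /* conservative first guess   */
        unsigned long long fastest = (unsigned long long)-1; /* best observed ns/iteration */
        unsigned long long total = 0;
        unsigned long long const budget = 4 * TIMELOOP_NANOSEC;  /* overall time budget    */

        while (total < budget) {
            unsigned long long const start = elapsed_ns();
            unsigned i;
            for (i = 0; i < nbLoops; i++) run_once();
            {   unsigned long long const duration = elapsed_ns() - start;
                if (duration > 0) {
                    if (duration < fastest * nbLoops) fastest = duration / nbLoops;
                    nbLoops = (unsigned)(TIMELOOP_NANOSEC / fastest) + 1;  /* aim for ~1 s per batch */
                } else {
                    nbLoops *= 100;   /* clock too coarse to see one batch; enlarge it */
                }
                total += duration;
            }
        }
        /* `fastest` now holds the best ns-per-iteration estimate. */
    }
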
@@ -357,16 +371,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
ratio = (double)srcSize / (double)cSize;
markNb = (markNb+1) % NB_MARKS;
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
- double const compressionSpeed = (double)srcSize / fastestC;
+ double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
marks[markNb], displayName, (U32)srcSize, (U32)cSize,
ratioAccuracy, ratio,
cSpeedAccuracy, compressionSpeed );
}
- } else { /* g_decodeOnly */
- memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
- }
+ } /* if (!g_decodeOnly) */
#if 0 /* disable decompression test */
dCompleted=1;
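
Two things change in this hunk. First, the old else branch that copied the pre-compressed input is gone; that copy now happens during buffer warm-up, so the decode-only path skips the compression section entirely. Second, because fastestC is now measured in nanoseconds per iteration instead of microseconds, the speed display gains a factor of 1000: bytes divided by nanoseconds is bytes/ns (i.e. GB/s), and multiplying by 1000 reports the usual MB/s; the same factor is applied to the decompression speed and the quiet-mode summary further down. A standalone check of the arithmetic (the values are made up):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long const srcSize   = 10000000ULL;   /* bytes            */
        unsigned long long const fastestNs = 4000000ULL;    /* ns per iteration */
        double const speed = ((double)srcSize / fastestNs) * 1000;   /* -> MB/s */
        printf("%.1f MB/s\n", speed);   /* 10e6 bytes in 4 ms -> prints 2500.0 MB/s */
        return 0;
    }
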
@@ -375,16 +387,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
/* Decompression */
if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */
- UTIL_sleepMilli(1); /* give processor time to other processes */
+ UTIL_sleepMilli(5); /* give processor time to other processes */
UTIL_waitForNextTick();
if (!dCompleted) {
- U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
U32 nbLoops = 0;
ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
UTIL_time_t const clockStart = UTIL_getTime();
if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure");
- do {
+ if (!g_nbSeconds) nbDecodeLoops = 1;
+ for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
U32 blockNb;
for (blockNb=0; blockNb<nbBlocks; blockNb++) {
size_t const regenSize = ZSTD_decompress_usingDDict(dctx,
@@ -396,22 +408,26 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
}
blockTable[blockNb].resSize = regenSize;
- }
- nbLoops++;
- } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
+ } }
ZSTD_freeDDict(ddict);
- { U64 const loopDuration = UTIL_clockSpanMicro(clockStart);
- if (loopDuration < fastestD*nbLoops)
- fastestD = loopDuration / nbLoops;
+ { U64 const loopDuration = UTIL_clockSpanNano(clockStart);
+ if (loopDuration > 0) {
+ if (loopDuration < fastestD * nbDecodeLoops)
+ fastestD = loopDuration / nbDecodeLoops;
+ nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;
+ } else {
+ assert(nbDecodeLoops < 40000000); /* avoid overflow */
+ nbDecodeLoops *= 100;
+ }
totalDTime += loopDuration;
dCompleted = (totalDTime >= maxTime);
} }
markNb = (markNb+1) % NB_MARKS;
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
- double const compressionSpeed = (double)srcSize / fastestC;
+ double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
- double const decompressionSpeed = (double)srcSize / fastestD;
+ double const decompressionSpeed = ((double)srcSize / fastestD) * 1000;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
marks[markNb], displayName, (U32)srcSize, (U32)cSize,
ratioAccuracy, ratio,
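
In the decompression path, the ZSTD_DDict built from the dictionary buffer is created once and reused for every block and every timed iteration, so the dictionary is not re-loaded on each call. A minimal sketch of that reuse pattern outside the benchmark (buffer names are illustrative, error handling kept to the essentials):

    ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
    ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
    if (dctx == NULL || ddict == NULL) { /* handle allocation failure */ }

    {   size_t const regenSize = ZSTD_decompress_usingDDict(dctx,
                                        dstBuffer, dstCapacity,
                                        compressedBuf, compressedSize,
                                        ddict);   /* dictionary loaded once, reused here */
        if (ZSTD_isError(regenSize)) {
            /* report ZSTD_getErrorName(regenSize) */
        }
    }

    ZSTD_freeDDict(ddict);
    ZSTD_freeDCtx(dctx);
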
@@ -460,8 +476,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
} /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
if (g_displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
- double cSpeed = (double)srcSize / fastestC;
- double dSpeed = (double)srcSize / fastestD;
+ double const cSpeed = ((double)srcSize / fastestC) * 1000;
+ double const dSpeed = ((double)srcSize / fastestD) * 1000;
if (g_additionalParam)
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
else
@@ -518,9 +534,8 @@ static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
if (g_displayLevel == 1 && !g_additionalParam)
DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
- if (cLevelLast < cLevel) cLevelLast = cLevel;
-
for (l=cLevel; l <= cLevelLast; l++) {
+ if (l==0) continue; /* skip level 0 */
BMK_benchMem(srcBuffer, benchedSize,
displayName, l,
fileSizes, nbFiles,
@@ -530,8 +545,8 @@ static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
/*! BMK_loadFiles() :
- Loads `buffer` with content of files listed within `fileNamesTable`.
- At most, fills `buffer` entirely */
+ * Loads `buffer` with content of files listed within `fileNamesTable`.
+ * At most, fills `buffer` entirely. */
static void BMK_loadFiles(void* buffer, size_t bufferSize,
size_t* fileSizes,
const char* const * const fileNamesTable, unsigned nbFiles)
@@ -633,7 +648,8 @@ static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigne
}
-static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, const ZSTD_compressionParameters* compressionParams)
+static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
+ const ZSTD_compressionParameters* compressionParams)
{
char name[20] = {0};
size_t benchedSize = 10000000;
@@ -661,7 +677,6 @@ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
{
double const compressibility = (double)g_compressibilityDefault / 100;
- if (cLevel < 1) cLevel = 1; /* minimum compression level */
if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
if (cLevelLast < cLevel) cLevelLast = cLevel;
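
BMK_benchFiles() no longer forces a minimum level of 1, and the cLevelLast adjustment now happens here instead of inside BMK_benchCLevel(), whose loop skips level 0 because zstd reserves 0 as the "use default level" sentinel. A small helper capturing the resulting range handling (the function name is hypothetical; ZSTD_maxCLevel() is the real library call):

    /* Clamp a requested level range to the library maximum and keep it
     * non-empty; level 0 stays in the range and the bench loop skips it. */
    static void BMK_normalizeLevelRange(int* cLevel, int* cLevelLast)
    {
        if (*cLevel     > ZSTD_maxCLevel()) *cLevel     = ZSTD_maxCLevel();
        if (*cLevelLast > ZSTD_maxCLevel()) *cLevelLast = ZSTD_maxCLevel();
        if (*cLevelLast < *cLevel)          *cLevelLast = *cLevel;
    }
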