diff options
Diffstat (limited to 'contrib/long_distance_matching/ldm.h')
-rw-r--r-- | contrib/long_distance_matching/ldm.h | 197 |
1 files changed, 0 insertions, 197 deletions
diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h deleted file mode 100644 index 4adadbd0add62..0000000000000 --- a/contrib/long_distance_matching/ldm.h +++ /dev/null @@ -1,197 +0,0 @@ -#ifndef LDM_H -#define LDM_H - -#include "mem.h" // from /lib/common/mem.h - -//#include "ldm_params.h" - -// ============================================================================= -// Modify the parameters in ldm_params.h if "ldm_params.h" is included. -// Otherwise, modify the parameters here. -// ============================================================================= - -#ifndef LDM_PARAMS_H - // Defines the size of the hash table. - // Note that this is not the number of buckets. - // Currently this should be less than WINDOW_SIZE_LOG + 4. - #define LDM_MEMORY_USAGE 23 - - // The number of entries in a hash bucket. - #define HASH_BUCKET_SIZE_LOG 3 // The maximum is 4 for now. - - // Defines the lag in inserting elements into the hash table. - #define LDM_LAG 0 - - // The maximum window size when searching for matches. - // The maximum value is 30 - #define LDM_WINDOW_SIZE_LOG 28 - - // The minimum match length. - // This should be a multiple of four. - #define LDM_MIN_MATCH_LENGTH 64 - - // If INSERT_BY_TAG, insert entries into the hash table as a function of the - // hash. Certain hashes will not be inserted. - // - // Otherwise, insert as a function of the position. - #define INSERT_BY_TAG 1 - - // Store a checksum with the hash table entries for faster comparison. - // This halves the number of entries the hash table can contain. - #define USE_CHECKSUM 1 -#endif - -// Output compression statistics. -#define COMPUTE_STATS - -// Output the configuration. -#define OUTPUT_CONFIGURATION - -// If defined, forces the probability of insertion to be approximately -// one per (1 << HASH_ONLY_EVERY_LOG). If not defined, the probability will be -// calculated based on the memory usage and window size for "even" insertion -// throughout the window. - -// #define HASH_ONLY_EVERY_LOG 8 - -// ============================================================================= - -// The number of bytes storing the compressed and decompressed size -// in the header. -#define LDM_COMPRESSED_SIZE 8 -#define LDM_DECOMPRESSED_SIZE 8 -#define LDM_HEADER_SIZE ((LDM_COMPRESSED_SIZE)+(LDM_DECOMPRESSED_SIZE)) - -#define ML_BITS 4 -#define ML_MASK ((1U<<ML_BITS)-1) -#define RUN_BITS (8-ML_BITS) -#define RUN_MASK ((1U<<RUN_BITS)-1) - -// The number of bytes storing the offset. -#define LDM_OFFSET_SIZE 4 - -#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG)) - -// TODO: Match lengths that are too small do not use the hash table efficiently. -// There should be a minimum hash length given the hash table size. -#define LDM_HASH_LENGTH LDM_MIN_MATCH_LENGTH - -typedef struct LDM_compressStats LDM_compressStats; -typedef struct LDM_CCtx LDM_CCtx; -typedef struct LDM_DCtx LDM_DCtx; - -/** - * Compresses src into dst. - * Returns the compressed size if successful, 0 otherwise. - * - * NB: This currently ignores maxDstSize and assumes enough space is available. - * - * Block format (see lz4 documentation for more information): - * github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md - * - * A block is composed of sequences. Each sequence begins with a token, which - * is a one-byte value separated into two 4-bit fields. - * - * The first field uses the four high bits of the token and encodes the literal - * length. If the field value is 0, there is no literal. If it is 15, - * additional bytes are added (each ranging from 0 to 255) to the previous - * value to produce a total length. - * - * Following the token and optional length bytes are the literals. - * - * Next are the 4 bytes representing the offset of the match (2 in lz4), - * representing the position to copy the literals. - * - * The lower four bits of the token encode the match length. With additional - * bytes added similarly to the additional literal length bytes after the offset. - * - * The last sequence is incomplete and stops right after the literals. - */ -size_t LDM_compress(const void *src, size_t srcSize, - void *dst, size_t maxDstSize); - -/** - * Initialize the compression context. - * - * Allocates memory for the hash table. - * - * Returns 0 if successful, 1 otherwise. - */ -size_t LDM_initializeCCtx(LDM_CCtx *cctx, - const void *src, size_t srcSize, - void *dst, size_t maxDstSize); - -/** - * Frees up memory allocated in LDM_initializeCCtx(). - */ -void LDM_destroyCCtx(LDM_CCtx *cctx); - -/** - * Prints the distribution of offsets in the hash table. - * - * The offsets are defined as the distance of the hash table entry from the - * current input position of the cctx. - */ -void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx); - -/** - * Outputs compression statistics to stdout. - */ -void LDM_printCompressStats(const LDM_compressStats *stats); - -/** - * Encode the literal length followed by the literals. - * - * The literal length is written to the upper four bits of pToken, with - * additional bytes written to the output as needed (see lz4). - * - * This is followed by literalLength bytes corresponding to the literals. - */ -void LDM_encodeLiteralLengthAndLiterals(LDM_CCtx *cctx, BYTE *pToken, - const U64 literalLength); - -/** - * Write current block (literals, literal length, match offset, - * match length). - */ -void LDM_outputBlock(LDM_CCtx *cctx, - const U64 literalLength, - const U32 offset, - const U64 matchLength); - -/** - * Decompresses src into dst. - * - * Note: assumes src does not have a header. - */ -size_t LDM_decompress(const void *src, size_t srcSize, - void *dst, size_t maxDstSize); - -/** - * Initialize the decompression context. - */ -void LDM_initializeDCtx(LDM_DCtx *dctx, - const void *src, size_t compressedSize, - void *dst, size_t maxDecompressedSize); - -/** - * Reads the header from src and writes the compressed size and - * decompressed size into compressedSize and decompressedSize respectively. - * - * NB: LDM_compress and LDM_decompress currently do not add/read headers. - */ -void LDM_readHeader(const void *src, U64 *compressedSize, - U64 *decompressedSize); - -/** - * Write the compressed and decompressed size. - */ -void LDM_writeHeader(void *memPtr, U64 compressedSize, - U64 decompressedSize); - -/** - * Output the configuration used. - */ -void LDM_outputConfiguration(void); - -#endif /* LDM_H */ |