Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
893 changes: 574 additions & 319 deletions contrib/seekable_format/tests/seekable_tests.c

Large diffs are not rendered by default.

168 changes: 131 additions & 37 deletions contrib/seqBench/seqBench.c
Original file line number Diff line number Diff line change
@@ -1,53 +1,147 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/

/* seqBench: roundtrip benchmark using sequence-level compression API.
*
* Usage: seqBench <file>
*
* Generates sequences from the input file, then recompresses them
* using ZSTD_compressSequences() with explicit block delimiters,
* finally validates the output by decompressing and comparing.
*/

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <zstd.h>

int main(int argc, char *argv[]) {
ZSTD_CCtx* zc = ZSTD_createCCtx();
int ret = 0;

if (argc != 2) {
printf("Usage: seqBench <file>\n"); // TODO provide the block delim option here
return 1;
}
if (argc != 2) {
fprintf(stderr, "Usage: seqBench <file>\n");
fprintf(stderr,
"\nRoundtrip benchmark using sequence-level compression.\n");
fprintf(stderr, "Generates sequences with ZSTD_generateSequences(),\n");
fprintf(stderr, "recompresses with ZSTD_compressSequences() using\n");
fprintf(stderr,
"explicit block delimiters, and validates by decompression.\n");
return 1;
}

ZSTD_CCtx *zc = ZSTD_createCCtx();
if (zc == NULL) {
fprintf(stderr, "ERROR: ZSTD_createCCtx() failed\n");
return 1;
}

FILE *f = fopen(argv[1], "rb");
fseek(f, 0, SEEK_END);
long inBufSize = ftell(f);
fseek(f, 0, SEEK_SET);
FILE *f = fopen(argv[1], "rb");
if (f == NULL) {
fprintf(stderr, "ERROR: could not open file '%s'\n", argv[1]);
ZSTD_freeCCtx(zc);
return 1;
}

char *inBuf = malloc(inBufSize + 1);
fread(inBuf, inBufSize, 1, f);
fseek(f, 0, SEEK_END);
long inBufSize = ftell(f);
fseek(f, 0, SEEK_SET);

if (inBufSize <= 0) {
fprintf(stderr, "ERROR: file '%s' is empty or unreadable\n", argv[1]);
fclose(f);
ZSTD_freeCCtx(zc);
return 1;
}

char *inBuf = (char *)malloc((size_t)inBufSize + 1);
if (inBuf == NULL) {
fprintf(stderr, "ERROR: could not allocate %ld bytes for input\n",
inBufSize);
fclose(f);
ZSTD_freeCCtx(zc);
return 1;
}

size_t seqsSize = ZSTD_sequenceBound(inBufSize);
ZSTD_Sequence *seqs = (ZSTD_Sequence*)malloc(seqsSize * sizeof(ZSTD_Sequence));
char *outBuf = malloc(ZSTD_compressBound(inBufSize));
size_t const bytesRead = fread(inBuf, 1, (size_t)inBufSize, f);
fclose(f);
if ((long)bytesRead != inBufSize) {
fprintf(stderr, "ERROR: read only %zu of %ld bytes\n", bytesRead,
inBufSize);
free(inBuf);
ZSTD_freeCCtx(zc);
return 1;
}

ZSTD_generateSequences(zc, seqs, seqsSize, inBuf, inBufSize);
ZSTD_CCtx_setParameter(zc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
size_t outBufSize = ZSTD_compressSequences(zc, outBuf, inBufSize, seqs, seqsSize, inBuf, inBufSize);
if (ZSTD_isError(outBufSize)) {
printf("ERROR: %lu\n", outBufSize);
return 1;
}
size_t seqsSize = ZSTD_sequenceBound((size_t)inBufSize);
ZSTD_Sequence *seqs =
(ZSTD_Sequence *)malloc(seqsSize * sizeof(ZSTD_Sequence));
size_t const outBufCapacity = ZSTD_compressBound((size_t)inBufSize);
char *outBuf = (char *)malloc(outBufCapacity);
char *validationBuf = (char *)malloc((size_t)inBufSize);

if (seqs == NULL || outBuf == NULL || validationBuf == NULL) {
fprintf(stderr, "ERROR: memory allocation failed\n");
ret = 1;
goto cleanup;
}

ZSTD_generateSequences(zc, seqs, seqsSize, inBuf, (size_t)inBufSize);
ZSTD_CCtx_setParameter(zc, ZSTD_c_blockDelimiters,
ZSTD_sf_explicitBlockDelimiters);
size_t outBufSize = ZSTD_compressSequences(
zc, outBuf, outBufCapacity, seqs, seqsSize, inBuf, (size_t)inBufSize);
if (ZSTD_isError(outBufSize)) {
fprintf(stderr, "ERROR: ZSTD_compressSequences failed: %s\n",
ZSTD_getErrorName(outBufSize));
ret = 1;
goto cleanup;
}

size_t const decSize =
ZSTD_decompress(validationBuf, (size_t)inBufSize, outBuf, outBufSize);
if (ZSTD_isError(decSize)) {
fprintf(stderr, "ERROR: ZSTD_decompress failed: %s\n",
ZSTD_getErrorName(decSize));
ret = 1;
goto cleanup;
}

if ((long)decSize != inBufSize) {
fprintf(stderr, "ERROR: decompressed size (%zu) != original size (%ld)\n",
decSize, inBufSize);
ret = 1;
goto cleanup;
}

char *validationBuf = malloc(inBufSize);
ZSTD_decompress(validationBuf, inBufSize, outBuf, outBufSize);

if (memcmp(inBuf, validationBuf, inBufSize) == 0) {
printf("Compression and decompression were successful!\n");
} else {
printf("ERROR: input and validation buffers don't match!\n");
for (int i = 0; i < inBufSize; i++) {
if (inBuf[i] != validationBuf[i]) {
printf("First bad index: %d\n", i);
break;
}
}
if (memcmp(inBuf, validationBuf, (size_t)inBufSize) == 0) {
printf("Compression and decompression were successful!\n");
printf(" Original size: %ld bytes\n", inBufSize);
printf(" Compressed size: %zu bytes\n", outBufSize);
printf(" Ratio: %.2f\n", (double)inBufSize / (double)outBufSize);
} else {
fprintf(stderr, "ERROR: input and validation buffers don't match!\n");
for (long i = 0; i < inBufSize; i++) {
if (inBuf[i] != validationBuf[i]) {
fprintf(stderr, "First bad index: %ld\n", i);
break;
}
}
ret = 1;
}

return 0;
cleanup:
free(validationBuf);
free(outBuf);
free(seqs);
free(inBuf);
ZSTD_freeCCtx(zc);
return ret;
}
9 changes: 7 additions & 2 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ all: simple_compression simple_decompression \
multiple_simple_compression\
dictionary_compression dictionary_decompression \
streaming_compression streaming_decompression \
multiple_streaming_compression streaming_memory_usage
multiple_streaming_compression streaming_memory_usage \
streaming_dictionary_compression

$(LIB) :
$(MAKE) -C $(LIBDIR) libzstd.a
Expand Down Expand Up @@ -53,6 +54,9 @@ streaming_decompression : $(LIB)
streaming_memory_usage.o: common.h
streaming_memory_usage : $(LIB)

streaming_dictionary_compression.o: common.h
streaming_dictionary_compression : $(LIB)


.PHONY:clean
clean:
Expand All @@ -61,7 +65,8 @@ clean:
multiple_simple_compression \
dictionary_compression dictionary_decompression \
streaming_compression streaming_decompression \
multiple_streaming_compression streaming_memory_usage
multiple_streaming_compression streaming_memory_usage \
streaming_dictionary_compression
@echo Cleaning completed

.PHONY:test
Expand Down
4 changes: 4 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,7 @@ Zstandard library : usage examples
Decompress multiple files using the same dictionary.
Result remains in memory.
Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()`

- [Streaming dictionary compression](streaming_dictionary_compression.c) :
Compress multiple files in streaming mode using the same dictionary.
Introduces usage of : `ZSTD_CCtx_loadDictionary()` and `ZSTD_compressStream2()`
138 changes: 138 additions & 0 deletions examples/streaming_dictionary_compression.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

/* This example demonstrates streaming compression with a dictionary.
* It combines the streaming approach from streaming_compression.c
* with the dictionary approach from dictionary_compression.c.
*
* This is useful when compressing many small files (e.g. database records,
* JSON objects, log entries) in a streaming fashion using a pre-trained
* dictionary for better compression ratios on small data.
*
* It uses the advanced API:
* - ZSTD_CCtx_loadDictionary() to load the dictionary
* - ZSTD_compressStream2() to stream-compress the input
*
* Usage: streaming_dictionary_compression DICT FILES...
*/

#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD()
#include <stdio.h> // printf
#include <stdlib.h> // free
#include <string.h> // memset, strcat, strlen
#include <zstd.h> // presumes zstd library is installed

static void compressFile_orDie(const char *fname, const char *outName,
const void *dictBuffer, size_t dictSize,
int cLevel) {
/* Open the input and output files. */
FILE *const fin = fopen_orDie(fname, "rb");
FILE *const fout = fopen_orDie(outName, "wb");

/* Create the input and output buffers.
* They may be any size, but we recommend using these functions to size them.
*/
size_t const buffInSize = ZSTD_CStreamInSize();
void *const buffIn = malloc_orDie(buffInSize);
size_t const buffOutSize = ZSTD_CStreamOutSize();
void *const buffOut = malloc_orDie(buffOutSize);

/* Create the compression context. */
ZSTD_CCtx *const cctx = ZSTD_createCCtx();
CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");

/* Set compression parameters. */
CHECK_ZSTD(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel));
CHECK_ZSTD(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));

/* Load the dictionary.
* The dictionary will be used for all subsequent compressions using this
* context, until it is reset or a new dictionary is loaded.
* ZSTD_CCtx_loadDictionary() makes an internal copy of the dictionary,
* so we can free dictBuffer after this call if we wanted to.
*/
CHECK_ZSTD(ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize));

/* Stream-compress the input file. */
size_t const toRead = buffInSize;
for (;;) {
size_t read = fread_orDie(buffIn, toRead, fin);
int const lastChunk = (read < toRead);
ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
ZSTD_inBuffer input = {buffIn, read, 0};
int finished;
do {
ZSTD_outBuffer output = {buffOut, buffOutSize, 0};
size_t const remaining =
ZSTD_compressStream2(cctx, &output, &input, mode);
CHECK_ZSTD(remaining);
fwrite_orDie(buffOut, output.pos, fout);
finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
} while (!finished);
CHECK(input.pos == input.size, "Impossible: zstd only returns 0 when the "
"input is completely consumed!");
if (lastChunk) {
break;
}
}

ZSTD_freeCCtx(cctx);
fclose_orDie(fout);
fclose_orDie(fin);
free(buffIn);
free(buffOut);
}

/*! createOutFilename_orDie() :
 * Builds the output file name for `filename` by appending ".zst".
 *
 * @param filename NUL-terminated input file name
 * @return a buffer of strlen(filename) + 5 bytes obtained from
 *         malloc_orDie(), holding `filename` followed by ".zst" and the
 *         terminating NUL. The caller releases it with free().
 */
static char *createOutFilename_orDie(const char *filename)
{
    static const char suffix[] = ".zst";
    size_t const nameLen = strlen(filename);
    /* sizeof suffix counts the terminating NUL: 4 characters + 1 = 5. */
    size_t const totalLen = nameLen + sizeof suffix;
    char *const outName = (char *)malloc_orDie(totalLen);

    memcpy(outName, filename, nameLen);
    memcpy(outName + nameLen, suffix, sizeof suffix);
    return outName;
}

/* Entry point.
 *
 * Usage: <exe> DICT FILES...
 *
 * argv[1] names the dictionary file; every following argument names a file
 * that is compressed into "<name>.zst" at compression level 3.
 * Returns 1 after printing usage when fewer than two arguments are given,
 * 0 once every file has been processed.
 */
int main(int argc, const char **argv)
{
    const char *const exeName = argv[0];
    int const cLevel = 3;

    /* At least one input file is required, so FILES is not shown as optional. */
    if (argc < 3) {
        fprintf(stderr, "wrong arguments\n");
        fprintf(stderr, "usage:\n");
        fprintf(stderr, "%s DICT FILES...\n", exeName);
        fprintf(stderr,
                "\nCompress FILES using streaming mode with a dictionary.\n");
        fprintf(stderr, "DICT is a dictionary file created with `zstd --train`.\n");
        return 1;
    }

    /* Load dictionary into memory.
     * The dictionary is loaded once and reused for all files. */
    const char *const dictName = argv[1];
    size_t dictSize;
    void *const dictBuffer = mallocAndLoadFile_orDie(dictName, &dictSize);
    printf("loading dictionary %s (%zu bytes)\n", dictName, dictSize);

    /* Compress each file with the dictionary. */
    int u;
    for (u = 2; u < argc; u++) {
        const char *const inFilename = argv[u];
        char *const outFilename = createOutFilename_orDie(inFilename);
        compressFile_orDie(inFilename, outFilename, dictBuffer, dictSize, cLevel);
        printf("%25s : compressed with dictionary -> %s\n", inFilename,
               outFilename);
        free(outFilename);
    }

    free(dictBuffer);
    /* argc - 2 has type int, so it is printed with %d rather than %u. */
    printf("All %d files compressed with dictionary. \n", argc - 2);
    return 0;
}
Loading