Last active
February 3, 2026 17:50
-
-
Save Rexagon/70c6cbda8109b973df67ba70e7194b31 to your computer and use it in GitHub Desktop.
Rust-like lz4 usage for data compression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // clang test_lz4.c -o test_lz4 -llz4 -O3 && ./test_lz4 | |
| #include <assert.h> | |
| #include <lz4frame.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <time.h> | |
// Type-generic minimum of two values.
// Each argument appears in exactly one initializer, so side effects in the
// arguments happen once. Uses the GNU statement-expression and __typeof__
// extensions.
#define min(a, b)                                                              \
  ({                                                                           \
    __typeof__(a) min_lhs_ = (a);                                              \
    __typeof__(b) min_rhs_ = (b);                                              \
    min_lhs_ < min_rhs_ ? min_lhs_ : min_rhs_;                                 \
  })
// Type-generic maximum of two values.
// Each argument appears in exactly one initializer, so side effects in the
// arguments happen once. Uses the GNU statement-expression and __typeof__
// extensions.
#define max(a, b)                                                              \
  ({                                                                           \
    __typeof__(a) max_lhs_ = (a);                                              \
    __typeof__(b) max_rhs_ = (b);                                              \
    max_lhs_ > max_rhs_ ? max_lhs_ : max_rhs_;                                 \
  })
// A pointer/length pair describing a run of bytes.
typedef struct slice slice_t;
struct slice {
  char *data; // first byte of the run; NULL when there is no storage
  size_t len; // number of bytes reachable through `data`
};

// Allocates `len` bytes with malloc and wraps them in a slice.
slice_t slice_alloc(size_t len);

// Returns a view of the first `len` bytes of `slice`; the view shares the
// storage of `slice` and must not be freed separately.
slice_t slice_prefix(slice_t slice, size_t len);

// Releases the storage behind `slice` with free().
void slice_free(slice_t slice);
| typedef struct { | |
| char *data; | |
| size_t len; | |
| size_t capacity; | |
| } vec_t; | |
| vec_t vec_new(); | |
| void vec_free(vec_t *vec); | |
| void vec_push(vec_t *vec, slice_t slice); | |
| slice_t fs_read(const char *path); | |
| size_t lz4_compress(slice_t data, int level, vec_t *output); | |
| int main() { | |
| // Read file | |
| slice_t data = fs_read("./data/full_compressed.bin"); | |
| if (data.len == 0) { | |
| fprintf(stderr, "failed to read file\n"); | |
| return -1; | |
| } | |
| printf("data_len=%ld\n", data.len); | |
| // Compress file | |
| struct timespec start, stop; | |
| clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); | |
| vec_t compressed = vec_new(); | |
| size_t res = lz4_compress(data, 1, &compressed); | |
| if (LZ4F_isError(res)) { | |
| fprintf(stderr, "failed to compress data: %s\n", LZ4F_getErrorName(res)); | |
| slice_free(data); | |
| return -1; | |
| } | |
| clock_gettime(CLOCK_THREAD_CPUTIME_ID, &stop); | |
| double elapsed_s = (double)(stop.tv_sec - start.tv_sec) + | |
| (stop.tv_nsec - start.tv_nsec) / 1e9; | |
| printf("result_len=%ld, took=%lfs\n", compressed.len, elapsed_s); | |
| slice_free(data); | |
| vec_free(&compressed); | |
| return 0; | |
| } | |
| slice_t slice_alloc(size_t len) { | |
| return (slice_t){ | |
| .data = malloc(len), | |
| .len = len, | |
| }; | |
| } | |
| slice_t slice_prefix(slice_t slice, size_t len) { | |
| assert(slice.len >= len); | |
| return (slice_t){ | |
| .data = slice.data, | |
| .len = len, | |
| }; | |
| } | |
| void slice_free(slice_t slice) { free(slice.data); } | |
| vec_t vec_new() { return (vec_t){.data = NULL, .len = 0, .capacity = 0}; } | |
| void vec_free(vec_t *vec) { free(vec->data); } | |
| void vec_push(vec_t *vec, slice_t slice) { | |
| size_t new_size = vec->len + slice.len; | |
| if (new_size > vec->capacity) { | |
| size_t new_capacity = max(vec->capacity * 2, 4); | |
| while (new_size > new_capacity) { | |
| new_capacity *= 2; | |
| } | |
| char *new_output; | |
| if (vec->data != NULL) { | |
| new_output = realloc(vec->data, new_capacity); | |
| } else { | |
| assert(vec->len == 0); | |
| new_output = malloc(new_capacity); | |
| } | |
| assert(new_output != NULL); | |
| vec->data = new_output; | |
| vec->capacity = new_capacity; | |
| } | |
| memcpy(vec->data + vec->len, slice.data, slice.len); | |
| vec->len = new_size; | |
| } | |
| slice_t fs_read(const char *path) { | |
| FILE *file = fopen(path, "r"); | |
| if (file == NULL) { | |
| return (slice_t){.data = NULL, .len = 0}; | |
| } | |
| fseek(file, 0, SEEK_END); | |
| size_t file_size = ftell(file); | |
| rewind(file); | |
| char *data = malloc(file_size); | |
| size_t read_total = fread(data, 1, file_size, file); | |
| assert(read_total == file_size); | |
| fclose(file); | |
| return (slice_t){.data = data, .len = file_size}; | |
| } | |
| size_t lz4_compress(slice_t data, int level, vec_t *output) { | |
| LZ4F_compressionContext_t context; | |
| size_t res = LZ4F_createCompressionContext(&context, LZ4F_VERSION); | |
| if (LZ4F_isError(res)) { | |
| return res; | |
| } | |
| LZ4F_blockSizeID_t block_size_id = LZ4F_max4MB; | |
| size_t block_size = 4 * 1024 * 1024; | |
| LZ4F_preferences_t preferences = { | |
| .frameInfo = | |
| { | |
| .blockSizeID = block_size_id, | |
| .blockMode = LZ4F_blockLinked, | |
| .contentChecksumFlag = LZ4F_contentChecksumEnabled, | |
| .frameType = LZ4F_frame, | |
| .contentSize = data.len, | |
| .dictID = 0, | |
| .blockChecksumFlag = LZ4F_blockChecksumEnabled, | |
| }, | |
| .compressionLevel = level, | |
| .autoFlush = 0, | |
| .favorDecSpeed = 0, | |
| .reserved = {0, 0, 0}, | |
| }; | |
| slice_t buffer = slice_alloc(LZ4F_compressBound(block_size, &preferences)); | |
| res = LZ4F_compressBegin(context, buffer.data, buffer.len, &preferences); | |
| if (LZ4F_isError(res)) | |
| goto error; | |
| vec_push(output, slice_prefix(buffer, res)); | |
| size_t offset = 0; | |
| while (offset < data.len) { | |
| size_t to_write = min(data.len - offset, block_size); | |
| res = LZ4F_compressUpdate(context, buffer.data, buffer.len, | |
| data.data + offset, to_write, NULL); | |
| if (LZ4F_isError(res)) | |
| goto error; | |
| vec_push(output, slice_prefix(buffer, res)); | |
| offset += to_write; | |
| } | |
| for (;;) { | |
| res = LZ4F_flush(context, buffer.data, buffer.len, NULL); | |
| if (LZ4F_isError(res)) | |
| goto error; | |
| if (res == 0) { | |
| break; | |
| } | |
| vec_push(output, slice_prefix(buffer, res)); | |
| } | |
| res = LZ4F_compressEnd(context, buffer.data, buffer.len, NULL); | |
| if (LZ4F_isError(res)) | |
| goto error; | |
| vec_push(output, slice_prefix(buffer, res)); | |
| res = output->len; | |
| error: | |
| slice_free(buffer); | |
| LZ4F_freeCompressionContext(context); | |
| return res; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment