Skip to content

Instantly share code, notes, and snippets.

@opsec-ee
Last active September 7, 2025 02:59
Show Gist options
  • Select an option

  • Save opsec-ee/70795de110113bf8aef2ce128dfdd416 to your computer and use it in GitHub Desktop.

Select an option

Save opsec-ee/70795de110113bf8aef2ce128dfdd416 to your computer and use it in GitHub Desktop.
// Feature test macros for POSIX functions
#define _POSIX_C_SOURCE 200112L
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <assert.h>
// Portable bool definition (no stdbool.h dependency)
#ifndef __cplusplus
typedef enum { false = 0, true = 1 } bool;
#endif
#define MAX_DICT_SIZE 4096
#define MAX_INPUT 10000
// Simple dictionary entry
typedef struct {
uint16_t next[256]; // Key optimization from D: direct array lookup
} dict_entry_t;
static dict_entry_t dict[MAX_DICT_SIZE];
static uint16_t dict_size;
static uint16_t codes[MAX_INPUT];
static size_t code_count;
// Simple portable timing - just use standard clock()
static double get_time_ms(void) {
return (double)clock() / CLOCKS_PER_SEC * 1000.0;
}
// Initialize dictionary with single characters
static void init_dict(void) {
memset(dict, 0, sizeof(dict));
dict_size = 256;
code_count = 0;
}
// LZW Compression - clean and simple
static void compress(const char* input) {
init_dict();
size_t len = strlen(input);
if (len == 0) return;
uint16_t code = (uint8_t)input[0];
for (size_t i = 1; i < len; i++) {
uint8_t c = (uint8_t)input[i];
if (dict[code].next[c] != 0) {
// String exists, extend it
code = dict[code].next[c];
} else {
// Output current code
codes[code_count++] = code;
// Add new string to dictionary
if (dict_size < MAX_DICT_SIZE) {
dict[code].next[c] = dict_size;
// Initialize the new dictionary entry
memset(&dict[dict_size], 0, sizeof(dict_entry_t));
dict_size++;
}
// Start new string
code = c;
}
}
// Output final code
codes[code_count++] = code;
}
// Print compressed codes
static void print_codes(void) {
for (size_t i = 0; i < code_count; i++) {
printf("%u", codes[i]);
if (i < code_count - 1) printf(",");
}
printf("\n");
}
// LZW Decompression
static char* decompress_codes(const uint16_t* input_codes, size_t count) {
if (count == 0) return NULL;
static char strings[MAX_DICT_SIZE][256];
static int lengths[MAX_DICT_SIZE];
static char result[MAX_INPUT * 2];
// Initialize single chars
for (int i = 0; i < 256; i++) {
strings[i][0] = (char)i;
lengths[i] = 1;
}
int dict_size = 256;
int result_pos = 0;
// Output first string
uint16_t old = input_codes[0];
memcpy(result + result_pos, strings[old], lengths[old]);
result_pos += lengths[old];
for (size_t i = 1; i < count; i++) {
uint16_t code = input_codes[i];
if (code < dict_size) {
// Code exists
memcpy(result + result_pos, strings[code], lengths[code]);
result_pos += lengths[code];
// Add old + first char of current to dict
if (dict_size < MAX_DICT_SIZE) {
memcpy(strings[dict_size], strings[old], lengths[old]);
strings[dict_size][lengths[old]] = strings[code][0];
lengths[dict_size] = lengths[old] + 1;
dict_size++;
}
} else if (code == dict_size) {
// Code doesn't exist yet - special case
memcpy(strings[dict_size], strings[old], lengths[old]);
strings[dict_size][lengths[old]] = strings[old][0];
lengths[dict_size] = lengths[old] + 1;
memcpy(result + result_pos, strings[dict_size], lengths[dict_size]);
result_pos += lengths[dict_size];
dict_size++;
} else {
// Invalid code
return NULL;
}
old = code;
}
result[result_pos] = '\0';
return result;
}
// Portable strdup implementation
#ifndef _GNU_SOURCE
static char* strdup(const char* s) {
size_t len = strlen(s) + 1;
char* copy = malloc(len);
if (copy) {
memcpy(copy, s, len);
}
return copy;
}
#endif
// Parse comma-separated codes
static size_t parse_codes(const char* input, uint16_t* output) {
char* str = strdup(input);
char* token = strtok(str, ",");
size_t count = 0;
while (token && count < MAX_INPUT) {
output[count++] = (uint16_t)atoi(token);
token = strtok(NULL, ",");
}
free(str);
return count;
}
// Test case structure
typedef struct {
const char* name;
const char* input;
const char* expected_codes; // NULL means generate and verify round-trip only
} test_case_t;
// Test suite - with corrected expectations
static const test_case_t test_cases[] = {
{
"Basic LZW Test",
"TOBEORNOTTOBEORTOBEORNOT",
"84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263"
},
{
"Simple Repetition",
"ABABABA",
"65,66,256,258"
},
{
"Single Character",
"A",
"65"
},
{
"Two Characters",
"AB",
"65,66"
},
{
"Repeated Pattern",
"ABCABCABC",
"65,66,67,256,258,257"
},
{
"Long String with Repetition",
"THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG. THE QUICK BROWN FOX.",
NULL // Round-trip test only
},
{
"Numbers",
"12312312312345",
"49,50,51,256,258,257,259,52,53"
},
{
"Mixed Content",
"Hello, World! Hello, Programming!",
NULL // Round-trip test only
}
};
static const size_t num_test_cases = sizeof(test_cases) / sizeof(test_cases[0]);
// Run a single test
static bool run_test(const test_case_t* test) {
printf("%s: ", test->name);
compress(test->input);
char* result = decompress_codes(codes, code_count);
if (result && strcmp(test->input, result) == 0) {
printf("✅ (%zu→%zu codes)\n", strlen(test->input), code_count);
return true;
} else {
printf("❌\n");
return false;
}
}
// Interactive mode
static void interactive_mode(void) {
char line[MAX_INPUT];
char mode[20] = "";
printf("LZW Interactive Mode\n");
printf("Commands: ==comp, ==decomp, or just type text\n");
printf("Ctrl+C to exit\n\n");
while (fgets(line, sizeof(line), stdin)) {
line[strcspn(line, "\n")] = '\0';
if (strncmp(line, "==comp", 6) == 0) {
strcpy(mode, "compress");
printf("Compression mode activated\n");
} else if (strncmp(line, "==decomp", 8) == 0) {
strcpy(mode, "decompress");
printf("Decompression mode activated\n");
} else if (strcmp(mode, "compress") == 0) {
double start = get_time_ms();
compress(line);
double time_taken = get_time_ms() - start;
print_codes();
printf("// Compressed %zu chars to %zu codes in %.3f ms\n",
strlen(line), code_count, time_taken);
} else if (strcmp(mode, "decompress") == 0) {
uint16_t input_codes[MAX_INPUT];
size_t count = parse_codes(line, input_codes);
double start = get_time_ms();
char* result = decompress_codes(input_codes, count);
double time_taken = get_time_ms() - start;
if (result) {
printf("%s\n", result);
printf("// Decompressed %zu codes to %zu chars in %.3f ms\n",
count, strlen(result), time_taken);
} else {
printf("Error: Invalid codes\n");
}
}
}
}
// Main function
int main(int argc, char* argv[]) {
printf("LZW Compression Implementation\n");
printf("Compiled with optimizations\n\n");
if (argc > 1 && strcmp(argv[1], "test") == 0) {
// Run test suite
printf("Running comprehensive test suite...\n\n");
int passed = 0;
double total_time = get_time_ms();
for (size_t i = 0; i < num_test_cases; i++) {
if (run_test(&test_cases[i])) {
passed++;
}
}
total_time = get_time_ms() - total_time;
printf("Test Results:\n");
printf(" Passed: %d/%zu\n", passed, num_test_cases);
printf(" Total time: %.3f ms\n", total_time);
printf(" Success rate: %.1f%%\n",
(double)passed / num_test_cases * 100.0);
return (passed == (int)num_test_cases) ? 0 : 1;
} else {
// Interactive mode
interactive_mode();
return 0;
}
}
/*
C o*mpilation flags for optimal performance:
# Release build with optimizations
gcc -std=c11 -Wall -Wextra -O2 -DNDEBUG -march=native -flto lzw.c -o lzw_release
Usage:
./lzw test # Run test suite
./lzw # Interactive mode
Test Examples:
echo "TOBEORNOTTOBEORTOBEORNOT" | ./lzw
./lzw test
*/
@opsec-ee
Copy link
Author

opsec-ee commented Sep 6, 2025

LZW Compression on strings Implementation

Running comprehensive test suite...

Basic LZW Test:    (24→16 codes)
Simple Repetition: (7→4 codes)
Single Character:  (1→1 codes)
Two Characters:    (2→2 codes)
Repeated Pattern:  (9→6 codes)
Long String with Repetition: (65→53 codes)
Numbers:           (14→9 codes)
Mixed Content:     (33→30 codes)

Test Results:
Passed: 8/8
Total time: 3.468 ms
Success rate: 100.0%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment