Last active
September 7, 2025 02:59
-
-
Save opsec-ee/70795de110113bf8aef2ce128dfdd416 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Feature test macros for POSIX functions | |
| #define _POSIX_C_SOURCE 200112L | |
| #define _GNU_SOURCE | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <time.h> | |
| #include <assert.h> | |
| // Portable bool definition (no stdbool.h dependency) | |
| #ifndef __cplusplus | |
| typedef enum { false = 0, true = 1 } bool; | |
| #endif | |
| #define MAX_DICT_SIZE 4096 | |
| #define MAX_INPUT 10000 | |
| // Simple dictionary entry | |
| typedef struct { | |
| uint16_t next[256]; // Key optimization from D: direct array lookup | |
| } dict_entry_t; | |
| static dict_entry_t dict[MAX_DICT_SIZE]; | |
| static uint16_t dict_size; | |
| static uint16_t codes[MAX_INPUT]; | |
| static size_t code_count; | |
| // Simple portable timing - just use standard clock() | |
| static double get_time_ms(void) { | |
| return (double)clock() / CLOCKS_PER_SEC * 1000.0; | |
| } | |
| // Initialize dictionary with single characters | |
| static void init_dict(void) { | |
| memset(dict, 0, sizeof(dict)); | |
| dict_size = 256; | |
| code_count = 0; | |
| } | |
| // LZW Compression - clean and simple | |
| static void compress(const char* input) { | |
| init_dict(); | |
| size_t len = strlen(input); | |
| if (len == 0) return; | |
| uint16_t code = (uint8_t)input[0]; | |
| for (size_t i = 1; i < len; i++) { | |
| uint8_t c = (uint8_t)input[i]; | |
| if (dict[code].next[c] != 0) { | |
| // String exists, extend it | |
| code = dict[code].next[c]; | |
| } else { | |
| // Output current code | |
| codes[code_count++] = code; | |
| // Add new string to dictionary | |
| if (dict_size < MAX_DICT_SIZE) { | |
| dict[code].next[c] = dict_size; | |
| // Initialize the new dictionary entry | |
| memset(&dict[dict_size], 0, sizeof(dict_entry_t)); | |
| dict_size++; | |
| } | |
| // Start new string | |
| code = c; | |
| } | |
| } | |
| // Output final code | |
| codes[code_count++] = code; | |
| } | |
| // Print compressed codes | |
| static void print_codes(void) { | |
| for (size_t i = 0; i < code_count; i++) { | |
| printf("%u", codes[i]); | |
| if (i < code_count - 1) printf(","); | |
| } | |
| printf("\n"); | |
| } | |
| // LZW Decompression | |
| static char* decompress_codes(const uint16_t* input_codes, size_t count) { | |
| if (count == 0) return NULL; | |
| static char strings[MAX_DICT_SIZE][256]; | |
| static int lengths[MAX_DICT_SIZE]; | |
| static char result[MAX_INPUT * 2]; | |
| // Initialize single chars | |
| for (int i = 0; i < 256; i++) { | |
| strings[i][0] = (char)i; | |
| lengths[i] = 1; | |
| } | |
| int dict_size = 256; | |
| int result_pos = 0; | |
| // Output first string | |
| uint16_t old = input_codes[0]; | |
| memcpy(result + result_pos, strings[old], lengths[old]); | |
| result_pos += lengths[old]; | |
| for (size_t i = 1; i < count; i++) { | |
| uint16_t code = input_codes[i]; | |
| if (code < dict_size) { | |
| // Code exists | |
| memcpy(result + result_pos, strings[code], lengths[code]); | |
| result_pos += lengths[code]; | |
| // Add old + first char of current to dict | |
| if (dict_size < MAX_DICT_SIZE) { | |
| memcpy(strings[dict_size], strings[old], lengths[old]); | |
| strings[dict_size][lengths[old]] = strings[code][0]; | |
| lengths[dict_size] = lengths[old] + 1; | |
| dict_size++; | |
| } | |
| } else if (code == dict_size) { | |
| // Code doesn't exist yet - special case | |
| memcpy(strings[dict_size], strings[old], lengths[old]); | |
| strings[dict_size][lengths[old]] = strings[old][0]; | |
| lengths[dict_size] = lengths[old] + 1; | |
| memcpy(result + result_pos, strings[dict_size], lengths[dict_size]); | |
| result_pos += lengths[dict_size]; | |
| dict_size++; | |
| } else { | |
| // Invalid code | |
| return NULL; | |
| } | |
| old = code; | |
| } | |
| result[result_pos] = '\0'; | |
| return result; | |
| } | |
| // Portable strdup implementation | |
| #ifndef _GNU_SOURCE | |
| static char* strdup(const char* s) { | |
| size_t len = strlen(s) + 1; | |
| char* copy = malloc(len); | |
| if (copy) { | |
| memcpy(copy, s, len); | |
| } | |
| return copy; | |
| } | |
| #endif | |
| // Parse comma-separated codes | |
| static size_t parse_codes(const char* input, uint16_t* output) { | |
| char* str = strdup(input); | |
| char* token = strtok(str, ","); | |
| size_t count = 0; | |
| while (token && count < MAX_INPUT) { | |
| output[count++] = (uint16_t)atoi(token); | |
| token = strtok(NULL, ","); | |
| } | |
| free(str); | |
| return count; | |
| } | |
| // Test case structure | |
| typedef struct { | |
| const char* name; | |
| const char* input; | |
| const char* expected_codes; // NULL means generate and verify round-trip only | |
| } test_case_t; | |
| // Test suite - with corrected expectations | |
| static const test_case_t test_cases[] = { | |
| { | |
| "Basic LZW Test", | |
| "TOBEORNOTTOBEORTOBEORNOT", | |
| "84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263" | |
| }, | |
| { | |
| "Simple Repetition", | |
| "ABABABA", | |
| "65,66,256,258" | |
| }, | |
| { | |
| "Single Character", | |
| "A", | |
| "65" | |
| }, | |
| { | |
| "Two Characters", | |
| "AB", | |
| "65,66" | |
| }, | |
| { | |
| "Repeated Pattern", | |
| "ABCABCABC", | |
| "65,66,67,256,258,257" | |
| }, | |
| { | |
| "Long String with Repetition", | |
| "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG. THE QUICK BROWN FOX.", | |
| NULL // Round-trip test only | |
| }, | |
| { | |
| "Numbers", | |
| "12312312312345", | |
| "49,50,51,256,258,257,259,52,53" | |
| }, | |
| { | |
| "Mixed Content", | |
| "Hello, World! Hello, Programming!", | |
| NULL // Round-trip test only | |
| } | |
| }; | |
| static const size_t num_test_cases = sizeof(test_cases) / sizeof(test_cases[0]); | |
| // Run a single test | |
| static bool run_test(const test_case_t* test) { | |
| printf("%s: ", test->name); | |
| compress(test->input); | |
| char* result = decompress_codes(codes, code_count); | |
| if (result && strcmp(test->input, result) == 0) { | |
| printf("✅ (%zu→%zu codes)\n", strlen(test->input), code_count); | |
| return true; | |
| } else { | |
| printf("❌\n"); | |
| return false; | |
| } | |
| } | |
| // Interactive mode | |
| static void interactive_mode(void) { | |
| char line[MAX_INPUT]; | |
| char mode[20] = ""; | |
| printf("LZW Interactive Mode\n"); | |
| printf("Commands: ==comp, ==decomp, or just type text\n"); | |
| printf("Ctrl+C to exit\n\n"); | |
| while (fgets(line, sizeof(line), stdin)) { | |
| line[strcspn(line, "\n")] = '\0'; | |
| if (strncmp(line, "==comp", 6) == 0) { | |
| strcpy(mode, "compress"); | |
| printf("Compression mode activated\n"); | |
| } else if (strncmp(line, "==decomp", 8) == 0) { | |
| strcpy(mode, "decompress"); | |
| printf("Decompression mode activated\n"); | |
| } else if (strcmp(mode, "compress") == 0) { | |
| double start = get_time_ms(); | |
| compress(line); | |
| double time_taken = get_time_ms() - start; | |
| print_codes(); | |
| printf("// Compressed %zu chars to %zu codes in %.3f ms\n", | |
| strlen(line), code_count, time_taken); | |
| } else if (strcmp(mode, "decompress") == 0) { | |
| uint16_t input_codes[MAX_INPUT]; | |
| size_t count = parse_codes(line, input_codes); | |
| double start = get_time_ms(); | |
| char* result = decompress_codes(input_codes, count); | |
| double time_taken = get_time_ms() - start; | |
| if (result) { | |
| printf("%s\n", result); | |
| printf("// Decompressed %zu codes to %zu chars in %.3f ms\n", | |
| count, strlen(result), time_taken); | |
| } else { | |
| printf("Error: Invalid codes\n"); | |
| } | |
| } | |
| } | |
| } | |
| // Main function | |
| int main(int argc, char* argv[]) { | |
| printf("LZW Compression Implementation\n"); | |
| printf("Compiled with optimizations\n\n"); | |
| if (argc > 1 && strcmp(argv[1], "test") == 0) { | |
| // Run test suite | |
| printf("Running comprehensive test suite...\n\n"); | |
| int passed = 0; | |
| double total_time = get_time_ms(); | |
| for (size_t i = 0; i < num_test_cases; i++) { | |
| if (run_test(&test_cases[i])) { | |
| passed++; | |
| } | |
| } | |
| total_time = get_time_ms() - total_time; | |
| printf("Test Results:\n"); | |
| printf(" Passed: %d/%zu\n", passed, num_test_cases); | |
| printf(" Total time: %.3f ms\n", total_time); | |
| printf(" Success rate: %.1f%%\n", | |
| (double)passed / num_test_cases * 100.0); | |
| return (passed == (int)num_test_cases) ? 0 : 1; | |
| } else { | |
| // Interactive mode | |
| interactive_mode(); | |
| return 0; | |
| } | |
| } | |
| /* | |
| C o*mpilation flags for optimal performance: | |
| # Release build with optimizations | |
| gcc -std=c11 -Wall -Wextra -O2 -DNDEBUG -march=native -flto lzw.c -o lzw_release | |
| Usage: | |
| ./lzw test # Run test suite | |
| ./lzw # Interactive mode | |
| Test Examples: | |
| echo "TOBEORNOTTOBEORTOBEORNOT" | ./lzw | |
| ./lzw test | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.