-
-
Save Rexicon226/17727a7f8365144b20c74b0457f2f9b5 to your computer and use it in GitHub Desktop.
Dead simple KangarooTwelve-128 implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <stdlib.h> | |
| // Chunk size for tree hashing | |
| #define CHUNK_SIZE 8192 | |
| #define ALIGN __attribute__ ((aligned (128))) | |
| #define MODE 128 | |
| #if MODE==128 | |
| #define CV_SIZE 32 | |
| #define RATE 168 | |
| #elif | |
| #error "TODO: kt256" | |
| #endif | |
| /* Helper functions */ | |
| typedef unsigned char uchar; | |
| typedef unsigned int uint; | |
| typedef unsigned long ulong; | |
| static inline uint64_t rotl(uint64_t x, unsigned int n) { | |
| return (x << n) | (x >> (-n & 63)); | |
| } | |
| static inline uint64_t load_u64(const uchar *p) { | |
| uint64_t v; | |
| memcpy(&v, p, sizeof(v)); | |
| return v; | |
| } | |
| static inline void store_u64( uint64_t v, uchar *p ) { | |
| memcpy(p, &v, sizeof(v)); | |
| } | |
| #define min(x, y) (x<y) ? x : y | |
| typedef struct re { | |
| uchar bytes[9]; | |
| uchar len; | |
| } re_t; | |
| void kt_right_encode(re_t *r, ulong x) { | |
| if (x == 0) { | |
| r->bytes[0] = 0; | |
| r->len = 1; | |
| return; | |
| } | |
| uchar temp[9]; | |
| ulong len = 0; | |
| for (; x > 0; x /= 256) { | |
| temp[len++] = x % 256; | |
| } | |
| for (int i = 0; i < len; i++) { | |
| r->bytes[i] = temp[len - 1 - i]; | |
| } | |
| r->bytes[len] = len; | |
| r->len = len + 1; | |
| } | |
| typedef struct { | |
| const uchar *ptr; | |
| ulong len; | |
| } slice_t; | |
| typedef struct { | |
| slice_t slices[3]; | |
| ulong offsets[4]; | |
| } msv_t; | |
| void msv_init( msv_t *r, | |
| const uchar *s1, ulong s1len, | |
| const uchar *s2, ulong s2len, | |
| const uchar *s3, ulong s3len ) { | |
| r->slices[0] = (slice_t){s1, s1len}; | |
| r->slices[1] = (slice_t){s2, s2len}; | |
| r->slices[2] = (slice_t){s3, s3len}; | |
| r->offsets[0] = 0; | |
| r->offsets[1] = s1len; | |
| r->offsets[2] = s1len + s2len; | |
| r->offsets[3] = s1len + s2len + s3len; | |
| } | |
| ulong msv_len( const msv_t *v ) { | |
| return v->offsets[3]; | |
| } | |
| uchar msv_byte( const msv_t *v, ulong pos ) { | |
| for (int i = 0; i<3; i++) { | |
| if (pos >= v->offsets[i] && pos < v->offsets[i + 1]) { | |
| return *(v->slices[i].ptr + (pos - v->offsets[i])); | |
| } | |
| } | |
| return 0; // impossible to reach | |
| // __builtin_unreachable(); | |
| } | |
| const uchar *msv_get_range( const msv_t *v, ulong start, ulong end ) { | |
| for (int i = 0; i<3; i++) { | |
| if (start >= v->offsets[i] && end <= v->offsets[i + 1]) { | |
| return v->slices[i].ptr + (start - v->offsets[i]); | |
| } | |
| } | |
| return NULL; | |
| } | |
| void msv_copy_range( const msv_t *v, ulong start, ulong end, uchar *dst ) { | |
| ulong pos = 0; | |
| for (ulong i = start; i<end; i++) { | |
| dst[pos] = msv_byte( v, i ); | |
| pos += 1; | |
| } | |
| } | |
| // Round constants for Keccak-p[1600,12] | |
| uint64_t RC[12] = { | |
| 0x000000008000808B, | |
| 0x800000000000008B, | |
| 0x8000000000008089, | |
| 0x8000000000008003, | |
| 0x8000000000008002, | |
| 0x8000000000000080, | |
| 0x000000000000800A, | |
| 0x800000008000000A, | |
| 0x8000000080008081, | |
| 0x8000000000008080, | |
| 0x0000000080000001, | |
| 0x8000000080008008, | |
| }; | |
| #define KECCAK_PERMUTE keccak_perm_scalar | |
| #define vec_t uint64_t | |
| #define xor(a,b) ((a) ^ (b)) | |
| #define and(a,b) ((a) & (b)) | |
| #define not(a) (~(a)) | |
| #define rotlv(a,n) rotl(a,n) | |
| #define splat(x) (x) | |
| #define lanes 1 | |
| #define R(x,y,r) \ | |
| do { \ | |
| const int dst = (y) + 5 * ((2*(x) + 3*(y)) % 5); \ | |
| B[dst] = rotlv(state[(x) + 5*(y)], (r)); \ | |
| } while (0) | |
| static inline void KECCAK_PERMUTE(vec_t state[25]) { | |
| for (int round=0; round<12; round++) { | |
| vec_t C[5], D[5]; | |
| for (int x = 0; x < 5; x++) { | |
| C[x] = xor( | |
| xor(state[x], state[x+5]), | |
| xor(state[x+10], xor(state[x+15], state[x+20])) | |
| ); | |
| } | |
| for (int x = 0; x < 5; x++) { | |
| D[x] = xor(C[(x + 4) % 5], rotlv(C[(x + 1) % 5], 1)); | |
| } | |
| for (int i = 0; i < 25; i++) { | |
| state[i] = xor(state[i], D[i % 5]); | |
| } | |
| vec_t B[25]; | |
| R(0,0, 0); R(1,0, 1); R(2,0, 62); R(3,0, 28); R(4,0, 27); | |
| R(0,1, 36); R(1,1, 44); R(2,1, 6); R(3,1, 55); R(4,1, 20); | |
| R(0,2, 3); R(1,2, 10); R(2,2, 43); R(3,2, 25); R(4,2, 39); | |
| R(0,3, 41); R(1,3, 45); R(2,3, 15); R(3,3, 21); R(4,3, 8); | |
| R(0,4, 18); R(1,4, 2); R(2,4, 61); R(3,4, 56); R(4,4, 14); | |
| for (int y = 0; y < 5; y++) { | |
| vec_t t0 = B[5 * y + 0]; | |
| vec_t t1 = B[5 * y + 1]; | |
| vec_t t2 = B[5 * y + 2]; | |
| vec_t t3 = B[5 * y + 3]; | |
| vec_t t4 = B[5 * y + 4]; | |
| state[5 * y + 0] = xor(t0, and(not(t1), t2)); | |
| state[5 * y + 1] = xor(t1, and(not(t2), t3)); | |
| state[5 * y + 2] = xor(t2, and(not(t3), t4)); | |
| state[5 * y + 3] = xor(t3, and(not(t4), t0)); | |
| state[5 * y + 4] = xor(t4, and(not(t0), t1)); | |
| } | |
| state[0] = xor(state[0], splat(RC[round])); | |
| } | |
| } | |
| typedef union keccak_state { | |
| uchar bytes[200]; | |
| uint64_t limbs[25]; | |
| } keccak_state_t; | |
| void kt_turbo_shake( const msv_t *v, uchar seperation, uchar *out, ulong outlen) { | |
| keccak_state_t state = { 0 }; | |
| ulong state_pos = 0; | |
| // Absorb all bytes from the MSV | |
| ulong total = msv_len( v ); | |
| ulong position = 0; | |
| while (position < total) { | |
| state.bytes[state_pos] ^= msv_byte( v, position ); | |
| state_pos += 1; | |
| position += 1; | |
| if (state_pos == RATE) { | |
| keccak_perm_scalar( state.limbs ); | |
| state_pos = 0; | |
| } | |
| } | |
| // Add seperation and padding | |
| state.bytes[state_pos] ^= seperation; | |
| state.bytes[RATE - 1] ^= 0x80; | |
| keccak_perm_scalar( state.limbs ); | |
| // Squeeze | |
| ulong offset = 0; | |
| while (offset < outlen) { | |
| ulong chunk = min( RATE, outlen - offset ); | |
| memcpy(out + offset, state.bytes, chunk); | |
| offset += chunk; | |
| if (offset < outlen) { | |
| keccak_perm_scalar( state.limbs ); | |
| } | |
| } | |
| } | |
| // Turbo-shake state | |
| typedef struct ts { | |
| uchar delim; | |
| ulong offset; | |
| uchar buffer[RATE]; | |
| uint64_t state[25]; | |
| } ts_t; | |
| void ts_init( ts_t *r, uchar delim ) { | |
| r->delim = delim; | |
| r->offset = 0; | |
| memset(r->state, 0, sizeof(r->state)); | |
| } | |
| void ts_add_bytes( ts_t *t, const uchar *msg, ulong len ) { | |
| ulong i = 0; | |
| for (; i + 8 <= len; i += 8) { | |
| t->state[i / 8] ^= load_u64( msg + i ); | |
| } | |
| if (i < len) { | |
| uchar padded[8] = { 0 }; | |
| memcpy(padded, msg + i, len - i); | |
| t->state[i / 8] ^= load_u64( padded ); | |
| } | |
| } | |
| void ts_add_byte( ts_t *t, uchar byte, ulong offset ) { | |
| ulong z = 8 * (offset % 8); | |
| t->state[offset / 8] ^= ((uint64_t)byte) << z; | |
| } | |
| /* Extract the first bytes from the Keccak state */ | |
| void ts_extract_bytes( ts_t *t, uchar *out, ulong outlen ) { | |
| ulong i = 0; | |
| for (; i + 8 <= outlen; i += 8) { | |
| store_u64( t->state[i / 8], out + i ); | |
| } | |
| if (i < outlen) { | |
| uchar padded[8]; | |
| store_u64( t->state[i / 8], padded ); | |
| memcpy( out + i, padded, outlen - i ); | |
| } | |
| } | |
| /* Absorb bytes into the TurboSHAKE state */ | |
| void ts_update( ts_t *t, const uchar *msg, ulong len ) { | |
| ulong i = 0; | |
| if (t->offset > 0) { | |
| ulong left = min(RATE - t->offset, len); | |
| memcpy(t->buffer + t->offset, msg, left); | |
| t->offset += left; | |
| if (left == len) return; | |
| if (t->offset == RATE) { | |
| ts_add_bytes( t, t->buffer, RATE ); | |
| keccak_perm_scalar( t->state ); | |
| t->offset = 0; | |
| } | |
| i = left; | |
| } | |
| for (; i + RATE < len; i += RATE) { | |
| ts_add_bytes( t, msg + i, RATE ); | |
| keccak_perm_scalar( t->state ); | |
| } | |
| ulong left = len - i; | |
| if (left > 0) memcpy(t->buffer, msg + i, left); | |
| t->offset = left; | |
| } | |
| /* Squeeze bytes from the TurboSHAKE state */ | |
| void ts_squeeze( ts_t *t, uchar *out, ulong outlen ) { | |
| ulong i = 0; | |
| if (t->offset == RATE) { | |
| keccak_perm_scalar( t->state ); | |
| } else if (t->offset > 0) { | |
| uchar buffer[RATE]; | |
| ts_extract_bytes( t, buffer, RATE ); | |
| ulong left = min(RATE - t->offset, outlen); | |
| memcpy(out, buffer + t->offset, left); | |
| if (left == outlen) return; | |
| if (t->offset == RATE) { | |
| t->offset = 0; | |
| keccak_perm_scalar( t->state ); | |
| } | |
| i = left; | |
| } | |
| for (; i + RATE < outlen; i += RATE) { | |
| ts_extract_bytes( t, out + i, RATE ); | |
| keccak_perm_scalar( t->state ); | |
| } | |
| ulong left = outlen - i; | |
| if (left > 0) { | |
| ts_extract_bytes( t, out + i, left ); | |
| } | |
| t->offset = left; | |
| } | |
| /* Sets the hash of the absorbed bytes */ | |
| void ts_final( ts_t *t, uchar *out, ulong outlen ) { | |
| // Mark the end of the input | |
| ts_add_bytes( t, t->buffer, t->offset ); | |
| if (t->offset == RATE) { | |
| keccak_perm_scalar( t->state ); | |
| t->offset = 0; | |
| } | |
| ts_add_byte( t, t->delim, t->offset ); | |
| ts_add_byte( t, 0x80, RATE - 1 ); | |
| keccak_perm_scalar( t->state ); | |
| t->offset = 0; | |
| // Squeeze into out from the state. | |
| ulong offset = 0; | |
| ulong full_blocks = outlen - (outlen % RATE); | |
| if (full_blocks > 0) { | |
| ts_squeeze( t, out, full_blocks ); | |
| offset += full_blocks; | |
| } | |
| ulong remaining = outlen - offset; | |
| if (remaining > 0) { | |
| uchar buffer[RATE]; | |
| ts_squeeze( t, buffer, RATE ); | |
| memcpy(out + offset, buffer, remaining); | |
| } | |
| } | |
| // KT | |
| void kt_hash(const uchar *msg, ulong msglen, uchar *out, ulong outlen) { | |
| // Right-encode customization length | |
| re_t custom_len_enc[1]; | |
| kt_right_encode(custom_len_enc, 0); | |
| msv_t view[1]; | |
| msv_init( | |
| view, | |
| msg, msglen, /* s1 */ | |
| NULL, 0, /* s2 */ | |
| custom_len_enc->bytes, custom_len_enc->len /* s3 */ | |
| ); | |
| ulong total_length = msv_len( view ); | |
| // If the input fits into a chunk, we can immediately absorb it. | |
| if (total_length <= CHUNK_SIZE) { | |
| kt_turbo_shake( view, 0x07, out, outlen ); | |
| return; | |
| } | |
| // Tree mode | |
| // 1. Create a TurboSHAKE state with delimiter 0x06 | |
| ts_t state[1]; | |
| ts_init( state, 0x06 ); | |
| // 2. Absorb first B bytes from input | |
| { | |
| const uchar *ptr = msv_get_range( view, 0, CHUNK_SIZE ); | |
| if (ptr) { | |
| ts_update( state, ptr, CHUNK_SIZE ); | |
| } else { | |
| uchar first_b_buffer[CHUNK_SIZE]; | |
| msv_copy_range( view, 0, CHUNK_SIZE, first_b_buffer ); | |
| ts_update( state, first_b_buffer, CHUNK_SIZE ); | |
| } | |
| } | |
| // 3. Absorb padding bytes | |
| const uchar padding[8] = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; | |
| ts_update( state, padding, sizeof(padding) ); | |
| ulong j = CHUNK_SIZE; | |
| ulong n = 0; | |
| uchar leaves[CHUNK_SIZE * 8] ALIGN; | |
| uchar cv_buffer[CV_SIZE]; | |
| // 4. Process leaves in SIMD batches | |
| // 5. Process remaining leaves, one at a time | |
| while (j < total_length) { | |
| ulong chunk = min( CHUNK_SIZE, total_length - j ); | |
| msv_t cv[1]; | |
| const uchar *ptr = msv_get_range( view, j, j + chunk ); | |
| if (ptr) { | |
| msv_init( cv, ptr, chunk, NULL, 0, NULL, 0 ); | |
| } else { | |
| msv_init( cv, leaves, chunk, NULL, 0, NULL, 0 ); | |
| msv_copy_range( view, j, j + chunk, leaves ); | |
| } | |
| kt_turbo_shake( cv, 0x0B, cv_buffer, CV_SIZE ); | |
| ts_update( state, cv_buffer, CV_SIZE ); | |
| j += CHUNK_SIZE; | |
| n += 1; | |
| } | |
| // 6. Absorb right_encode(n) | |
| re_t n_enc[1]; | |
| kt_right_encode( n_enc, n ); | |
| ts_update( state, n_enc->bytes, n_enc->len ); | |
| // 7. Absorb terminator | |
| const uchar terminator[2] = { 0xFF, 0xFF }; | |
| ts_update( state, terminator, 2 ); | |
| ts_final( state, out, outlen ); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment