Last active
December 31, 2025 10:37
-
-
Save vurtun/a8df34afb7a4575a2ed076fd3380f56e to your computer and use it in GitHub Desktop.
Kessler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #define _GNU_SOURCE | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <math.h> | |
| #include <time.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <immintrin.h> | |
| #include <raylib.h> | |
| #include <raymath.h> | |
| #include <rlgl.h> | |
| /* --- CONFIG --- */ | |
| #define SIM_AGENT_COUNT 100000 | |
| #define SENTINEL_INDEX SIM_AGENT_COUNT | |
| #define REAL_ARRAY_SIZE (SIM_AGENT_COUNT + 32) | |
| #define NUM_SATELLITES 10000 | |
| #define EARTH_RAD_M 6371000.0f | |
| #define RENDER_SCALE (1.0f / EARTH_RAD_M) | |
| #define EARTH_RAD_KM 6371.0f | |
| #define RECORD_INTERVAL 2 | |
| #define TIME_SCALE 20.0f | |
| /* Colors */ | |
| #define CYAN (Color){ 0, 255, 255, 255 } | |
| /* Collision Params */ | |
| #define PREDICTION_TIME 2.0f | |
| #define WARNING_DIST 20000.0f | |
| #define KILL_DIST 100.0f | |
| #define COORD_OFFSET 2147483648U | |
| /* Physics */ | |
| #define R_LEO_550 (EARTH_RAD_M + 550000.0f) | |
| #define R_GPS (EARTH_RAD_M + 20200000.0f) | |
| #define R_GEO (EARTH_RAD_M + 35786000.0f) | |
| #define R_DEBRIS_MIN (EARTH_RAD_M + 400000.0f) | |
| #define R_DEBRIS_MAX (EARTH_RAD_M + 40000000.0f) | |
| #define MU_EARTH 3.986e14f | |
| #define GOP_SIZE 30 | |
| #define UNIT_SCALE 1.0f /* Meter Scale */ | |
| #define SIM_AGENT_COUNT 100000 | |
| #define REAL_ARRAY_SIZE (SIM_AGENT_COUNT + 32) | |
| #define RECORD_INTERVAL 2 | |
| #define MAX_EVENTS 512 | |
| #define COMP_BUF_SIZE (4 * 1024 * 1024 + 64) | |
| #define SCRUB_MAGIC 0x52425253 /* "SRBR" */ | |
| #define SCRUB_SYNC 0x594E5321 /* "!SNY" */ | |
| #define FORCE_INLINE inline __attribute__((always_inline)) | |
| #define ALIGNED(x) __attribute__((aligned(x))) | |
| #define LIKELY(x) __builtin_expect(!!(x), 1) | |
| #define UNLIKELY(x) __builtin_expect(!!(x), 0) | |
| #define ALIGN_AVX __attribute__((aligned(32))) | |
| typedef __uint128_t uint128; | |
| enum agent_type { | |
| TYPE_DEBRIS = 0, | |
| TYPE_SAT = 1 | |
| }; | |
| enum agent_status { | |
| STATUS_OK = 0, | |
| STATUS_WARN = 1, /* Taking Evasive Action */ | |
| STATUS_HIT = 2 /* Destroyed */ | |
| }; | |
| struct ALIGNED(64) sim_state { | |
| float a[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float e[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float i[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float O[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float v[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float n[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float sz[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float x[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float y[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float z[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float vx[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float vy[REAL_ARRAY_SIZE] ALIGNED(64); | |
| float vz[REAL_ARRAY_SIZE] ALIGNED(64); | |
| /* collision */ | |
| uint128 mkeys[REAL_ARRAY_SIZE]; | |
| uint32_t sort_idx[REAL_ARRAY_SIZE]; | |
| uint32_t sort_tmp[REAL_ARRAY_SIZE]; | |
| /* visuals */ | |
| float anim[REAL_ARRAY_SIZE] ALIGNED(64); | |
| uint8_t type[REAL_ARRAY_SIZE]; | |
| uint8_t status[REAL_ARRAY_SIZE]; | |
| Color colors[REAL_ARRAY_SIZE]; | |
| /* Fixed-Point Persistence (64-bit mm) */ | |
| int64_t last_x[REAL_ARRAY_SIZE] ALIGN_AVX; | |
| int64_t last_y[REAL_ARRAY_SIZE] ALIGN_AVX; | |
| int64_t last_z[REAL_ARRAY_SIZE] ALIGN_AVX; | |
| /* Columnar Delta Staging (32-bit mm) */ | |
| int32_t dx[REAL_ARRAY_SIZE] ALIGN_AVX; | |
| int32_t dy[REAL_ARRAY_SIZE] ALIGN_AVX; | |
| int32_t dz[REAL_ARRAY_SIZE] ALIGN_AVX; | |
| uint8_t comp_buf[COMP_BUF_SIZE]; | |
| uint64_t frame_offsets[1000000]; | |
| uint32_t recorded_frame_count; | |
| }; | |
| static struct sim_state g_sim; | |
| static uint64_t g_total_maneuvers = 0; | |
| static inline unsigned long long | |
| rnd_gen(unsigned long long val, int idx) { | |
| return val + (unsigned long long)idx * 0x9E3779B97F4A7C15llu; | |
| } | |
| static inline unsigned long long | |
| rnd_mix(unsigned long long val) { | |
| val = (val ^ (val >> 30)) * 0xBF58476D1CE4E5B9llu; | |
| val = (val ^ (val >> 27)) * 0x94D049BB133111EBllu; | |
| return val ^ (val >> 31llu); | |
| } | |
| static inline unsigned long long | |
| rnd_split_mix(unsigned long long *state, int idx) { | |
| *state = rnd_gen(*state, idx); | |
| return rnd_mix(*state); | |
| } | |
| static inline unsigned long long | |
| rnd(unsigned long long *state) { | |
| return rnd_split_mix(state, 1); | |
| } | |
| static inline float | |
| rnd_f32(unsigned long long *state) { | |
| unsigned long long r = rnd(state); | |
| return (float)(r >> 40) / (float)(1ULL << 24); | |
| } | |
| static unsigned long long | |
| get_os_seed(void) { | |
| unsigned long long seed = 0; | |
| FILE *f = fopen("/dev/urandom", "rb"); | |
| if (!f || fread(&seed, sizeof(seed), 1, f) != 1) { | |
| seed = (unsigned long long)time(NULL); | |
| } | |
| if (f) { | |
| fclose(f); | |
| } | |
| return seed; | |
| } | |
| static FORCE_INLINE uint64_t | |
| spread_bits_64(uint64_t v) { | |
| v &= 0x1FFFFF; | |
| v = (v | v << 32) & 0x1F00000000FFFFULL; | |
| v = (v | v << 16) & 0x1F0000FF0000FFULL; | |
| v = (v | v << 8) & 0x100F00F00F00F00FULL; | |
| v = (v | v << 4) & 0x10c30c30c30c30c3ULL; | |
| v = (v | v << 2) & 0x1249249249249249ULL; | |
| return v; | |
| } | |
| static FORCE_INLINE uint128 | |
| spread_coord_128(uint32_t c) { | |
| #if defined(VISGRID_USE_AVX2) && defined(__BMI2__) | |
| const uint64_t M1 = 0x9249249249249249ULL; | |
| uint64_t lo = _pdep_u64(c & 0x7FFF, M1); | |
| uint64_t hi = _pdep_u64((c >> 15) & 0x7FFF, M1); | |
| return ((uint128)hi << 45) | (uint128)lo; | |
| #else | |
| uint64_t lo = spread_bits_64(c & 0x7FFF); | |
| uint64_t hi = spread_bits_64((c >> 15) & 0x7FFF); | |
| return ((uint128)hi << 45) | (uint128)lo; | |
| #endif | |
| } | |
| static FORCE_INLINE uint128 | |
| pack_mkey(uint32_t x, uint32_t y, uint32_t z, uint64_t t_mod, uint32_t lvl) { | |
| uint128 sx = spread_coord_128(x); | |
| uint128 sy = spread_coord_128(y); | |
| uint128 sz = spread_coord_128(z); | |
| uint128 spatial = sx | (sy << 1) | (sz << 2); | |
| uint128 k; | |
| const uint128 mask_90 = ((uint128)1 << 90) - 1; | |
| k = ((uint128)lvl << 120) | ((uint128)(t_mod & 0x3FFFFFFF) << 90) | (spatial & mask_90); | |
| uint64_t hi = (uint64_t)(k >> 64); | |
| uint64_t lo = (uint64_t)k; | |
| return ((uint128)__builtin_bswap64(lo) << 64) | __builtin_bswap64(hi); | |
| } | |
| static FORCE_INLINE uint128 | |
| calc_spatial_key(float fx, float fy, float fz) { | |
| int32_t x = (int32_t)fx; | |
| int32_t y = (int32_t)fy; | |
| int32_t z = (int32_t)fz; | |
| uint32_t gx = (x + COORD_OFFSET) >> 12; | |
| uint32_t gy = (y + COORD_OFFSET) >> 12; | |
| uint32_t gz = (z + COORD_OFFSET) >> 12; | |
| return pack_mkey(gx, gy, gz, 0, 0); | |
| } | |
| static void | |
| sort_agents_by_pos(int cnt) { | |
| uint32_t *src = g_sim.sort_idx; | |
| uint32_t *dst = g_sim.sort_tmp; | |
| for(int i=0; i<cnt; i++) { | |
| src[i] = i; | |
| } | |
| for(int i=cnt; i<REAL_ARRAY_SIZE; i++) { | |
| src[i] = SENTINEL_INDEX; | |
| } | |
| for(int i=0; i<cnt; i++) { | |
| g_sim.mkeys[i] = calc_spatial_key(g_sim.x[i], g_sim.y[i], g_sim.z[i]); | |
| } | |
| g_sim.mkeys[SENTINEL_INDEX] = (uint128)-1; | |
| uint32_t hist[256]; | |
| for (int b = 15; b >= 0; b--) { | |
| memset(hist, 0, sizeof(hist)); | |
| for (int i = 0; i < cnt; i++) { | |
| hist[((uint8_t*)&g_sim.mkeys[src[i]])[b]]++; | |
| } | |
| uint32_t sum = 0; | |
| for (int i = 0; i < 256; i++) { | |
| uint32_t t = hist[i]; | |
| hist[i] = sum; | |
| sum += t; | |
| } | |
| for (int i = 0; i < cnt; i++) { | |
| uint32_t si = src[i]; | |
| dst[hist[((uint8_t*)&g_sim.mkeys[si])[b]]++] = si; | |
| } | |
| uint32_t *t = src; | |
| src = dst; | |
| dst = t; | |
| } | |
| } | |
| static int | |
| count_hits_avx2(int cnt) { | |
| int n_hit = 0, i = 0; | |
| __m256i target = _mm256_set1_epi8(STATUS_HIT); | |
| for (; i <= cnt - 32; i += 32) { | |
| __m256i v = _mm256_load_si256((__m256i*)&g_sim.status[i]); | |
| __m256i cmp = _mm256_cmpeq_epi8(v, target); | |
| unsigned int mask = (unsigned int)_mm256_movemask_epi8(cmp); | |
| n_hit += __builtin_popcount(mask); | |
| } | |
| for (; i < cnt; i++) { | |
| if (g_sim.status[i] == STATUS_HIT) { | |
| n_hit++; | |
| } | |
| } | |
| return n_hit; | |
| } | |
| static void | |
| init_sim(int cnt) { | |
| srand(12345); | |
| int s_idx = 0; | |
| int c_leo = 4000; | |
| int c_gps = 3000; | |
| int c_geo = 3000; | |
| /* LEO */ | |
| for (int i = 0; i < c_leo; i++) { | |
| int j = s_idx++; | |
| g_sim.type[j] = TYPE_SAT; | |
| g_sim.colors[j] = CYAN; | |
| g_sim.sz[j] = 50.0f; | |
| g_sim.status[j] = STATUS_OK; | |
| g_sim.anim[j] = 0.0f; | |
| g_sim.e[j] = 0.0f; | |
| g_sim.a[j] = R_LEO_550; | |
| g_sim.i[j] = 53.0f * (PI/180.0f); | |
| int plane = i % 72; | |
| int slot = i / 72; | |
| int per_plane = c_leo / 72; | |
| g_sim.O[j] = ((float)plane / 72.0f) * 2.0f * PI; | |
| g_sim.v[j] = ((float)slot / (float)per_plane) * 2.0f * PI; | |
| g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j])); | |
| } | |
| /* GPS */ | |
| for (int i = 0; i < c_gps; i++) { | |
| int j = s_idx++; | |
| g_sim.type[j] = TYPE_SAT; | |
| g_sim.colors[j] = CYAN; | |
| g_sim.sz[j] = 50.0f; | |
| g_sim.status[j] = STATUS_OK; | |
| g_sim.anim[j] = 0.0f; | |
| g_sim.e[j] = 0.0f; | |
| g_sim.a[j] = R_GPS; | |
| g_sim.i[j] = 55.0f * (PI/180.0f); | |
| g_sim.O[j] = ((float)rand()/(float)RAND_MAX) * 2.0f * PI; | |
| g_sim.v[j] = ((float)rand()/(float)RAND_MAX) * 2.0f * PI; | |
| g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j])); | |
| } | |
| /* GEO */ | |
| for (int i = 0; i < c_geo; i++) { | |
| int j = s_idx++; | |
| g_sim.type[j] = TYPE_SAT; | |
| g_sim.colors[j] = CYAN; | |
| g_sim.sz[j] = 50.0f; | |
| g_sim.status[j] = STATUS_OK; | |
| g_sim.anim[j] = 0.0f; | |
| g_sim.e[j] = 0.0f; | |
| g_sim.a[j] = R_GEO; | |
| g_sim.i[j] = 0.0f; | |
| g_sim.O[j] = 0.0f; | |
| double prog = (double)i / (double)c_geo; | |
| g_sim.v[j] = (float)(prog * 2.0 * PI); | |
| g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j])); | |
| } | |
| /* Debris */ | |
| for (int j = s_idx; j < cnt; j++) { | |
| g_sim.type[j] = TYPE_DEBRIS; | |
| g_sim.colors[j] = WHITE; | |
| g_sim.status[j] = STATUS_OK; | |
| g_sim.anim[j] = 0.0f; | |
| float alt; | |
| int safe = 0; | |
| int attempts = 0; | |
| while (!safe && attempts < 50) { | |
| float r = (float)rand() / (float)RAND_MAX; | |
| alt = R_DEBRIS_MIN + r * r * (R_DEBRIS_MAX - R_DEBRIS_MIN); | |
| if (fabsf(alt - R_GEO) > 2000000.0f) { | |
| safe = 1; | |
| } | |
| attempts++; | |
| } | |
| if (!safe) { | |
| alt = R_LEO_550 + 200000.0f; | |
| } | |
| g_sim.a[j] = alt; | |
| g_sim.e[j] = ((float)rand() / (float)RAND_MAX) * 0.2f; | |
| g_sim.i[j] = ((float)rand() / (float)RAND_MAX) * PI; | |
| g_sim.O[j] = ((float)rand() / (float)RAND_MAX) * 2*PI; | |
| g_sim.v[j] = ((float)rand() / (float)RAND_MAX) * 2*PI; | |
| g_sim.sz[j] = (float)(10 + (rand() % 40)); | |
| g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j])); | |
| } | |
| /* Sentinel Init */ | |
| g_sim.x[SENTINEL_INDEX] = 1e20f; | |
| g_sim.y[SENTINEL_INDEX] = 1e20f; | |
| g_sim.z[SENTINEL_INDEX] = 1e20f; | |
| g_sim.vx[SENTINEL_INDEX] = 0; | |
| g_sim.vy[SENTINEL_INDEX] = 0; | |
| g_sim.vz[SENTINEL_INDEX] = 0; | |
| g_sim.status[SENTINEL_INDEX] = 0; | |
| } | |
| static FORCE_INLINE void | |
| avx_sincos_ps(__m256 x, __m256 *s, __m256 *c) { | |
| #define _AVX_CONST(Name, Val)\ | |
| static const float Name[8] ALIGNED(32) = { Val, Val, Val, Val, Val, Val, Val, Val } | |
| _AVX_CONST(avx_inv_2pi, 0.1591549430918953358f); /* 1 / 2pi */ | |
| _AVX_CONST(avx_2pi, 6.283185307179586477f); | |
| /* Pi split into 3 parts for precision */ | |
| _AVX_CONST(avx_dp1, -6.28125f); | |
| _AVX_CONST(avx_dp2, -0.0019353071795864769253f); | |
| _AVX_CONST(avx_dp3, -0.00000000000000008797f); // Precision tail | |
| /* Cosine Coefficients (Degree 6) */ | |
| _AVX_CONST(avx_c0, -0.4999999963f); | |
| _AVX_CONST(avx_c1, 0.0416666418f); | |
| _AVX_CONST(avx_c2, -0.0013888397f); | |
| _AVX_CONST(avx_c3, 0.0000247609f); | |
| _AVX_CONST(avx_c4, -0.0000002605f); | |
| /* Sine Coefficients (Degree 5) */ | |
| _AVX_CONST(avx_s0, -0.1666666664f); | |
| _AVX_CONST(avx_s1, 0.0083333315f); | |
| _AVX_CONST(avx_s2, -0.0001984090f); | |
| _AVX_CONST(avx_s3, 0.0000027526f); | |
| _AVX_CONST(avx_s4, -0.0000000239f); | |
| _AVX_CONST(avx_one, 1.0f); | |
| _AVX_CONST(avx_half, 0.5f); | |
| /* 1. Range Reduction (Payne-Hanek style) */ | |
| /* q = round(x / 2pi) */ | |
| __m256 q = _mm256_mul_ps(x, _mm256_load_ps(avx_inv_2pi)); | |
| q = _mm256_round_ps(q, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); | |
| /* x = x - q * 2pi (Extended Precision) */ | |
| /* This prevents the "Exploding Orbit" bug by keeping x small [-PI, PI] */ | |
| x = _mm256_fmadd_ps(q, _mm256_load_ps(avx_dp1), x); | |
| x = _mm256_fmadd_ps(q, _mm256_load_ps(avx_dp2), x); | |
| x = _mm256_fmadd_ps(q, _mm256_load_ps(avx_dp3), x); | |
| /* 2. Polynomial Approximation (Minimax) */ | |
| __m256 x2 = _mm256_mul_ps(x, x); | |
| /* Cosine: 1 + x^2 * (C0 + x^2 * (C1 + ...)) */ | |
| __m256 y_cos = _mm256_load_ps(avx_c4); | |
| y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c3)); | |
| y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c2)); | |
| y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c1)); | |
| y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c0)); | |
| y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_one)); | |
| /* Sine: x * (1 + x^2 * (S0 + x^2 * (S1 + ...))) */ | |
| __m256 y_sin = _mm256_load_ps(avx_s4); | |
| y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s3)); | |
| y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s2)); | |
| y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s1)); | |
| y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s0)); | |
| y_sin = _mm256_mul_ps(y_sin, x2); | |
| y_sin = _mm256_fmadd_ps(y_sin, x, x); | |
| *s = y_sin; | |
| *c = y_cos; | |
| } | |
| static void | |
| update_physics(int cnt, float dt_scalar) { | |
| __m256 dt = _mm256_set1_ps(dt_scalar); | |
| __m256 max_anom = _mm256_set1_ps(6.2831853f); /* 2*PI */ | |
| __m256 one = _mm256_set1_ps(1.0f); | |
| __m256 anim_decay = _mm256_set1_ps(0.016f * 0.5f); | |
| __m256 zero = _mm256_setzero_ps(); | |
| for (int i = 0; i < cnt; i += 8) { | |
| /* 1. Dead/Alive Masking */ | |
| /* Note: Status is uint8. Load 64-bit chunk, expand to 32-bit int */ | |
| uint64_t s_chunk = *(uint64_t*)&g_sim.status[i]; | |
| __m128i s_128 = _mm_set_epi64x(0, s_chunk); | |
| __m256i s_int = _mm256_cvtepu8_epi32(s_128); | |
| __m256i dead_mask_i = _mm256_cmpeq_epi32(s_int, _mm256_set1_epi32(2)); | |
| __m256 dead_mask = _mm256_castsi256_ps(dead_mask_i); | |
| /* 2. Load Orbital State */ | |
| __m256 a = _mm256_load_ps(&g_sim.a[i]); | |
| __m256 e = _mm256_load_ps(&g_sim.e[i]); | |
| __m256 incl = _mm256_load_ps(&g_sim.i[i]); | |
| __m256 O = _mm256_load_ps(&g_sim.O[i]); | |
| __m256 v = _mm256_load_ps(&g_sim.v[i]); | |
| __m256 n = _mm256_load_ps(&g_sim.n[i]); | |
| /* 3. Propagate Anomaly */ | |
| /* v += n * dt */ | |
| __m256 v_next = _mm256_fmadd_ps(n, dt, v); | |
| /* Store raw v (let the SinCos range reducer handle the wrapping precision) */ | |
| /* We can do a simple modulo here to keep v from reaching infinity over weeks */ | |
| __m256 wrap_mask = _mm256_cmp_ps(v_next, max_anom, _CMP_GT_OQ); | |
| v_next = _mm256_sub_ps(v_next, _mm256_and_ps(wrap_mask, max_anom)); | |
| _mm256_store_ps(&g_sim.v[i], v_next); | |
| /* 4. High-Precision Trig */ | |
| __m256 cos_v, sin_v, cos_O, sin_O, cos_i, sin_i; | |
| avx_sincos_ps(v_next, &sin_v, &cos_v); | |
| avx_sincos_ps(O, &sin_O, &cos_O); | |
| avx_sincos_ps(incl, &sin_i, &cos_i); | |
| /* 5. Kepler Solution */ | |
| __m256 e2 = _mm256_mul_ps(e, e); | |
| __m256 num = _mm256_mul_ps(a, _mm256_sub_ps(one, e2)); | |
| /* den = 1.0 + e * cos_v */ | |
| __m256 den = _mm256_fmadd_ps(e, cos_v, one); | |
| __m256 r = _mm256_div_ps(num, den); | |
| __m256 px = _mm256_mul_ps(r, cos_v); | |
| __m256 py = _mm256_mul_ps(r, sin_v); | |
| /* 6. Rotation to World */ | |
| /* x = px * cos_O - py * cos_i * sin_O */ | |
| __m256 term = _mm256_mul_ps(cos_i, sin_O); | |
| __m256 new_x = _mm256_fmsub_ps(py, term, _mm256_mul_ps(px, cos_O)); | |
| /* fmsub(a,b,c) = -(a*b) + c ... wait, we want (px*cosO) - (py*term) */ | |
| /* FMSUB: (a*b) - c. FNMADD: -(a*b) + c */ | |
| new_x = _mm256_fnmadd_ps(py, term, _mm256_mul_ps(px, cos_O)); | |
| /* y = px * sin_O + py * cos_i * cos_O */ | |
| term = _mm256_mul_ps(cos_i, cos_O); | |
| __m256 new_y = _mm256_fmadd_ps(py, term, _mm256_mul_ps(px, sin_O)); | |
| /* z = py * sin_i */ | |
| __m256 new_z = _mm256_mul_ps(py, sin_i); | |
| /* 7. Velocity Calc */ | |
| __m256 old_x = _mm256_load_ps(&g_sim.x[i]); | |
| __m256 old_y = _mm256_load_ps(&g_sim.y[i]); | |
| __m256 old_z = _mm256_load_ps(&g_sim.z[i]); | |
| __m256 inv_dt = _mm256_set1_ps(1.0f / dt_scalar); | |
| __m256 new_vx = _mm256_mul_ps(_mm256_sub_ps(new_x, old_x), inv_dt); | |
| __m256 new_vy = _mm256_mul_ps(_mm256_sub_ps(new_y, old_y), inv_dt); | |
| __m256 new_vz = _mm256_mul_ps(_mm256_sub_ps(new_z, old_z), inv_dt); | |
| /* 8. Dead/Alive Blending */ | |
| __m256 anim = _mm256_load_ps(&g_sim.anim[i]); | |
| __m256 anim_next = _mm256_sub_ps(anim, anim_decay); | |
| anim_next = _mm256_max_ps(anim_next, zero); | |
| __m256 final_x = _mm256_blendv_ps(new_x, old_x, dead_mask); | |
| __m256 final_y = _mm256_blendv_ps(new_y, old_y, dead_mask); | |
| __m256 final_z = _mm256_blendv_ps(new_z, old_z, dead_mask); | |
| __m256 final_vx = _mm256_blendv_ps(new_vx, zero, dead_mask); | |
| __m256 final_vy = _mm256_blendv_ps(new_vy, zero, dead_mask); | |
| __m256 final_vz = _mm256_blendv_ps(new_vz, zero, dead_mask); | |
| __m256 final_anim = _mm256_blendv_ps(anim, anim_next, dead_mask); | |
| /* 9. Store */ | |
| _mm256_store_ps(&g_sim.x[i], final_x); | |
| _mm256_store_ps(&g_sim.y[i], final_y); | |
| _mm256_store_ps(&g_sim.z[i], final_z); | |
| _mm256_store_ps(&g_sim.vx[i], final_vx); | |
| _mm256_store_ps(&g_sim.vy[i], final_vy); | |
| _mm256_store_ps(&g_sim.vz[i], final_vz); | |
| _mm256_store_ps(&g_sim.anim[i], final_anim); | |
| } | |
| } | |
| static void | |
| collision_step(int cnt) { | |
| sort_agents_by_pos(cnt); | |
| __m256 pred_t = _mm256_set1_ps(PREDICTION_TIME); | |
| __m256 kill_sq = _mm256_set1_ps(KILL_DIST * KILL_DIST); | |
| __m256 warn_sq = _mm256_set1_ps(WARNING_DIST * WARNING_DIST); | |
| __m256 warn_dist_abs = _mm256_set1_ps(WARNING_DIST); | |
| for (int k = 0; k < cnt; k++) { | |
| int i = g_sim.sort_idx[k]; | |
| if (g_sim.status[i] == STATUS_HIT) { | |
| continue; | |
| } | |
| float pxi_s = g_sim.x[i] + (g_sim.vx[i] * PREDICTION_TIME); | |
| float pyi_s = g_sim.y[i] + (g_sim.vy[i] * PREDICTION_TIME); | |
| float pzi_s = g_sim.z[i] + (g_sim.vz[i] * PREDICTION_TIME); | |
| __m256 pxi = _mm256_set1_ps(pxi_s); | |
| __m256 pyi = _mm256_set1_ps(pyi_s); | |
| __m256 pzi = _mm256_set1_ps(pzi_s); | |
| for (int chunk = 0; chunk < 2; chunk++) { | |
| int base_offset = k + 1 + (chunk * 8); | |
| if (base_offset >= cnt) { | |
| break; | |
| } | |
| __m256i v_idx = _mm256_loadu_si256((__m256i*)&g_sim.sort_idx[base_offset]); | |
| __m256 xj = _mm256_i32gather_ps(g_sim.x, v_idx, 4); | |
| __m256 yj = _mm256_i32gather_ps(g_sim.y, v_idx, 4); | |
| __m256 zj = _mm256_i32gather_ps(g_sim.z, v_idx, 4); | |
| __m256 vxj = _mm256_i32gather_ps(g_sim.vx, v_idx, 4); | |
| __m256 vyj = _mm256_i32gather_ps(g_sim.vy, v_idx, 4); | |
| __m256 vzj = _mm256_i32gather_ps(g_sim.vz, v_idx, 4); | |
| __m256 pxj = _mm256_add_ps(xj, _mm256_mul_ps(vxj, pred_t)); | |
| __m256 pyj = _mm256_add_ps(yj, _mm256_mul_ps(vyj, pred_t)); | |
| __m256 pzj = _mm256_add_ps(zj, _mm256_mul_ps(vzj, pred_t)); | |
| __m256 dx = _mm256_sub_ps(pxi, pxj); | |
| __m256 dy = _mm256_sub_ps(pyi, pyj); | |
| __m256 dz = _mm256_sub_ps(pzi, pzj); | |
| __m256 abs_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)); | |
| __m256 abs_dx = _mm256_and_ps(dx, abs_mask); | |
| __m256 valid_mask = _mm256_cmp_ps(abs_dx, warn_dist_abs, _CMP_LE_OQ); | |
| __m256 d2 = _mm256_add_ps(_mm256_mul_ps(dx,dx), _mm256_add_ps(_mm256_mul_ps(dy,dy), _mm256_mul_ps(dz,dz))); | |
| __m256 is_kill = _mm256_cmp_ps(d2, kill_sq, _CMP_LT_OQ); | |
| __m256 is_warn = _mm256_cmp_ps(d2, warn_sq, _CMP_LT_OQ); | |
| is_kill = _mm256_and_ps(is_kill, valid_mask); | |
| is_warn = _mm256_and_ps(is_warn, valid_mask); | |
| int mask_k = _mm256_movemask_ps(is_kill); | |
| int mask_w = _mm256_movemask_ps(is_warn); | |
| if (mask_k == 0 && mask_w == 0) { | |
| continue; | |
| } | |
| for (int bit = 0; bit < 8; bit++) { | |
| int current_idx_offset = base_offset + bit; | |
| if (current_idx_offset >= cnt) { | |
| break; | |
| } | |
| if ((mask_k >> bit) & 1) { | |
| int j = g_sim.sort_idx[current_idx_offset]; | |
| if (g_sim.status[j] == STATUS_HIT) { | |
| continue; | |
| } | |
| g_sim.status[i] = STATUS_HIT; | |
| g_sim.anim[i] = 1.0f; | |
| g_sim.status[j] = STATUS_HIT; | |
| g_sim.anim[j] = 1.0f; | |
| } | |
| else if ((mask_w >> bit) & 1) { | |
| int j = g_sim.sort_idx[current_idx_offset]; | |
| if (g_sim.status[j] == STATUS_HIT) { | |
| continue; | |
| } | |
| if (g_sim.type[i] == TYPE_SAT) { | |
| g_sim.status[i] = STATUS_WARN; | |
| g_sim.v[i] += 0.0002f; | |
| g_total_maneuvers++; | |
| } | |
| if (g_sim.type[j] == TYPE_SAT) { | |
| g_sim.status[j] = STATUS_WARN; | |
| g_sim.v[j] += 0.0002f; | |
| g_total_maneuvers++; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| static void | |
| DrawRingOrtho(float cx, float cy, float cz, float r, int axis) { | |
| int segs = 16; | |
| float step = (2.0f * PI) / segs; | |
| for (int i = 0; i < segs; i++) { | |
| float a1 = i * step; float a2 = (i + 1) * step; | |
| float x1=0, y1=0, z1=0, x2=0, y2=0, z2=0; | |
| if (axis == 0) { | |
| x1 = cosf(a1)*r; | |
| y1 = sinf(a1)*r; | |
| x2 = cosf(a2)*r; | |
| y2 = sinf(a2)*r; | |
| } else if (axis == 1) { | |
| x1 = cosf(a1)*r; | |
| z1 = sinf(a1)*r; | |
| x2 = cosf(a2)*r; | |
| z2 = sinf(a2)*r; | |
| } else { | |
| y1 = cosf(a1)*r; | |
| z1 = sinf(a1)*r; | |
| y2 = cosf(a2)*r; | |
| z2 = sinf(a2)*r; | |
| } | |
| rlVertex3f(cx + x1, cy + y1, cz + z1); | |
| rlVertex3f(cx + x2, cy + y2, cz + z2); | |
| } | |
| } | |
| static void | |
| DrawScene(int cnt) { | |
| rlBegin(RL_LINES); | |
| for (int i = 0; i < cnt; i++) { | |
| /* SWIZZLE: Map Z-Up Physics to Y-Up Render */ | |
| float rx = g_sim.x[i] * RENDER_SCALE; | |
| float ry = g_sim.z[i] * RENDER_SCALE; /* Z becomes Up (Y) */ | |
| float rz = g_sim.y[i] * RENDER_SCALE; /* Y becomes Depth (Z) */ | |
| /* Pre-calc Velocity Tail Swizzled */ | |
| float rvx = g_sim.vx[i] * RENDER_SCALE; | |
| float rvy = g_sim.vz[i] * RENDER_SCALE; | |
| float rvz = g_sim.vy[i] * RENDER_SCALE; | |
| if (g_sim.status[i] == 2) { | |
| /* Explosions */ | |
| if (g_sim.anim[i] > 0.01f) { | |
| int alpha = (int)(g_sim.anim[i] * 255.0f); | |
| rlColor4ub(255, 60, 0, alpha); | |
| float radius = (1.0f - g_sim.anim[i]) * 3.0f; | |
| DrawRingOrtho(rx, ry, rz, radius, 0); | |
| DrawRingOrtho(rx, ry, rz, radius, 1); | |
| DrawRingOrtho(rx, ry, rz, radius, 2); | |
| float c = 0.1f; | |
| rlVertex3f(rx-c, ry, rz); rlVertex3f(rx+c, ry, rz); | |
| rlVertex3f(rx, ry-c, rz); rlVertex3f(rx, ry+c, rz); | |
| rlVertex3f(rx, ry, rz-c); rlVertex3f(rx, ry, rz+c); | |
| } else { | |
| /* Wreckage */ | |
| rlColor4ub(80, 0, 0, 255); | |
| float w = 0.02f; | |
| rlVertex3f(rx-w, ry, rz-w); rlVertex3f(rx+w, ry, rz+w); | |
| rlVertex3f(rx-w, ry, rz+w); rlVertex3f(rx+w, ry, rz-w); | |
| } | |
| continue; | |
| } | |
| /* Alive Objects */ | |
| Color c = g_sim.colors[i]; | |
| rlColor4ub(c.r, c.g, c.b, 255); | |
| rlVertex3f(rx, ry, rz); | |
| if (g_sim.type[i] == TYPE_SAT) { | |
| rlVertex3f(rx + 0.02f, ry, rz); | |
| } else { | |
| rlVertex3f(rx + 0.01f, ry, rz); | |
| } | |
| } | |
| rlEnd(); | |
| } | |
| static void | |
| UpdateCameraCustom(Camera3D *cam) { | |
| /* 1. Orbit (Right Mouse) */ | |
| if (IsMouseButtonDown(MOUSE_BUTTON_RIGHT)) { | |
| Vector2 delta = GetMouseDelta(); | |
| float speed = 0.003f; | |
| Vector3 pos = Vector3Subtract(cam->position, cam->target); | |
| float radius = Vector3Length(pos); | |
| /* Calculate Theta (Azimuth) and Phi (Elevation) */ | |
| float theta = atan2f(pos.x, pos.z); | |
| float phi = acosf(pos.y / radius); | |
| theta -= delta.x * speed; | |
| phi -= delta.y * speed; /* Drag Up = Decrease Phi = Move Up */ | |
| if (phi < 0.01f) { | |
| phi = 0.01f; | |
| } | |
| if (phi > PI - 0.01f) { | |
| phi = (float)PI - 0.01f; | |
| } | |
| pos.x = radius * sinf(phi) * sinf(theta); | |
| pos.z = radius * sinf(phi) * cosf(theta); | |
| pos.y = radius * cosf(phi); | |
| cam->position = Vector3Add(cam->target, pos); | |
| } | |
| /* 2. Zoom (Mouse Wheel) */ | |
| float wheel = GetMouseWheelMove(); | |
| if (wheel != 0) { | |
| Vector3 pos = Vector3Subtract(cam->position, cam->target); | |
| float radius = Vector3Length(pos); | |
| radius -= wheel * 2.0f; | |
| if (radius < 1.5f) { | |
| radius = 1.5f; | |
| } | |
| pos = Vector3Scale(Vector3Normalize(pos), radius); | |
| cam->position = Vector3Add(cam->target, pos); | |
| } | |
| } | |
| extern int | |
| main(void) { | |
| InitWindow(800, 600, "KESSLER"); | |
| SetTargetFPS(60); | |
| init_sim(SIM_AGENT_COUNT); | |
| Camera3D cam = { 0 }; | |
| cam.position = (Vector3){ 15.0f, 15.0f, 15.0f }; | |
| cam.target = (Vector3){ 0.0f, 0.0f, 0.0f }; | |
| cam.up = (Vector3){ 0.0f, 1.0f, 0.0f }; | |
| cam.fovy = 45.0f; | |
| cam.projection = CAMERA_PERSPECTIVE; | |
| Model earth = {0}; | |
| bool has_model = false; | |
| float model_scale = 1.0f; | |
| if (FileExists("resources/earth.glb")) { | |
| earth = LoadModel("resources/earth.glb"); | |
| BoundingBox bb = GetModelBoundingBox(earth); | |
| float dx = bb.max.x - bb.min.x; | |
| float dy = bb.max.y - bb.min.y; | |
| float dz = bb.max.z - bb.min.z; | |
| float max_dim = (dx > dy) ? (dx > dz ? dx : dz) : (dy > dz ? dy : dz); | |
| if (max_dim > 0.0f) { | |
| model_scale = 2.0f / max_dim; | |
| } | |
| has_model = true; | |
| } | |
| uint64_t sim_time = (uint64_t)time(NULL) * 1000; | |
| uint32_t frame_counter = 0; | |
| bool paused = false; | |
| while (!WindowShouldClose()) { | |
| if (!paused) { | |
| update_physics(SIM_AGENT_COUNT, 0.016f * TIME_SCALE); | |
| collision_step(SIM_AGENT_COUNT); | |
| sim_time += (uint64_t)(16 * TIME_SCALE); | |
| frame_counter++; | |
| } | |
| BeginDrawing(); | |
| ClearBackground(BLACK); | |
| BeginMode3D(cam); | |
| if (has_model) { | |
| double day_prog = (double)(sim_time % 86400000) / 86400000.0; | |
| DrawModelEx(earth, (Vector3){0,0,0}, (Vector3){0,1,0}, (float)(day_prog*360.0), (Vector3){model_scale,model_scale,model_scale}, WHITE); | |
| } else { | |
| DrawSphereWires((Vector3){0,0,0}, 1.0f, 16, 16, DARKGREEN); | |
| DrawSphere((Vector3){0,0,0}, 0.95f, BLACK); | |
| } | |
| DrawScene(SIM_AGENT_COUNT); | |
| EndMode3D(); | |
| int sw = GetScreenWidth(); | |
| DrawFPS(10, 10); | |
| char time_buf[64]; | |
| { | |
| time_t raw_time = sim_time / 1000; | |
| struct tm * tinfo = gmtime(&raw_time); | |
| strftime(time_buf, sizeof(time_buf), "%H:%M:%S UTC", tinfo); | |
| } | |
| int n_hit = count_hits_avx2(SIM_AGENT_COUNT); | |
| DrawText("KESSLER", sw - 280, 10, 20, WHITE); | |
| DrawText("-----------------------", sw - 280, 30, 20, DARKGRAY); | |
| DrawText(TextFormat("TIME: %s", time_buf), sw - 280, 50, 10, LIGHTGRAY); | |
| DrawText(TextFormat("ASSETS: %d", NUM_SATELLITES), sw - 280, 65, 10, CYAN); | |
| float rec_rate = 60.0f / RECORD_INTERVAL; | |
| int ops = (int)(SIM_AGENT_COUNT * rec_rate); | |
| float mb = ops * 80.0f / (1024*1024); | |
| DrawText(TextFormat("INGEST: %.1f M/sec", ops/1000000.0f), sw - 280, 90, 10, GREEN); | |
| DrawText(TextFormat("LOG RATE: %.1f GB/hr", (mb*3600)/1024), sw - 280, 105, 10, GREEN); | |
| DrawText(TextFormat("EVASIONS:%llu", (unsigned long long)g_total_maneuvers), sw - 280, 130, 10, YELLOW); | |
| if (n_hit > 0) { | |
| DrawText(TextFormat("IMPACTS: %d", n_hit), sw - 280, 145, 10, RED); | |
| } else { | |
| DrawText("IMPACTS: NONE", sw - 280, 145, 10, LIME); | |
| } | |
| if (IsKeyPressed(KEY_SPACE)) { | |
| paused = !paused; | |
| } | |
| if (paused) { | |
| UpdateCameraCustom(&cam); | |
| } else { | |
| UpdateCamera(&cam, CAMERA_ORBITAL); | |
| } | |
| EndDrawing(); | |
| } | |
| if(has_model) { | |
| UnloadModel(earth); | |
| } | |
| CloseWindow(); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.