Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active December 31, 2025 10:37
Show Gist options
  • Select an option

  • Save vurtun/a8df34afb7a4575a2ed076fd3380f56e to your computer and use it in GitHub Desktop.

Select an option

Save vurtun/a8df34afb7a4575a2ed076fd3380f56e to your computer and use it in GitHub Desktop.
Kessler
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <stdint.h>
#include <immintrin.h>
#include <raylib.h>
#include <raymath.h>
#include <rlgl.h>
/* --- CONFIG --- */
#define SIM_AGENT_COUNT 100000
#define SENTINEL_INDEX SIM_AGENT_COUNT
#define REAL_ARRAY_SIZE (SIM_AGENT_COUNT + 32)
#define NUM_SATELLITES 10000
#define EARTH_RAD_M 6371000.0f
#define RENDER_SCALE (1.0f / EARTH_RAD_M)
#define EARTH_RAD_KM 6371.0f
#define RECORD_INTERVAL 2
#define TIME_SCALE 20.0f
/* Colors */
#define CYAN (Color){ 0, 255, 255, 255 }
/* Collision Params */
#define PREDICTION_TIME 2.0f
#define WARNING_DIST 20000.0f
#define KILL_DIST 100.0f
#define COORD_OFFSET 2147483648U
/* Physics */
#define R_LEO_550 (EARTH_RAD_M + 550000.0f)
#define R_GPS (EARTH_RAD_M + 20200000.0f)
#define R_GEO (EARTH_RAD_M + 35786000.0f)
#define R_DEBRIS_MIN (EARTH_RAD_M + 400000.0f)
#define R_DEBRIS_MAX (EARTH_RAD_M + 40000000.0f)
#define MU_EARTH 3.986e14f
#define GOP_SIZE 30
#define UNIT_SCALE 1.0f /* Meter Scale */
#define SIM_AGENT_COUNT 100000
#define REAL_ARRAY_SIZE (SIM_AGENT_COUNT + 32)
#define RECORD_INTERVAL 2
#define MAX_EVENTS 512
#define COMP_BUF_SIZE (4 * 1024 * 1024 + 64)
#define SCRUB_MAGIC 0x52425253 /* "SRBR" */
#define SCRUB_SYNC 0x594E5321 /* "!SNY" */
#define FORCE_INLINE inline __attribute__((always_inline))
#define ALIGNED(x) __attribute__((aligned(x)))
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#define ALIGN_AVX __attribute__((aligned(32)))
typedef __uint128_t uint128;
enum agent_type {
TYPE_DEBRIS = 0,
TYPE_SAT = 1
};
enum agent_status {
STATUS_OK = 0,
STATUS_WARN = 1, /* Taking Evasive Action */
STATUS_HIT = 2 /* Destroyed */
};
struct ALIGNED(64) sim_state {
float a[REAL_ARRAY_SIZE] ALIGNED(64);
float e[REAL_ARRAY_SIZE] ALIGNED(64);
float i[REAL_ARRAY_SIZE] ALIGNED(64);
float O[REAL_ARRAY_SIZE] ALIGNED(64);
float v[REAL_ARRAY_SIZE] ALIGNED(64);
float n[REAL_ARRAY_SIZE] ALIGNED(64);
float sz[REAL_ARRAY_SIZE] ALIGNED(64);
float x[REAL_ARRAY_SIZE] ALIGNED(64);
float y[REAL_ARRAY_SIZE] ALIGNED(64);
float z[REAL_ARRAY_SIZE] ALIGNED(64);
float vx[REAL_ARRAY_SIZE] ALIGNED(64);
float vy[REAL_ARRAY_SIZE] ALIGNED(64);
float vz[REAL_ARRAY_SIZE] ALIGNED(64);
/* collision */
uint128 mkeys[REAL_ARRAY_SIZE];
uint32_t sort_idx[REAL_ARRAY_SIZE];
uint32_t sort_tmp[REAL_ARRAY_SIZE];
/* visuals */
float anim[REAL_ARRAY_SIZE] ALIGNED(64);
uint8_t type[REAL_ARRAY_SIZE];
uint8_t status[REAL_ARRAY_SIZE];
Color colors[REAL_ARRAY_SIZE];
/* Fixed-Point Persistence (64-bit mm) */
int64_t last_x[REAL_ARRAY_SIZE] ALIGN_AVX;
int64_t last_y[REAL_ARRAY_SIZE] ALIGN_AVX;
int64_t last_z[REAL_ARRAY_SIZE] ALIGN_AVX;
/* Columnar Delta Staging (32-bit mm) */
int32_t dx[REAL_ARRAY_SIZE] ALIGN_AVX;
int32_t dy[REAL_ARRAY_SIZE] ALIGN_AVX;
int32_t dz[REAL_ARRAY_SIZE] ALIGN_AVX;
uint8_t comp_buf[COMP_BUF_SIZE];
uint64_t frame_offsets[1000000];
uint32_t recorded_frame_count;
};
static struct sim_state g_sim;
static uint64_t g_total_maneuvers = 0;
static inline unsigned long long
rnd_gen(unsigned long long val, int idx) {
return val + (unsigned long long)idx * 0x9E3779B97F4A7C15llu;
}
static inline unsigned long long
rnd_mix(unsigned long long val) {
val = (val ^ (val >> 30)) * 0xBF58476D1CE4E5B9llu;
val = (val ^ (val >> 27)) * 0x94D049BB133111EBllu;
return val ^ (val >> 31llu);
}
static inline unsigned long long
rnd_split_mix(unsigned long long *state, int idx) {
*state = rnd_gen(*state, idx);
return rnd_mix(*state);
}
static inline unsigned long long
rnd(unsigned long long *state) {
return rnd_split_mix(state, 1);
}
static inline float
rnd_f32(unsigned long long *state) {
unsigned long long r = rnd(state);
return (float)(r >> 40) / (float)(1ULL << 24);
}
static unsigned long long
get_os_seed(void) {
unsigned long long seed = 0;
FILE *f = fopen("/dev/urandom", "rb");
if (!f || fread(&seed, sizeof(seed), 1, f) != 1) {
seed = (unsigned long long)time(NULL);
}
if (f) {
fclose(f);
}
return seed;
}
static FORCE_INLINE uint64_t
spread_bits_64(uint64_t v) {
v &= 0x1FFFFF;
v = (v | v << 32) & 0x1F00000000FFFFULL;
v = (v | v << 16) & 0x1F0000FF0000FFULL;
v = (v | v << 8) & 0x100F00F00F00F00FULL;
v = (v | v << 4) & 0x10c30c30c30c30c3ULL;
v = (v | v << 2) & 0x1249249249249249ULL;
return v;
}
static FORCE_INLINE uint128
spread_coord_128(uint32_t c) {
#if defined(VISGRID_USE_AVX2) && defined(__BMI2__)
const uint64_t M1 = 0x9249249249249249ULL;
uint64_t lo = _pdep_u64(c & 0x7FFF, M1);
uint64_t hi = _pdep_u64((c >> 15) & 0x7FFF, M1);
return ((uint128)hi << 45) | (uint128)lo;
#else
uint64_t lo = spread_bits_64(c & 0x7FFF);
uint64_t hi = spread_bits_64((c >> 15) & 0x7FFF);
return ((uint128)hi << 45) | (uint128)lo;
#endif
}
static FORCE_INLINE uint128
pack_mkey(uint32_t x, uint32_t y, uint32_t z, uint64_t t_mod, uint32_t lvl) {
uint128 sx = spread_coord_128(x);
uint128 sy = spread_coord_128(y);
uint128 sz = spread_coord_128(z);
uint128 spatial = sx | (sy << 1) | (sz << 2);
uint128 k;
const uint128 mask_90 = ((uint128)1 << 90) - 1;
k = ((uint128)lvl << 120) | ((uint128)(t_mod & 0x3FFFFFFF) << 90) | (spatial & mask_90);
uint64_t hi = (uint64_t)(k >> 64);
uint64_t lo = (uint64_t)k;
return ((uint128)__builtin_bswap64(lo) << 64) | __builtin_bswap64(hi);
}
static FORCE_INLINE uint128
calc_spatial_key(float fx, float fy, float fz) {
int32_t x = (int32_t)fx;
int32_t y = (int32_t)fy;
int32_t z = (int32_t)fz;
uint32_t gx = (x + COORD_OFFSET) >> 12;
uint32_t gy = (y + COORD_OFFSET) >> 12;
uint32_t gz = (z + COORD_OFFSET) >> 12;
return pack_mkey(gx, gy, gz, 0, 0);
}
static void
sort_agents_by_pos(int cnt) {
uint32_t *src = g_sim.sort_idx;
uint32_t *dst = g_sim.sort_tmp;
for(int i=0; i<cnt; i++) {
src[i] = i;
}
for(int i=cnt; i<REAL_ARRAY_SIZE; i++) {
src[i] = SENTINEL_INDEX;
}
for(int i=0; i<cnt; i++) {
g_sim.mkeys[i] = calc_spatial_key(g_sim.x[i], g_sim.y[i], g_sim.z[i]);
}
g_sim.mkeys[SENTINEL_INDEX] = (uint128)-1;
uint32_t hist[256];
for (int b = 15; b >= 0; b--) {
memset(hist, 0, sizeof(hist));
for (int i = 0; i < cnt; i++) {
hist[((uint8_t*)&g_sim.mkeys[src[i]])[b]]++;
}
uint32_t sum = 0;
for (int i = 0; i < 256; i++) {
uint32_t t = hist[i];
hist[i] = sum;
sum += t;
}
for (int i = 0; i < cnt; i++) {
uint32_t si = src[i];
dst[hist[((uint8_t*)&g_sim.mkeys[si])[b]]++] = si;
}
uint32_t *t = src;
src = dst;
dst = t;
}
}
static int
count_hits_avx2(int cnt) {
int n_hit = 0, i = 0;
__m256i target = _mm256_set1_epi8(STATUS_HIT);
for (; i <= cnt - 32; i += 32) {
__m256i v = _mm256_load_si256((__m256i*)&g_sim.status[i]);
__m256i cmp = _mm256_cmpeq_epi8(v, target);
unsigned int mask = (unsigned int)_mm256_movemask_epi8(cmp);
n_hit += __builtin_popcount(mask);
}
for (; i < cnt; i++) {
if (g_sim.status[i] == STATUS_HIT) {
n_hit++;
}
}
return n_hit;
}
static void
init_sim(int cnt) {
srand(12345);
int s_idx = 0;
int c_leo = 4000;
int c_gps = 3000;
int c_geo = 3000;
/* LEO */
for (int i = 0; i < c_leo; i++) {
int j = s_idx++;
g_sim.type[j] = TYPE_SAT;
g_sim.colors[j] = CYAN;
g_sim.sz[j] = 50.0f;
g_sim.status[j] = STATUS_OK;
g_sim.anim[j] = 0.0f;
g_sim.e[j] = 0.0f;
g_sim.a[j] = R_LEO_550;
g_sim.i[j] = 53.0f * (PI/180.0f);
int plane = i % 72;
int slot = i / 72;
int per_plane = c_leo / 72;
g_sim.O[j] = ((float)plane / 72.0f) * 2.0f * PI;
g_sim.v[j] = ((float)slot / (float)per_plane) * 2.0f * PI;
g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j]));
}
/* GPS */
for (int i = 0; i < c_gps; i++) {
int j = s_idx++;
g_sim.type[j] = TYPE_SAT;
g_sim.colors[j] = CYAN;
g_sim.sz[j] = 50.0f;
g_sim.status[j] = STATUS_OK;
g_sim.anim[j] = 0.0f;
g_sim.e[j] = 0.0f;
g_sim.a[j] = R_GPS;
g_sim.i[j] = 55.0f * (PI/180.0f);
g_sim.O[j] = ((float)rand()/(float)RAND_MAX) * 2.0f * PI;
g_sim.v[j] = ((float)rand()/(float)RAND_MAX) * 2.0f * PI;
g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j]));
}
/* GEO */
for (int i = 0; i < c_geo; i++) {
int j = s_idx++;
g_sim.type[j] = TYPE_SAT;
g_sim.colors[j] = CYAN;
g_sim.sz[j] = 50.0f;
g_sim.status[j] = STATUS_OK;
g_sim.anim[j] = 0.0f;
g_sim.e[j] = 0.0f;
g_sim.a[j] = R_GEO;
g_sim.i[j] = 0.0f;
g_sim.O[j] = 0.0f;
double prog = (double)i / (double)c_geo;
g_sim.v[j] = (float)(prog * 2.0 * PI);
g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j]));
}
/* Debris */
for (int j = s_idx; j < cnt; j++) {
g_sim.type[j] = TYPE_DEBRIS;
g_sim.colors[j] = WHITE;
g_sim.status[j] = STATUS_OK;
g_sim.anim[j] = 0.0f;
float alt;
int safe = 0;
int attempts = 0;
while (!safe && attempts < 50) {
float r = (float)rand() / (float)RAND_MAX;
alt = R_DEBRIS_MIN + r * r * (R_DEBRIS_MAX - R_DEBRIS_MIN);
if (fabsf(alt - R_GEO) > 2000000.0f) {
safe = 1;
}
attempts++;
}
if (!safe) {
alt = R_LEO_550 + 200000.0f;
}
g_sim.a[j] = alt;
g_sim.e[j] = ((float)rand() / (float)RAND_MAX) * 0.2f;
g_sim.i[j] = ((float)rand() / (float)RAND_MAX) * PI;
g_sim.O[j] = ((float)rand() / (float)RAND_MAX) * 2*PI;
g_sim.v[j] = ((float)rand() / (float)RAND_MAX) * 2*PI;
g_sim.sz[j] = (float)(10 + (rand() % 40));
g_sim.n[j] = sqrtf(MU_EARTH / (g_sim.a[j] * g_sim.a[j] * g_sim.a[j]));
}
/* Sentinel Init */
g_sim.x[SENTINEL_INDEX] = 1e20f;
g_sim.y[SENTINEL_INDEX] = 1e20f;
g_sim.z[SENTINEL_INDEX] = 1e20f;
g_sim.vx[SENTINEL_INDEX] = 0;
g_sim.vy[SENTINEL_INDEX] = 0;
g_sim.vz[SENTINEL_INDEX] = 0;
g_sim.status[SENTINEL_INDEX] = 0;
}
static FORCE_INLINE void
avx_sincos_ps(__m256 x, __m256 *s, __m256 *c) {
#define _AVX_CONST(Name, Val)\
static const float Name[8] ALIGNED(32) = { Val, Val, Val, Val, Val, Val, Val, Val }
_AVX_CONST(avx_inv_2pi, 0.1591549430918953358f); /* 1 / 2pi */
_AVX_CONST(avx_2pi, 6.283185307179586477f);
/* Pi split into 3 parts for precision */
_AVX_CONST(avx_dp1, -6.28125f);
_AVX_CONST(avx_dp2, -0.0019353071795864769253f);
_AVX_CONST(avx_dp3, -0.00000000000000008797f); // Precision tail
/* Cosine Coefficients (Degree 6) */
_AVX_CONST(avx_c0, -0.4999999963f);
_AVX_CONST(avx_c1, 0.0416666418f);
_AVX_CONST(avx_c2, -0.0013888397f);
_AVX_CONST(avx_c3, 0.0000247609f);
_AVX_CONST(avx_c4, -0.0000002605f);
/* Sine Coefficients (Degree 5) */
_AVX_CONST(avx_s0, -0.1666666664f);
_AVX_CONST(avx_s1, 0.0083333315f);
_AVX_CONST(avx_s2, -0.0001984090f);
_AVX_CONST(avx_s3, 0.0000027526f);
_AVX_CONST(avx_s4, -0.0000000239f);
_AVX_CONST(avx_one, 1.0f);
_AVX_CONST(avx_half, 0.5f);
/* 1. Range Reduction (Payne-Hanek style) */
/* q = round(x / 2pi) */
__m256 q = _mm256_mul_ps(x, _mm256_load_ps(avx_inv_2pi));
q = _mm256_round_ps(q, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/* x = x - q * 2pi (Extended Precision) */
/* This prevents the "Exploding Orbit" bug by keeping x small [-PI, PI] */
x = _mm256_fmadd_ps(q, _mm256_load_ps(avx_dp1), x);
x = _mm256_fmadd_ps(q, _mm256_load_ps(avx_dp2), x);
x = _mm256_fmadd_ps(q, _mm256_load_ps(avx_dp3), x);
/* 2. Polynomial Approximation (Minimax) */
__m256 x2 = _mm256_mul_ps(x, x);
/* Cosine: 1 + x^2 * (C0 + x^2 * (C1 + ...)) */
__m256 y_cos = _mm256_load_ps(avx_c4);
y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c3));
y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c2));
y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c1));
y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_c0));
y_cos = _mm256_fmadd_ps(y_cos, x2, _mm256_load_ps(avx_one));
/* Sine: x * (1 + x^2 * (S0 + x^2 * (S1 + ...))) */
__m256 y_sin = _mm256_load_ps(avx_s4);
y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s3));
y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s2));
y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s1));
y_sin = _mm256_fmadd_ps(y_sin, x2, _mm256_load_ps(avx_s0));
y_sin = _mm256_mul_ps(y_sin, x2);
y_sin = _mm256_fmadd_ps(y_sin, x, x);
*s = y_sin;
*c = y_cos;
}
static void
update_physics(int cnt, float dt_scalar) {
__m256 dt = _mm256_set1_ps(dt_scalar);
__m256 max_anom = _mm256_set1_ps(6.2831853f); /* 2*PI */
__m256 one = _mm256_set1_ps(1.0f);
__m256 anim_decay = _mm256_set1_ps(0.016f * 0.5f);
__m256 zero = _mm256_setzero_ps();
for (int i = 0; i < cnt; i += 8) {
/* 1. Dead/Alive Masking */
/* Note: Status is uint8. Load 64-bit chunk, expand to 32-bit int */
uint64_t s_chunk = *(uint64_t*)&g_sim.status[i];
__m128i s_128 = _mm_set_epi64x(0, s_chunk);
__m256i s_int = _mm256_cvtepu8_epi32(s_128);
__m256i dead_mask_i = _mm256_cmpeq_epi32(s_int, _mm256_set1_epi32(2));
__m256 dead_mask = _mm256_castsi256_ps(dead_mask_i);
/* 2. Load Orbital State */
__m256 a = _mm256_load_ps(&g_sim.a[i]);
__m256 e = _mm256_load_ps(&g_sim.e[i]);
__m256 incl = _mm256_load_ps(&g_sim.i[i]);
__m256 O = _mm256_load_ps(&g_sim.O[i]);
__m256 v = _mm256_load_ps(&g_sim.v[i]);
__m256 n = _mm256_load_ps(&g_sim.n[i]);
/* 3. Propagate Anomaly */
/* v += n * dt */
__m256 v_next = _mm256_fmadd_ps(n, dt, v);
/* Store raw v (let the SinCos range reducer handle the wrapping precision) */
/* We can do a simple modulo here to keep v from reaching infinity over weeks */
__m256 wrap_mask = _mm256_cmp_ps(v_next, max_anom, _CMP_GT_OQ);
v_next = _mm256_sub_ps(v_next, _mm256_and_ps(wrap_mask, max_anom));
_mm256_store_ps(&g_sim.v[i], v_next);
/* 4. High-Precision Trig */
__m256 cos_v, sin_v, cos_O, sin_O, cos_i, sin_i;
avx_sincos_ps(v_next, &sin_v, &cos_v);
avx_sincos_ps(O, &sin_O, &cos_O);
avx_sincos_ps(incl, &sin_i, &cos_i);
/* 5. Kepler Solution */
__m256 e2 = _mm256_mul_ps(e, e);
__m256 num = _mm256_mul_ps(a, _mm256_sub_ps(one, e2));
/* den = 1.0 + e * cos_v */
__m256 den = _mm256_fmadd_ps(e, cos_v, one);
__m256 r = _mm256_div_ps(num, den);
__m256 px = _mm256_mul_ps(r, cos_v);
__m256 py = _mm256_mul_ps(r, sin_v);
/* 6. Rotation to World */
/* x = px * cos_O - py * cos_i * sin_O */
__m256 term = _mm256_mul_ps(cos_i, sin_O);
__m256 new_x = _mm256_fmsub_ps(py, term, _mm256_mul_ps(px, cos_O));
/* fmsub(a,b,c) = -(a*b) + c ... wait, we want (px*cosO) - (py*term) */
/* FMSUB: (a*b) - c. FNMADD: -(a*b) + c */
new_x = _mm256_fnmadd_ps(py, term, _mm256_mul_ps(px, cos_O));
/* y = px * sin_O + py * cos_i * cos_O */
term = _mm256_mul_ps(cos_i, cos_O);
__m256 new_y = _mm256_fmadd_ps(py, term, _mm256_mul_ps(px, sin_O));
/* z = py * sin_i */
__m256 new_z = _mm256_mul_ps(py, sin_i);
/* 7. Velocity Calc */
__m256 old_x = _mm256_load_ps(&g_sim.x[i]);
__m256 old_y = _mm256_load_ps(&g_sim.y[i]);
__m256 old_z = _mm256_load_ps(&g_sim.z[i]);
__m256 inv_dt = _mm256_set1_ps(1.0f / dt_scalar);
__m256 new_vx = _mm256_mul_ps(_mm256_sub_ps(new_x, old_x), inv_dt);
__m256 new_vy = _mm256_mul_ps(_mm256_sub_ps(new_y, old_y), inv_dt);
__m256 new_vz = _mm256_mul_ps(_mm256_sub_ps(new_z, old_z), inv_dt);
/* 8. Dead/Alive Blending */
__m256 anim = _mm256_load_ps(&g_sim.anim[i]);
__m256 anim_next = _mm256_sub_ps(anim, anim_decay);
anim_next = _mm256_max_ps(anim_next, zero);
__m256 final_x = _mm256_blendv_ps(new_x, old_x, dead_mask);
__m256 final_y = _mm256_blendv_ps(new_y, old_y, dead_mask);
__m256 final_z = _mm256_blendv_ps(new_z, old_z, dead_mask);
__m256 final_vx = _mm256_blendv_ps(new_vx, zero, dead_mask);
__m256 final_vy = _mm256_blendv_ps(new_vy, zero, dead_mask);
__m256 final_vz = _mm256_blendv_ps(new_vz, zero, dead_mask);
__m256 final_anim = _mm256_blendv_ps(anim, anim_next, dead_mask);
/* 9. Store */
_mm256_store_ps(&g_sim.x[i], final_x);
_mm256_store_ps(&g_sim.y[i], final_y);
_mm256_store_ps(&g_sim.z[i], final_z);
_mm256_store_ps(&g_sim.vx[i], final_vx);
_mm256_store_ps(&g_sim.vy[i], final_vy);
_mm256_store_ps(&g_sim.vz[i], final_vz);
_mm256_store_ps(&g_sim.anim[i], final_anim);
}
}
static void
collision_step(int cnt) {
sort_agents_by_pos(cnt);
__m256 pred_t = _mm256_set1_ps(PREDICTION_TIME);
__m256 kill_sq = _mm256_set1_ps(KILL_DIST * KILL_DIST);
__m256 warn_sq = _mm256_set1_ps(WARNING_DIST * WARNING_DIST);
__m256 warn_dist_abs = _mm256_set1_ps(WARNING_DIST);
for (int k = 0; k < cnt; k++) {
int i = g_sim.sort_idx[k];
if (g_sim.status[i] == STATUS_HIT) {
continue;
}
float pxi_s = g_sim.x[i] + (g_sim.vx[i] * PREDICTION_TIME);
float pyi_s = g_sim.y[i] + (g_sim.vy[i] * PREDICTION_TIME);
float pzi_s = g_sim.z[i] + (g_sim.vz[i] * PREDICTION_TIME);
__m256 pxi = _mm256_set1_ps(pxi_s);
__m256 pyi = _mm256_set1_ps(pyi_s);
__m256 pzi = _mm256_set1_ps(pzi_s);
for (int chunk = 0; chunk < 2; chunk++) {
int base_offset = k + 1 + (chunk * 8);
if (base_offset >= cnt) {
break;
}
__m256i v_idx = _mm256_loadu_si256((__m256i*)&g_sim.sort_idx[base_offset]);
__m256 xj = _mm256_i32gather_ps(g_sim.x, v_idx, 4);
__m256 yj = _mm256_i32gather_ps(g_sim.y, v_idx, 4);
__m256 zj = _mm256_i32gather_ps(g_sim.z, v_idx, 4);
__m256 vxj = _mm256_i32gather_ps(g_sim.vx, v_idx, 4);
__m256 vyj = _mm256_i32gather_ps(g_sim.vy, v_idx, 4);
__m256 vzj = _mm256_i32gather_ps(g_sim.vz, v_idx, 4);
__m256 pxj = _mm256_add_ps(xj, _mm256_mul_ps(vxj, pred_t));
__m256 pyj = _mm256_add_ps(yj, _mm256_mul_ps(vyj, pred_t));
__m256 pzj = _mm256_add_ps(zj, _mm256_mul_ps(vzj, pred_t));
__m256 dx = _mm256_sub_ps(pxi, pxj);
__m256 dy = _mm256_sub_ps(pyi, pyj);
__m256 dz = _mm256_sub_ps(pzi, pzj);
__m256 abs_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
__m256 abs_dx = _mm256_and_ps(dx, abs_mask);
__m256 valid_mask = _mm256_cmp_ps(abs_dx, warn_dist_abs, _CMP_LE_OQ);
__m256 d2 = _mm256_add_ps(_mm256_mul_ps(dx,dx), _mm256_add_ps(_mm256_mul_ps(dy,dy), _mm256_mul_ps(dz,dz)));
__m256 is_kill = _mm256_cmp_ps(d2, kill_sq, _CMP_LT_OQ);
__m256 is_warn = _mm256_cmp_ps(d2, warn_sq, _CMP_LT_OQ);
is_kill = _mm256_and_ps(is_kill, valid_mask);
is_warn = _mm256_and_ps(is_warn, valid_mask);
int mask_k = _mm256_movemask_ps(is_kill);
int mask_w = _mm256_movemask_ps(is_warn);
if (mask_k == 0 && mask_w == 0) {
continue;
}
for (int bit = 0; bit < 8; bit++) {
int current_idx_offset = base_offset + bit;
if (current_idx_offset >= cnt) {
break;
}
if ((mask_k >> bit) & 1) {
int j = g_sim.sort_idx[current_idx_offset];
if (g_sim.status[j] == STATUS_HIT) {
continue;
}
g_sim.status[i] = STATUS_HIT;
g_sim.anim[i] = 1.0f;
g_sim.status[j] = STATUS_HIT;
g_sim.anim[j] = 1.0f;
}
else if ((mask_w >> bit) & 1) {
int j = g_sim.sort_idx[current_idx_offset];
if (g_sim.status[j] == STATUS_HIT) {
continue;
}
if (g_sim.type[i] == TYPE_SAT) {
g_sim.status[i] = STATUS_WARN;
g_sim.v[i] += 0.0002f;
g_total_maneuvers++;
}
if (g_sim.type[j] == TYPE_SAT) {
g_sim.status[j] = STATUS_WARN;
g_sim.v[j] += 0.0002f;
g_total_maneuvers++;
}
}
}
}
}
}
static void
DrawRingOrtho(float cx, float cy, float cz, float r, int axis) {
int segs = 16;
float step = (2.0f * PI) / segs;
for (int i = 0; i < segs; i++) {
float a1 = i * step; float a2 = (i + 1) * step;
float x1=0, y1=0, z1=0, x2=0, y2=0, z2=0;
if (axis == 0) {
x1 = cosf(a1)*r;
y1 = sinf(a1)*r;
x2 = cosf(a2)*r;
y2 = sinf(a2)*r;
} else if (axis == 1) {
x1 = cosf(a1)*r;
z1 = sinf(a1)*r;
x2 = cosf(a2)*r;
z2 = sinf(a2)*r;
} else {
y1 = cosf(a1)*r;
z1 = sinf(a1)*r;
y2 = cosf(a2)*r;
z2 = sinf(a2)*r;
}
rlVertex3f(cx + x1, cy + y1, cz + z1);
rlVertex3f(cx + x2, cy + y2, cz + z2);
}
}
static void
DrawScene(int cnt) {
rlBegin(RL_LINES);
for (int i = 0; i < cnt; i++) {
/* SWIZZLE: Map Z-Up Physics to Y-Up Render */
float rx = g_sim.x[i] * RENDER_SCALE;
float ry = g_sim.z[i] * RENDER_SCALE; /* Z becomes Up (Y) */
float rz = g_sim.y[i] * RENDER_SCALE; /* Y becomes Depth (Z) */
/* Pre-calc Velocity Tail Swizzled */
float rvx = g_sim.vx[i] * RENDER_SCALE;
float rvy = g_sim.vz[i] * RENDER_SCALE;
float rvz = g_sim.vy[i] * RENDER_SCALE;
if (g_sim.status[i] == 2) {
/* Explosions */
if (g_sim.anim[i] > 0.01f) {
int alpha = (int)(g_sim.anim[i] * 255.0f);
rlColor4ub(255, 60, 0, alpha);
float radius = (1.0f - g_sim.anim[i]) * 3.0f;
DrawRingOrtho(rx, ry, rz, radius, 0);
DrawRingOrtho(rx, ry, rz, radius, 1);
DrawRingOrtho(rx, ry, rz, radius, 2);
float c = 0.1f;
rlVertex3f(rx-c, ry, rz); rlVertex3f(rx+c, ry, rz);
rlVertex3f(rx, ry-c, rz); rlVertex3f(rx, ry+c, rz);
rlVertex3f(rx, ry, rz-c); rlVertex3f(rx, ry, rz+c);
} else {
/* Wreckage */
rlColor4ub(80, 0, 0, 255);
float w = 0.02f;
rlVertex3f(rx-w, ry, rz-w); rlVertex3f(rx+w, ry, rz+w);
rlVertex3f(rx-w, ry, rz+w); rlVertex3f(rx+w, ry, rz-w);
}
continue;
}
/* Alive Objects */
Color c = g_sim.colors[i];
rlColor4ub(c.r, c.g, c.b, 255);
rlVertex3f(rx, ry, rz);
if (g_sim.type[i] == TYPE_SAT) {
rlVertex3f(rx + 0.02f, ry, rz);
} else {
rlVertex3f(rx + 0.01f, ry, rz);
}
}
rlEnd();
}
static void
UpdateCameraCustom(Camera3D *cam) {
/* 1. Orbit (Right Mouse) */
if (IsMouseButtonDown(MOUSE_BUTTON_RIGHT)) {
Vector2 delta = GetMouseDelta();
float speed = 0.003f;
Vector3 pos = Vector3Subtract(cam->position, cam->target);
float radius = Vector3Length(pos);
/* Calculate Theta (Azimuth) and Phi (Elevation) */
float theta = atan2f(pos.x, pos.z);
float phi = acosf(pos.y / radius);
theta -= delta.x * speed;
phi -= delta.y * speed; /* Drag Up = Decrease Phi = Move Up */
if (phi < 0.01f) {
phi = 0.01f;
}
if (phi > PI - 0.01f) {
phi = (float)PI - 0.01f;
}
pos.x = radius * sinf(phi) * sinf(theta);
pos.z = radius * sinf(phi) * cosf(theta);
pos.y = radius * cosf(phi);
cam->position = Vector3Add(cam->target, pos);
}
/* 2. Zoom (Mouse Wheel) */
float wheel = GetMouseWheelMove();
if (wheel != 0) {
Vector3 pos = Vector3Subtract(cam->position, cam->target);
float radius = Vector3Length(pos);
radius -= wheel * 2.0f;
if (radius < 1.5f) {
radius = 1.5f;
}
pos = Vector3Scale(Vector3Normalize(pos), radius);
cam->position = Vector3Add(cam->target, pos);
}
}
extern int
main(void) {
InitWindow(800, 600, "KESSLER");
SetTargetFPS(60);
init_sim(SIM_AGENT_COUNT);
Camera3D cam = { 0 };
cam.position = (Vector3){ 15.0f, 15.0f, 15.0f };
cam.target = (Vector3){ 0.0f, 0.0f, 0.0f };
cam.up = (Vector3){ 0.0f, 1.0f, 0.0f };
cam.fovy = 45.0f;
cam.projection = CAMERA_PERSPECTIVE;
Model earth = {0};
bool has_model = false;
float model_scale = 1.0f;
if (FileExists("resources/earth.glb")) {
earth = LoadModel("resources/earth.glb");
BoundingBox bb = GetModelBoundingBox(earth);
float dx = bb.max.x - bb.min.x;
float dy = bb.max.y - bb.min.y;
float dz = bb.max.z - bb.min.z;
float max_dim = (dx > dy) ? (dx > dz ? dx : dz) : (dy > dz ? dy : dz);
if (max_dim > 0.0f) {
model_scale = 2.0f / max_dim;
}
has_model = true;
}
uint64_t sim_time = (uint64_t)time(NULL) * 1000;
uint32_t frame_counter = 0;
bool paused = false;
while (!WindowShouldClose()) {
if (!paused) {
update_physics(SIM_AGENT_COUNT, 0.016f * TIME_SCALE);
collision_step(SIM_AGENT_COUNT);
sim_time += (uint64_t)(16 * TIME_SCALE);
frame_counter++;
}
BeginDrawing();
ClearBackground(BLACK);
BeginMode3D(cam);
if (has_model) {
double day_prog = (double)(sim_time % 86400000) / 86400000.0;
DrawModelEx(earth, (Vector3){0,0,0}, (Vector3){0,1,0}, (float)(day_prog*360.0), (Vector3){model_scale,model_scale,model_scale}, WHITE);
} else {
DrawSphereWires((Vector3){0,0,0}, 1.0f, 16, 16, DARKGREEN);
DrawSphere((Vector3){0,0,0}, 0.95f, BLACK);
}
DrawScene(SIM_AGENT_COUNT);
EndMode3D();
int sw = GetScreenWidth();
DrawFPS(10, 10);
char time_buf[64];
{
time_t raw_time = sim_time / 1000;
struct tm * tinfo = gmtime(&raw_time);
strftime(time_buf, sizeof(time_buf), "%H:%M:%S UTC", tinfo);
}
int n_hit = count_hits_avx2(SIM_AGENT_COUNT);
DrawText("KESSLER", sw - 280, 10, 20, WHITE);
DrawText("-----------------------", sw - 280, 30, 20, DARKGRAY);
DrawText(TextFormat("TIME: %s", time_buf), sw - 280, 50, 10, LIGHTGRAY);
DrawText(TextFormat("ASSETS: %d", NUM_SATELLITES), sw - 280, 65, 10, CYAN);
float rec_rate = 60.0f / RECORD_INTERVAL;
int ops = (int)(SIM_AGENT_COUNT * rec_rate);
float mb = ops * 80.0f / (1024*1024);
DrawText(TextFormat("INGEST: %.1f M/sec", ops/1000000.0f), sw - 280, 90, 10, GREEN);
DrawText(TextFormat("LOG RATE: %.1f GB/hr", (mb*3600)/1024), sw - 280, 105, 10, GREEN);
DrawText(TextFormat("EVASIONS:%llu", (unsigned long long)g_total_maneuvers), sw - 280, 130, 10, YELLOW);
if (n_hit > 0) {
DrawText(TextFormat("IMPACTS: %d", n_hit), sw - 280, 145, 10, RED);
} else {
DrawText("IMPACTS: NONE", sw - 280, 145, 10, LIME);
}
if (IsKeyPressed(KEY_SPACE)) {
paused = !paused;
}
if (paused) {
UpdateCameraCustom(&cam);
} else {
UpdateCamera(&cam, CAMERA_ORBITAL);
}
EndDrawing();
}
if(has_model) {
UnloadModel(earth);
}
CloseWindow();
return 0;
}
@vurtun
Copy link
Author

vurtun commented Dec 29, 2025

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment