Last active
December 19, 2025 22:41
-
-
Save vurtun/178045842840b73636383890ade486b4 to your computer and use it in GitHub Desktop.
Foot IK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdint.h> | |
| #include <stdalign.h> /* C11 alignas */ | |
| #include <immintrin.h> /* AVX2 intrinsics */ | |
| #include <float.h> | |
| /* -------------------------------------------------------------------------- | |
| * SYSTEM CONFIGURATION | |
| * -------------------------------------------------------------------------- */ | |
| #define BATCH_SIZE 8 | |
| #define MAX_ENTITIES 512 | |
| #define ALIGN_CACHE 64 | |
| /* | |
| * MATH_BIAS: A tiny offset added to the reference vector. | |
| * Prevents Division-By-Zero if the animator poses the leg perfectly straight. | |
| */ | |
| #define MATH_EPSILON 0.00001f | |
| #define MATH_BIAS 0.00010f | |
| /* -------------------------------------------------------------------------- | |
| * DATA STRUCTURES | |
| * -------------------------------------------------------------------------- */ | |
| struct ik_system { | |
| /* --- INPUTS --- */ | |
| alignas(ALIGN_CACHE) float hip_x[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float hip_y[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float hip_z[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float target_x[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float target_y[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float target_z[MAX_ENTITIES]; | |
| /* The knee position from the animation pose (Reference Pole) */ | |
| alignas(ALIGN_CACHE) float anim_knee_x[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float anim_knee_y[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float anim_knee_z[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float upper_len[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float lower_len[MAX_ENTITIES]; | |
| /* --- OUTPUTS --- */ | |
| alignas(ALIGN_CACHE) float knee_res_x[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float knee_res_y[MAX_ENTITIES]; | |
| alignas(ALIGN_CACHE) float knee_res_z[MAX_ENTITIES]; | |
| }; | |
| /* Global Storage (Static .bss allocation) */ | |
| static struct ik_system global_ik_system; | |
| /* -------------------------------------------------------------------------- | |
| * MATH INTRINSICS | |
| * -------------------------------------------------------------------------- */ | |
| static inline __m256 | |
| math_dot_avx(__m256 ax, __m256 ay, __m256 az, | |
| __m256 bx, __m256 by, __m256 bz) { | |
| __m256 res = _mm256_mul_ps(ax, bx); | |
| res = _mm256_fmadd_ps(ay, by, res); | |
| res = _mm256_fmadd_ps(az, bz, res); | |
| return res; | |
| } | |
| static inline void | |
| math_normalize_avx(__m256 *x, __m256 *y, __m256 *z) { | |
| __m256 dot = math_dot_avx(*x, *y, *z, *x, *y, *z); | |
| __m256 inv = _mm256_rsqrt_ps(dot); | |
| *x = _mm256_mul_ps(*x, inv); | |
| *y = _mm256_mul_ps(*y, inv); | |
| *z = _mm256_mul_ps(*z, inv); | |
| } | |
| /* -------------------------------------------------------------------------- | |
| * LOGIC | |
| * -------------------------------------------------------------------------- */ | |
| /* | |
| * AVX2 Optimized Initialization. | |
| * Uses 256-bit stores to fill memory 8x faster than scalar loops. | |
| * Ensuring deterministic "warm-up" of the cache lines. | |
| */ | |
| extern void | |
| ik_init(struct ik_system *sys) { | |
| /* Prepare Constants */ | |
| const __m256 v_zero = _mm256_setzero_ps(); | |
| const __m256 v_hip_y = _mm256_set1_ps(20.0f); | |
| /* Default Knee: Slightly bent forward to ensure valid initial plane */ | |
| const __m256 v_knee_y = _mm256_set1_ps(10.0f); | |
| const __m256 v_knee_z = _mm256_set1_ps(5.0f); | |
| const __m256 v_len = _mm256_set1_ps(10.0f); | |
| for (int i = 0; i < MAX_ENTITIES; i += BATCH_SIZE) { | |
| /* Inputs: Hip */ | |
| _mm256_store_ps(&sys->hip_x[i], v_zero); | |
| _mm256_store_ps(&sys->hip_y[i], v_hip_y); | |
| _mm256_store_ps(&sys->hip_z[i], v_zero); | |
| /* Inputs: Target */ | |
| _mm256_store_ps(&sys->target_x[i], v_zero); | |
| _mm256_store_ps(&sys->target_y[i], v_zero); | |
| _mm256_store_ps(&sys->target_z[i], v_zero); | |
| /* Inputs: Animated Knee (Reference) */ | |
| _mm256_store_ps(&sys->anim_knee_x[i], v_zero); | |
| _mm256_store_ps(&sys->anim_knee_y[i], v_knee_y); | |
| _mm256_store_ps(&sys->anim_knee_z[i], v_knee_z); | |
| /* Inputs: Lengths */ | |
| _mm256_store_ps(&sys->upper_len[i], v_len); | |
| _mm256_store_ps(&sys->lower_len[i], v_len); | |
| /* Outputs: Result */ | |
| _mm256_store_ps(&sys->knee_res_x[i], v_zero); | |
| _mm256_store_ps(&sys->knee_res_y[i], v_zero); | |
| _mm256_store_ps(&sys->knee_res_z[i], v_zero); | |
| } | |
| } | |
| extern void | |
| ik_solve(struct ik_system * restrict sys, int start, int end) { | |
| const __m256 v_epsilon = _mm256_set1_ps(MATH_EPSILON); | |
| const __m256 v_bias = _mm256_set1_ps(MATH_BIAS); | |
| const __m256 v_zero = _mm256_setzero_ps(); | |
| const __m256 v_two = _mm256_set1_ps(2.0f); | |
| for (int i = start; i < end; i += BATCH_SIZE) { | |
| /* --- 1. Load Data --- */ | |
| __m256 hip_x = _mm256_load_ps(&sys->hip_x[i]); | |
| __m256 hip_y = _mm256_load_ps(&sys->hip_y[i]); | |
| __m256 hip_z = _mm256_load_ps(&sys->hip_z[i]); | |
| __m256 tgt_x = _mm256_load_ps(&sys->target_x[i]); | |
| __m256 tgt_y = _mm256_load_ps(&sys->target_y[i]); | |
| __m256 tgt_z = _mm256_load_ps(&sys->target_z[i]); | |
| __m256 l1 = _mm256_load_ps(&sys->upper_len[i]); | |
| __m256 l2 = _mm256_load_ps(&sys->lower_len[i]); | |
| /* --- 2. Calculate Axis (Hip -> Target) --- */ | |
| __m256 u_x = _mm256_sub_ps(tgt_x, hip_x); | |
| __m256 u_y = _mm256_sub_ps(tgt_y, hip_y); | |
| __m256 u_z = _mm256_sub_ps(tgt_z, hip_z); | |
| __m256 dist_sq = math_dot_avx(u_x, u_y, u_z, u_x, u_y, u_z); | |
| __m256 dist = _mm256_sqrt_ps(dist_sq); | |
| /* --- 3. Branchless Triangle Logic --- */ | |
| __m256 total_len = _mm256_add_ps(l1, l2); | |
| __m256 max_reach = _mm256_sub_ps(total_len, v_epsilon); | |
| __m256 c_dist = _mm256_min_ps(dist, max_reach); | |
| c_dist = _mm256_max_ps(c_dist, v_epsilon); | |
| /* Law of Cosines */ | |
| __m256 l1_sq = _mm256_mul_ps(l1, l1); | |
| __m256 l2_sq = _mm256_mul_ps(l2, l2); | |
| __m256 c_sq = _mm256_mul_ps(c_dist, c_dist); | |
| __m256 num = _mm256_add_ps(l1_sq, _mm256_sub_ps(c_sq, l2_sq)); | |
| __m256 den = _mm256_mul_ps(v_two, _mm256_mul_ps(l1, c_dist)); | |
| __m256 cos_a = _mm256_div_ps(num, den); | |
| __m256 adj = _mm256_mul_ps(l1, cos_a); | |
| __m256 opp_sq = _mm256_sub_ps(l1_sq, _mm256_mul_ps(adj, adj)); | |
| __m256 opp = _mm256_sqrt_ps(_mm256_max_ps(opp_sq, v_zero)); | |
| /* --- 4. Basis Construction (Anim Knee Reference) --- */ | |
| __m256 anim_x = _mm256_load_ps(&sys->anim_knee_x[i]); | |
| __m256 anim_y = _mm256_load_ps(&sys->anim_knee_y[i]); | |
| __m256 anim_z = _mm256_load_ps(&sys->anim_knee_z[i]); | |
| __m256 ref_x = _mm256_sub_ps(anim_x, hip_x); | |
| __m256 ref_y = _mm256_sub_ps(anim_y, hip_y); | |
| __m256 ref_z = _mm256_sub_ps(anim_z, hip_z); | |
| /* Bias to prevent singularity on straight legs */ | |
| ref_x = _mm256_add_ps(ref_x, v_bias); | |
| math_normalize_avx(&u_x, &u_y, &u_z); | |
| /* V = Cross(U, Ref) */ | |
| __m256 v_x = _mm256_sub_ps(_mm256_mul_ps(u_y, ref_z), _mm256_mul_ps(u_z, ref_y)); | |
| __m256 v_y = _mm256_sub_ps(_mm256_mul_ps(u_z, ref_x), _mm256_mul_ps(u_x, ref_z)); | |
| __m256 v_z = _mm256_sub_ps(_mm256_mul_ps(u_x, ref_y), _mm256_mul_ps(u_y, ref_x)); | |
| math_normalize_avx(&v_x, &v_y, &v_z); | |
| /* W = Cross(V, U) */ | |
| __m256 w_x = _mm256_sub_ps(_mm256_mul_ps(v_y, u_z), _mm256_mul_ps(v_z, u_y)); | |
| __m256 w_y = _mm256_sub_ps(_mm256_mul_ps(v_z, u_x), _mm256_mul_ps(v_x, u_z)); | |
| __m256 w_z = _mm256_sub_ps(_mm256_mul_ps(v_x, u_y), _mm256_mul_ps(v_y, u_x)); | |
| math_normalize_avx(&w_x, &w_y, &w_z); | |
| /* --- 5. Final Position --- */ | |
| __m256 p1_x = _mm256_mul_ps(u_x, adj); | |
| __m256 p1_y = _mm256_mul_ps(u_y, adj); | |
| __m256 p1_z = _mm256_mul_ps(u_z, adj); | |
| __m256 p2_x = _mm256_mul_ps(w_x, opp); | |
| __m256 p2_y = _mm256_mul_ps(w_y, opp); | |
| __m256 p2_z = _mm256_mul_ps(w_z, opp); | |
| __m256 res_x = _mm256_add_ps(hip_x, _mm256_add_ps(p1_x, p2_x)); | |
| __m256 res_y = _mm256_add_ps(hip_y, _mm256_add_ps(p1_y, p2_y)); | |
| __m256 res_z = _mm256_add_ps(hip_z, _mm256_add_ps(p1_z, p2_z)); | |
| /* --- 6. Store --- */ | |
| _mm256_store_ps(&sys->knee_res_x[i], res_x); | |
| _mm256_store_ps(&sys->knee_res_y[i], res_y); | |
| _mm256_store_ps(&sys->knee_res_z[i], res_z); | |
| } | |
| } | |
| extern void | |
| ik_run(void) { | |
| ik_solve(&global_ik_system, 0, MAX_ENTITIES); | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "raylib.h" | |
| #include "raymath.h" | |
| #include <math.h> | |
| extern int | |
| main(void) { | |
| // 1. Initialization | |
| const int screenWidth = 600; | |
| const int screenHeight = 480; | |
| InitWindow(screenWidth, screenHeight, "IK Test"); | |
| // 2. Setup Camera | |
| Camera3D camera = { 0 }; | |
| camera.position = (Vector3){ 40.0f, 40.0f, 40.0f }; | |
| camera.target = (Vector3){ 0.0f, 10.0f, 0.0f }; | |
| camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; | |
| camera.fovy = 45.0f; | |
| camera.projection = CAMERA_PERSPECTIVE; | |
| // 3. Initialize IK System | |
| ik_init(&global_ik_system); | |
| SetTargetFPS(60); | |
| float time = 0.0f; | |
| while (!WindowShouldClose()) { | |
| time += GetFrameTime(); | |
| // --- UPDATE IK INPUTS --- | |
| int i = 0; // We only visualize the first entity | |
| // Fix Hip in space | |
| global_ik_system.hip_x[i] = 0.0f; | |
| global_ik_system.hip_y[i] = 25.0f; | |
| global_ik_system.hip_z[i] = 0.0f; | |
| // Make the Target (Foot) move in a circle on the floor | |
| global_ik_system.target_x[i] = sinf(time * 2.0f) * 15.0f; | |
| global_ik_system.target_y[i] = 0.0f; // On the ground | |
| global_ik_system.target_z[i] = cosf(time * 2.0f) * 15.0f; | |
| // Make the "Animated Knee" (Reference) orbit slowly | |
| // This demonstrates how the knee follows the animator's "hint" | |
| global_ik_system.anim_knee_x[i] = sinf(time * 0.5f) * 10.0f; | |
| global_ik_system.anim_knee_y[i] = 12.0f; | |
| global_ik_system.anim_knee_z[i] = cosf(time * 0.5f) * 10.0f; | |
| global_ik_system.upper_len[i] = 15.0f; | |
| global_ik_system.lower_len[i] = 15.0f; | |
| // --- RUN SOLVER --- | |
| ik_run(); | |
| // --- DRAWING --- | |
| BeginDrawing(); | |
| ClearBackground(DARKGRAY); | |
| BeginMode3D(camera); | |
| DrawGrid(20, 5.0f); | |
| // Get Coordinates | |
| Vector3 hip = { global_ik_system.hip_x[i], global_ik_system.hip_y[i], global_ik_system.hip_z[i] }; | |
| Vector3 target = { global_ik_system.target_x[i], global_ik_system.target_y[i], global_ik_system.target_z[i] }; | |
| Vector3 animKnee = { global_ik_system.anim_knee_x[i], global_ik_system.anim_knee_y[i], global_ik_system.anim_knee_z[i] }; | |
| Vector3 solvedKnee = { global_ik_system.knee_res_x[i], global_ik_system.knee_res_y[i], global_ik_system.knee_res_z[i] }; | |
| // 1. Draw "Animated Knee" Reference (Ghostly Sphere) | |
| DrawSphere(animKnee, 0.5f, Fade(YELLOW, 0.3f)); | |
| DrawLine3D(hip, animKnee, Fade(YELLOW, 0.2f)); | |
| // 2. Draw Target (Foot destination) | |
| DrawSphere(target, 1.0f, RED); | |
| // 3. Draw IK Solved Bones (Thick lines) | |
| DrawCylinderEx(hip, solvedKnee, 0.8f, 0.8f, 8, BLUE); // Thigh | |
| DrawCylinderEx(solvedKnee, target, 0.8f, 0.8f, 8, SKYBLUE); // Calf | |
| // 4. Draw Joints | |
| DrawSphere(hip, 1.2f, BLACK); | |
| DrawSphere(solvedKnee, 1.2f, WHITE); | |
| EndMode3D(); | |
| DrawText("AVX2 IK SOLVER TEST", 10, 10, 20, RAYWHITE); | |
| DrawText("Red = Target | Blue = Solved Bones | Yellow = Plane Hint", 10, 40, 10, LIGHTGRAY); | |
| DrawFPS(10, screenHeight - 20); | |
| EndDrawing(); | |
| } | |
| CloseWindow(); | |
| return 0; | |
| } |
Author
vurtun
commented
Dec 19, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment