Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active December 19, 2025 22:41
Show Gist options
  • Select an option

  • Save vurtun/178045842840b73636383890ade486b4 to your computer and use it in GitHub Desktop.

Select an option

Save vurtun/178045842840b73636383890ade486b4 to your computer and use it in GitHub Desktop.
Foot IK
#include <stdint.h>
#include <stdalign.h> /* C11 alignas */
#include <immintrin.h> /* AVX2 intrinsics */
#include <float.h>
/* --------------------------------------------------------------------------
* SYSTEM CONFIGURATION
* -------------------------------------------------------------------------- */
#define BATCH_SIZE 8
#define MAX_ENTITIES 512
#define ALIGN_CACHE 64
/*
* MATH_BIAS: A tiny offset added to the reference vector.
* Prevents Division-By-Zero if the animator poses the leg perfectly straight.
*/
#define MATH_EPSILON 0.00001f
#define MATH_BIAS 0.00010f
/* --------------------------------------------------------------------------
* DATA STRUCTURES
* -------------------------------------------------------------------------- */
struct ik_system {
/* --- INPUTS --- */
alignas(ALIGN_CACHE) float hip_x[MAX_ENTITIES];
alignas(ALIGN_CACHE) float hip_y[MAX_ENTITIES];
alignas(ALIGN_CACHE) float hip_z[MAX_ENTITIES];
alignas(ALIGN_CACHE) float target_x[MAX_ENTITIES];
alignas(ALIGN_CACHE) float target_y[MAX_ENTITIES];
alignas(ALIGN_CACHE) float target_z[MAX_ENTITIES];
/* The knee position from the animation pose (Reference Pole) */
alignas(ALIGN_CACHE) float anim_knee_x[MAX_ENTITIES];
alignas(ALIGN_CACHE) float anim_knee_y[MAX_ENTITIES];
alignas(ALIGN_CACHE) float anim_knee_z[MAX_ENTITIES];
alignas(ALIGN_CACHE) float upper_len[MAX_ENTITIES];
alignas(ALIGN_CACHE) float lower_len[MAX_ENTITIES];
/* --- OUTPUTS --- */
alignas(ALIGN_CACHE) float knee_res_x[MAX_ENTITIES];
alignas(ALIGN_CACHE) float knee_res_y[MAX_ENTITIES];
alignas(ALIGN_CACHE) float knee_res_z[MAX_ENTITIES];
};
/* Global Storage (Static .bss allocation) */
static struct ik_system global_ik_system;
/* --------------------------------------------------------------------------
* MATH INTRINSICS
* -------------------------------------------------------------------------- */
static inline __m256
math_dot_avx(__m256 ax, __m256 ay, __m256 az,
__m256 bx, __m256 by, __m256 bz) {
__m256 res = _mm256_mul_ps(ax, bx);
res = _mm256_fmadd_ps(ay, by, res);
res = _mm256_fmadd_ps(az, bz, res);
return res;
}
static inline void
math_normalize_avx(__m256 *x, __m256 *y, __m256 *z) {
__m256 dot = math_dot_avx(*x, *y, *z, *x, *y, *z);
__m256 inv = _mm256_rsqrt_ps(dot);
*x = _mm256_mul_ps(*x, inv);
*y = _mm256_mul_ps(*y, inv);
*z = _mm256_mul_ps(*z, inv);
}
/* --------------------------------------------------------------------------
* LOGIC
* -------------------------------------------------------------------------- */
/*
* AVX2 Optimized Initialization.
* Uses 256-bit stores to fill memory 8x faster than scalar loops.
* Ensuring deterministic "warm-up" of the cache lines.
*/
extern void
ik_init(struct ik_system *sys) {
/* Prepare Constants */
const __m256 v_zero = _mm256_setzero_ps();
const __m256 v_hip_y = _mm256_set1_ps(20.0f);
/* Default Knee: Slightly bent forward to ensure valid initial plane */
const __m256 v_knee_y = _mm256_set1_ps(10.0f);
const __m256 v_knee_z = _mm256_set1_ps(5.0f);
const __m256 v_len = _mm256_set1_ps(10.0f);
for (int i = 0; i < MAX_ENTITIES; i += BATCH_SIZE) {
/* Inputs: Hip */
_mm256_store_ps(&sys->hip_x[i], v_zero);
_mm256_store_ps(&sys->hip_y[i], v_hip_y);
_mm256_store_ps(&sys->hip_z[i], v_zero);
/* Inputs: Target */
_mm256_store_ps(&sys->target_x[i], v_zero);
_mm256_store_ps(&sys->target_y[i], v_zero);
_mm256_store_ps(&sys->target_z[i], v_zero);
/* Inputs: Animated Knee (Reference) */
_mm256_store_ps(&sys->anim_knee_x[i], v_zero);
_mm256_store_ps(&sys->anim_knee_y[i], v_knee_y);
_mm256_store_ps(&sys->anim_knee_z[i], v_knee_z);
/* Inputs: Lengths */
_mm256_store_ps(&sys->upper_len[i], v_len);
_mm256_store_ps(&sys->lower_len[i], v_len);
/* Outputs: Result */
_mm256_store_ps(&sys->knee_res_x[i], v_zero);
_mm256_store_ps(&sys->knee_res_y[i], v_zero);
_mm256_store_ps(&sys->knee_res_z[i], v_zero);
}
}
extern void
ik_solve(struct ik_system * restrict sys, int start, int end) {
const __m256 v_epsilon = _mm256_set1_ps(MATH_EPSILON);
const __m256 v_bias = _mm256_set1_ps(MATH_BIAS);
const __m256 v_zero = _mm256_setzero_ps();
const __m256 v_two = _mm256_set1_ps(2.0f);
for (int i = start; i < end; i += BATCH_SIZE) {
/* --- 1. Load Data --- */
__m256 hip_x = _mm256_load_ps(&sys->hip_x[i]);
__m256 hip_y = _mm256_load_ps(&sys->hip_y[i]);
__m256 hip_z = _mm256_load_ps(&sys->hip_z[i]);
__m256 tgt_x = _mm256_load_ps(&sys->target_x[i]);
__m256 tgt_y = _mm256_load_ps(&sys->target_y[i]);
__m256 tgt_z = _mm256_load_ps(&sys->target_z[i]);
__m256 l1 = _mm256_load_ps(&sys->upper_len[i]);
__m256 l2 = _mm256_load_ps(&sys->lower_len[i]);
/* --- 2. Calculate Axis (Hip -> Target) --- */
__m256 u_x = _mm256_sub_ps(tgt_x, hip_x);
__m256 u_y = _mm256_sub_ps(tgt_y, hip_y);
__m256 u_z = _mm256_sub_ps(tgt_z, hip_z);
__m256 dist_sq = math_dot_avx(u_x, u_y, u_z, u_x, u_y, u_z);
__m256 dist = _mm256_sqrt_ps(dist_sq);
/* --- 3. Branchless Triangle Logic --- */
__m256 total_len = _mm256_add_ps(l1, l2);
__m256 max_reach = _mm256_sub_ps(total_len, v_epsilon);
__m256 c_dist = _mm256_min_ps(dist, max_reach);
c_dist = _mm256_max_ps(c_dist, v_epsilon);
/* Law of Cosines */
__m256 l1_sq = _mm256_mul_ps(l1, l1);
__m256 l2_sq = _mm256_mul_ps(l2, l2);
__m256 c_sq = _mm256_mul_ps(c_dist, c_dist);
__m256 num = _mm256_add_ps(l1_sq, _mm256_sub_ps(c_sq, l2_sq));
__m256 den = _mm256_mul_ps(v_two, _mm256_mul_ps(l1, c_dist));
__m256 cos_a = _mm256_div_ps(num, den);
__m256 adj = _mm256_mul_ps(l1, cos_a);
__m256 opp_sq = _mm256_sub_ps(l1_sq, _mm256_mul_ps(adj, adj));
__m256 opp = _mm256_sqrt_ps(_mm256_max_ps(opp_sq, v_zero));
/* --- 4. Basis Construction (Anim Knee Reference) --- */
__m256 anim_x = _mm256_load_ps(&sys->anim_knee_x[i]);
__m256 anim_y = _mm256_load_ps(&sys->anim_knee_y[i]);
__m256 anim_z = _mm256_load_ps(&sys->anim_knee_z[i]);
__m256 ref_x = _mm256_sub_ps(anim_x, hip_x);
__m256 ref_y = _mm256_sub_ps(anim_y, hip_y);
__m256 ref_z = _mm256_sub_ps(anim_z, hip_z);
/* Bias to prevent singularity on straight legs */
ref_x = _mm256_add_ps(ref_x, v_bias);
math_normalize_avx(&u_x, &u_y, &u_z);
/* V = Cross(U, Ref) */
__m256 v_x = _mm256_sub_ps(_mm256_mul_ps(u_y, ref_z), _mm256_mul_ps(u_z, ref_y));
__m256 v_y = _mm256_sub_ps(_mm256_mul_ps(u_z, ref_x), _mm256_mul_ps(u_x, ref_z));
__m256 v_z = _mm256_sub_ps(_mm256_mul_ps(u_x, ref_y), _mm256_mul_ps(u_y, ref_x));
math_normalize_avx(&v_x, &v_y, &v_z);
/* W = Cross(V, U) */
__m256 w_x = _mm256_sub_ps(_mm256_mul_ps(v_y, u_z), _mm256_mul_ps(v_z, u_y));
__m256 w_y = _mm256_sub_ps(_mm256_mul_ps(v_z, u_x), _mm256_mul_ps(v_x, u_z));
__m256 w_z = _mm256_sub_ps(_mm256_mul_ps(v_x, u_y), _mm256_mul_ps(v_y, u_x));
math_normalize_avx(&w_x, &w_y, &w_z);
/* --- 5. Final Position --- */
__m256 p1_x = _mm256_mul_ps(u_x, adj);
__m256 p1_y = _mm256_mul_ps(u_y, adj);
__m256 p1_z = _mm256_mul_ps(u_z, adj);
__m256 p2_x = _mm256_mul_ps(w_x, opp);
__m256 p2_y = _mm256_mul_ps(w_y, opp);
__m256 p2_z = _mm256_mul_ps(w_z, opp);
__m256 res_x = _mm256_add_ps(hip_x, _mm256_add_ps(p1_x, p2_x));
__m256 res_y = _mm256_add_ps(hip_y, _mm256_add_ps(p1_y, p2_y));
__m256 res_z = _mm256_add_ps(hip_z, _mm256_add_ps(p1_z, p2_z));
/* --- 6. Store --- */
_mm256_store_ps(&sys->knee_res_x[i], res_x);
_mm256_store_ps(&sys->knee_res_y[i], res_y);
_mm256_store_ps(&sys->knee_res_z[i], res_z);
}
}
extern void
ik_run(void) {
ik_solve(&global_ik_system, 0, MAX_ENTITIES);
}
#include "raylib.h"
#include "raymath.h"
#include <math.h>
extern int
main(void) {
// 1. Initialization
const int screenWidth = 600;
const int screenHeight = 480;
InitWindow(screenWidth, screenHeight, "IK Test");
// 2. Setup Camera
Camera3D camera = { 0 };
camera.position = (Vector3){ 40.0f, 40.0f, 40.0f };
camera.target = (Vector3){ 0.0f, 10.0f, 0.0f };
camera.up = (Vector3){ 0.0f, 1.0f, 0.0f };
camera.fovy = 45.0f;
camera.projection = CAMERA_PERSPECTIVE;
// 3. Initialize IK System
ik_init(&global_ik_system);
SetTargetFPS(60);
float time = 0.0f;
while (!WindowShouldClose()) {
time += GetFrameTime();
// --- UPDATE IK INPUTS ---
int i = 0; // We only visualize the first entity
// Fix Hip in space
global_ik_system.hip_x[i] = 0.0f;
global_ik_system.hip_y[i] = 25.0f;
global_ik_system.hip_z[i] = 0.0f;
// Make the Target (Foot) move in a circle on the floor
global_ik_system.target_x[i] = sinf(time * 2.0f) * 15.0f;
global_ik_system.target_y[i] = 0.0f; // On the ground
global_ik_system.target_z[i] = cosf(time * 2.0f) * 15.0f;
// Make the "Animated Knee" (Reference) orbit slowly
// This demonstrates how the knee follows the animator's "hint"
global_ik_system.anim_knee_x[i] = sinf(time * 0.5f) * 10.0f;
global_ik_system.anim_knee_y[i] = 12.0f;
global_ik_system.anim_knee_z[i] = cosf(time * 0.5f) * 10.0f;
global_ik_system.upper_len[i] = 15.0f;
global_ik_system.lower_len[i] = 15.0f;
// --- RUN SOLVER ---
ik_run();
// --- DRAWING ---
BeginDrawing();
ClearBackground(DARKGRAY);
BeginMode3D(camera);
DrawGrid(20, 5.0f);
// Get Coordinates
Vector3 hip = { global_ik_system.hip_x[i], global_ik_system.hip_y[i], global_ik_system.hip_z[i] };
Vector3 target = { global_ik_system.target_x[i], global_ik_system.target_y[i], global_ik_system.target_z[i] };
Vector3 animKnee = { global_ik_system.anim_knee_x[i], global_ik_system.anim_knee_y[i], global_ik_system.anim_knee_z[i] };
Vector3 solvedKnee = { global_ik_system.knee_res_x[i], global_ik_system.knee_res_y[i], global_ik_system.knee_res_z[i] };
// 1. Draw "Animated Knee" Reference (Ghostly Sphere)
DrawSphere(animKnee, 0.5f, Fade(YELLOW, 0.3f));
DrawLine3D(hip, animKnee, Fade(YELLOW, 0.2f));
// 2. Draw Target (Foot destination)
DrawSphere(target, 1.0f, RED);
// 3. Draw IK Solved Bones (Thick lines)
DrawCylinderEx(hip, solvedKnee, 0.8f, 0.8f, 8, BLUE); // Thigh
DrawCylinderEx(solvedKnee, target, 0.8f, 0.8f, 8, SKYBLUE); // Calf
// 4. Draw Joints
DrawSphere(hip, 1.2f, BLACK);
DrawSphere(solvedKnee, 1.2f, WHITE);
EndMode3D();
DrawText("AVX2 IK SOLVER TEST", 10, 10, 20, RAYWHITE);
DrawText("Red = Target | Blue = Solved Bones | Yellow = Plane Hint", 10, 40, 10, LIGHTGRAY);
DrawFPS(10, screenHeight - 20);
EndDrawing();
}
CloseWindow();
return 0;
}
@vurtun
Copy link
Author

vurtun commented Dec 19, 2025

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment