Created
February 12, 2026 07:34
-
-
Save psilord/9522b5bf6ad7269d92e8fda4ba30b989 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <sys/mman.h> | |
| #include <string.h> | |
| #include <errno.h> | |
| typedef unsigned char machine_code; /* one raw byte of hand-assembled machine code */ | |
| /* A function that we will manually "JIT" into an executable sequence of | |
| instructions stored in an array. Note the 'func' argument. It is through | |
| here we pass the address of the printf function at runtime. This is so we | |
| don't have to do nasty linking tricks in this small example. We do actually | |
| perform some linking with the format string, though. So I do show an | |
| example of it. | |
| Here is the C function under test. | |
| int bar(int val, int (*func)(const char *fmt, ...)) | |
| { | |
| func("The val is: %d\n", val); | |
| return val + 1; | |
| } | |
| One could call it like this: | |
| val = bar(val, printf); | |
| It is compiled with: gcc -O0 -Wall -g -c bar.c -o bar.o | |
| Then: objdump -d bar.o to see the assembly of it. | |
| Notice the relocations are 0x00, so we have to fix that up (aka, linking) | |
| at runtime for this sequence of bytes. I've marked with * the fixup that | |
| needs to happen: it is the location of the format string. The call to | |
| printf() needs no fixup of its own because it is an indirect call. | |
| Fixing the fmt string relocation will be done by putting the offset | |
| from the instruction loading the fmt string address to the bytes at | |
| the end of the block that actually contain the format string. It is the | |
| offset from the instruction to the string that is important here. | |
| Fixing the printf will be done by passing in the address to printf | |
| to this function and doing an indirect call from that register. | |
| 0000000000000000 <bar>: | |
| 0: f3 0f 1e fa endbr64 | |
| 4: 55 push %rbp | |
| 5: 48 89 e5 mov %rsp,%rbp | |
| 8: 48 83 ec 10 sub $0x10,%rsp | |
| c: 89 7d fc mov %edi,-0x4(%rbp) | |
| f: 48 89 75 f0 mov %rsi,-0x10(%rbp) | |
| 13: 8b 45 fc mov -0x4(%rbp),%eax | |
| 16: 48 8b 55 f0 mov -0x10(%rbp),%rdx | |
| 1a: 89 c6 mov %eax,%esi | |
| * 1c: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 23 <bar+0x23> | |
| 23: 48 89 c7 mov %rax,%rdi | |
| 26: b8 00 00 00 00 mov $0x0,%eax | |
| 2b: ff d2 call *%rdx # printf | |
| 2d: 8b 45 fc mov -0x4(%rbp),%eax | |
| 30: 83 c0 01 add $0x1,%eax | |
| 33: c9 leave | |
| 34: c3 ret | |
| */ | |
| typedef int (*bar_func)(int val, int (*)(const char *fmt, ...)); /* pointer type used to call the generated bar(): takes val and a printf-style function, returns int */ | |
/*
 * Allocate `size` bytes of zero-filled memory, or terminate the program.
 *
 * size: number of bytes requested. A request for 0 bytes is rounded up to
 *       1 byte, because calloc() is permitted to return NULL for a
 *       zero-sized request and that must not be mistaken for memory
 *       exhaustion.
 *
 * Returns a non-NULL pointer which the caller releases with free().
 * On allocation failure a diagnostic is written to stderr and the process
 * exits with EXIT_FAILURE; this function never returns NULL.
 */
void *xmalloc(size_t size)
{
    void *mem = calloc(1, size != 0 ? size : 1); /* zero fill */

    if (mem == NULL) {
        /* Diagnostics belong on stderr so they do not mix with output. */
        fprintf(stderr, "Out of memory! Aborting.\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}
| int main(void) | |
| { | |
| int ret; | |
| /* The instructions we would have assembled into machine code */ | |
| machine_code *code = NULL; | |
| /* Allocate a page of memory, which will be page aligned. | |
| I could have used malloc() here instead, but it would be slightly more | |
| fiddly since I'd have to use mprotect() and make sure things are page | |
| aligned from malloc (which it might not actually be wrt the pointer it | |
| gives back to me). | |
| The code length is 0x35 and the fmt string is 16 bytes plus 1 for nul. | |
| This will fit into 4096 bytes. | |
| This mmap segment will have read, write, and execute permissions. | |
| */ | |
| code = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, | |
| MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | |
| if (code == MAP_FAILED) { | |
| perror("mmap"); | |
| exit(EXIT_FAILURE); | |
| } | |
| /* fill in the instructions from the above function. A just in time compiler | |
| would create this buffer full of bytes from lisp code, assembly, | |
| whatever. | |
| */ | |
| /* 0: f3 0f 1e fa endbr64 */ | |
| code[0x00] = 0xf3; | |
| code[0x01] = 0x0f; | |
| code[0x02] = 0x1e; | |
| code[0x03] = 0xfa; | |
| /* 4: 55 push %rbp */ | |
| code[0x04] = 0x55; | |
| /* 5: 48 89 e5 mov %rsp,%rbp */ | |
| code[0x05] = 0x48; | |
| code[0x06] = 0x89; | |
| code[0x07] = 0xe5; | |
| /* 8: 48 83 ec 10 sub $0x10,%rsp */ | |
| code[0x08] = 0x48; | |
| code[0x09] = 0x83; | |
| code[0x0a] = 0xec; | |
| code[0x0b] = 0x10; | |
| /* c: 89 7d fc mov %edi,-0x4(%rbp) */ | |
| code[0x0c] = 0x89; | |
| code[0x0d] = 0x7d; | |
| code[0x0e] = 0xfc; | |
| /* f: 48 89 75 f0 mov %rsi,-0x10(%rbp) */ | |
| code[0x0f] = 0x48; | |
| code[0x10] = 0x89; | |
| code[0x11] = 0x75; | |
| code[0x12] = 0xf0; | |
| /* 13: 8b 45 fc mov -0x4(%rbp),%eax */ | |
| code[0x13] = 0x8b; | |
| code[0x14] = 0x45; | |
| code[0x15] = 0xfc; | |
| /* 16: 48 8b 55 f0 mov -0x10(%rbp),%rdx */ | |
| code[0x16] = 0x48; | |
| code[0x17] = 0x8b; | |
| code[0x18] = 0x55; | |
| code[0x19] = 0xf0; | |
| /* 1a: 89 c6 mov %eax,%esi */ | |
| code[0x1a] = 0x89; | |
| code[0x1b] = 0xc6; | |
| /* 1c: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 23 <bar+0x23> */ | |
| code[0x1c] = 0x48; | |
| code[0x1d] = 0x8d; | |
| code[0x1e] = 0x05; | |
| /* we must fix up this relocation to be the offset from the current | |
| instruction pointer to the fmt string at the end of the | |
| code block. That is 0x35 - 0x23 = 0x12 bytes. | |
| */ | |
| code[0x1f] = 0x12; | |
| code[0x20] = 0x00; | |
| code[0x21] = 0x00; | |
| code[0x22] = 0x00; | |
| /* 23: 48 89 c7 mov %rax,%rdi */ | |
| code[0x23] = 0x48; | |
| code[0x24] = 0x89; | |
| code[0x25] = 0xc7; | |
| /* 26: b8 00 00 00 00 mov $0x0,%eax */ | |
| code[0x26] = 0xb8; | |
| code[0x27] = 0x00; | |
| code[0x28] = 0x00; | |
| code[0x29] = 0x00; | |
| code[0x2a] = 0x00; | |
| /* 2b: ff d2 call *%rdx # printf */ | |
| /* Here we indirect call printf via the func function pointer in %rdx */ | |
| code[0x2b] = 0xff; | |
| code[0x2c] = 0xd2; | |
| /* 2d: 8b 45 fc mov -0x4(%rbp),%eax */ | |
| code[0x2d] = 0x8b; | |
| code[0x2e] = 0x45; | |
| code[0x2f] = 0xfc; | |
| /* 30: 83 c0 01 add $0x1,%eax */ | |
| code[0x30] = 0x83; | |
| code[0x31] = 0xc0; | |
| code[0x32] = 0x01; | |
| /* 33: c9 leave */ | |
| code[0x33] = 0xc9; | |
| /* 34: c3 ret */ | |
| code[0x34] = 0xc3; | |
| /* format string at the end of the instructions: "The val is: %d\n" */ | |
| code[0x35] = 'T'; | |
| code[0x36] = 'h'; | |
| code[0x37] = 'e'; | |
| code[0x38] = ' '; | |
| code[0x39] = 'v'; | |
| code[0x3a] = 'a'; | |
| code[0x3b] = 'l'; | |
| code[0x3c] = ' '; | |
| code[0x3d] = 'i'; | |
| code[0x3e] = 's'; | |
| code[0x3f] = ':'; | |
| code[0x40] = ' '; | |
| code[0x41] = '%'; | |
| code[0x42] = 'd'; | |
| code[0x43] = '\n'; | |
| code[0x44] = '\0'; /* NUL byte to end string */ | |
| /* Finally, we type cast the shit out of code into a function and call it! */ | |
| ret = ((bar_func)code)(42, printf); | |
| printf("And, it returned a value of: %d\n", ret); | |
| /* get rid of the code block */ | |
| if (munmap(code, 4096) < 0) { | |
| perror("munmap"); | |
| exit(EXIT_FAILURE); | |
| } | |
| code = NULL; | |
| return EXIT_SUCCESS; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment