Created
November 5, 2025 10:15
-
-
Save trufae/42ac5b2c1145ebc60e9fca52559546b1 to your computer and use it in GitHub Desktop.
DWARF writer in V
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module dwarfwriter | |
| import encoding.binary | |
// SymEntry pairs an address with the name of the symbol located there.
// create_elf_with_dwarf emits one DW_TAG_subprogram DIE and one .symtab
// entry per SymEntry.
pub struct SymEntry {
pub:
	// addr is written as the DIE low_pc and as the symbol's st_value.
	addr u64
	// symbol is the name stored in .debug_info and in .strtab.
	symbol string
}
// LineEntry maps one address to a source location (file name + line number).
// create_elf_with_dwarf emits one row of the .debug_line program per entry.
pub struct LineEntry {
pub:
	// addr is the address passed to DW_LNE_set_address for this row.
	addr u64
	// file is the source file name; distinct names populate the file table.
	file string
	// line is the source line number reported for addr.
	line int
}
| // Helper functions for appending binary data in little-endian format | |
// u16_bytes returns the 2-byte little-endian encoding of `val`
// (least-significant byte first).
fn u16_bytes(val u16) []u8 {
	lo := u8(val & 0xff)
	hi := u8((val >> 8) & 0xff)
	return [lo, hi]
}
// u32_bytes returns the 4-byte little-endian encoding of `val`
// (least-significant byte first).
fn u32_bytes(val u32) []u8 {
	mut out := []u8{cap: 4}
	for shift := 0; shift < 32; shift += 8 {
		out << u8((val >> shift) & 0xff)
	}
	return out
}
// u64_bytes returns the 8-byte little-endian encoding of `val`
// (least-significant byte first).
fn u64_bytes(val u64) []u8 {
	mut out := []u8{cap: 8}
	for shift := 0; shift < 64; shift += 8 {
		out << u8((val >> shift) & 0xff)
	}
	return out
}
// append_uleb128 appends `val` to `buf` in unsigned LEB128 form: 7 payload
// bits per byte, least-significant group first, with bit 7 set on every byte
// except the last one.
fn append_uleb128(mut buf []u8, val u32) {
	mut rest := val
	// Emit continuation bytes while more than 7 significant bits remain.
	for rest >= 0x80 {
		buf << u8((rest & 0x7f) | 0x80)
		rest >>= 7
	}
	// Final group: high bit clear terminates the sequence.
	buf << u8(rest)
}
// append_sleb128 appends `delta` to `buf` in signed LEB128 form.
//
// The value is emitted 7 bits at a time, least-significant group first. The
// sequence ends once the remaining value is pure sign extension (0 for
// non-negative, -1 for negative) AND bit 6 of the byte just produced already
// carries that sign, because a decoder sign-extends from bit 6 of the last
// byte.
//
// The shift is done on an i64 so `>>` is arithmetic. A logical shift on an
// unsigned copy can never reach -1, which would make negative inputs loop
// forever.
fn append_sleb128(mut buf []u8, delta i64) {
	mut rest := delta
	for {
		group := u8(rest & 0x7f)
		rest >>= 7 // arithmetic shift: keeps the sign of `delta`
		sign_bit_set := (group & 0x40) != 0
		if (rest == 0 && !sign_bit_set) || (rest == -1 && sign_bit_set) {
			buf << group
			break
		}
		// More groups follow: flag it with the continuation bit.
		buf << (group | 0x80)
	}
}
// write_at copies `data` into `buf` starting at byte offset `off`,
// overwriting whatever is there.
//
// If the target range extends past the current end of `buf`, the buffer is
// first zero-padded to exactly `off + data.len` bytes; it is never grown
// beyond the end of the written range.
fn write_at(mut buf []u8, off int, data []u8) {
	end := off + data.len
	for buf.len < end {
		buf << u8(0)
	}
	for i, b in data {
		buf[off + i] = b
	}
}
// SectionHeader holds the fields of one Elf64_Shdr that vary between the
// sections emitted by create_elf_with_dwarf. sh_addr is not included because
// every section is written with address 0.
struct SectionHeader {
	name      u32 // sh_name: offset into .shstrtab
	typ       u32 // sh_type
	flags     u64 // sh_flags
	offset    u64 // sh_offset
	size      u64 // sh_size
	link      u32 // sh_link
	info      u32 // sh_info
	addralign u64 // sh_addralign
	entsize   u64 // sh_entsize
}

// append_section_header appends one 64-byte Elf64_Shdr record to `buf`.
fn append_section_header(mut buf []u8, sh SectionHeader) {
	buf << u32_bytes(sh.name) // sh_name
	buf << u32_bytes(sh.typ) // sh_type
	buf << u64_bytes(sh.flags) // sh_flags
	buf << u64_bytes(0) // sh_addr
	buf << u64_bytes(sh.offset) // sh_offset
	buf << u64_bytes(sh.size) // sh_size
	buf << u32_bytes(sh.link) // sh_link
	buf << u32_bytes(sh.info) // sh_info
	buf << u64_bytes(sh.addralign) // sh_addralign
	buf << u64_bytes(sh.entsize) // sh_entsize
}

// create_elf_with_dwarf creates an ELF 64-bit object with DWARF debug info.
//
// The returned buffer is a little-endian ET_REL / EM_X86_64 file laid out as:
// ELF header, .text (a single RET byte), .debug_info, .debug_abbrev,
// .debug_line, .symtab, .strtab, .shstrtab, then the section header table
// (null entry + 7 sections).
//
// `symbols` produces one DW_TAG_subprogram DIE (name, low_pc = addr,
// high_pc = addr + 1) and one global function symbol each.
// `lines` produces one row each in the DWARF version 2 line-number program;
// the file table contains the distinct `file` values in first-seen order.
pub fn create_elf_with_dwarf(lines []LineEntry, symbols []SymEntry) []u8 {
	mut buf := []u8{}

	// ELF header
	buf << '\x7fELF'.bytes() // e_ident[EI_MAG]
	buf << u8(2) // e_ident[EI_CLASS] = ELFCLASS64
	buf << u8(1) // e_ident[EI_DATA] = ELFDATA2LSB
	buf << u8(1) // e_ident[EI_VERSION] = EV_CURRENT
	buf << u8(0) // e_ident[EI_OSABI] = ELFOSABI_NONE
	buf << u8(0) // e_ident[EI_ABIVERSION]
	for _ in 0 .. 7 {
		buf << u8(0) // e_ident[EI_PAD]
	}
	buf << u16_bytes(1) // e_type = ET_REL
	buf << u16_bytes(0x3E) // e_machine = EM_X86_64
	buf << u32_bytes(1) // e_version = EV_CURRENT
	buf << u64_bytes(0) // e_entry
	buf << u64_bytes(0) // e_phoff
	shoff_off := buf.len
	buf << u64_bytes(0) // e_shoff (placeholder, patched once the table offset is known)
	buf << u32_bytes(0) // e_flags
	buf << u16_bytes(64) // e_ehsize
	buf << u16_bytes(0) // e_phentsize
	buf << u16_bytes(0) // e_phnum
	buf << u16_bytes(64) // e_shentsize
	buf << u16_bytes(8) // e_shnum = 8 (null + 7 sections)
	buf << u16_bytes(7) // e_shstrndx = 7 (.shstrtab)

	// .text
	text_off := buf.len
	buf << u8(0xC3) // RET
	text_sz := buf.len - text_off

	// .debug_info
	debug_info_start := buf.len
	cu_len_off := buf.len
	buf << u32_bytes(0) // unit_length (placeholder)
	buf << u16_bytes(2) // DWARF version 2
	buf << u32_bytes(0) // debug_abbrev_offset
	buf << u8(8) // address_size
	// Compile-unit DIE; attribute order must match abbrev 1 below.
	buf << u8(0x01) // Abbrev code 1
	buf << u8(0x0c) // DW_AT_language (DW_LANG_C99)
	buf << 'main.c'.bytes() // DW_AT_name
	buf << u8(0)
	buf << '.'.bytes() // DW_AT_comp_dir
	buf << u8(0)
	buf << u64_bytes(0) // low_pc
	buf << u64_bytes(1) // high_pc
	buf << u32_bytes(0) // stmt_list
	// One subprogram DIE per symbol; attribute order must match abbrev 2.
	for sym in symbols {
		buf << u8(0x02) // Abbrev code 2
		buf << sym.symbol.bytes() // DW_AT_name
		buf << u8(0)
		buf << u64_bytes(sym.addr) // low_pc
		buf << u64_bytes(sym.addr + 1) // high_pc
	}
	buf << u8(0) // end of children
	// unit_length excludes the 4-byte length field itself.
	cu_len := u32(buf.len - (cu_len_off + 4))
	mut b_cu := []u8{len: 4}
	binary.little_endian_put_u32(mut b_cu, cu_len)
	write_at(mut buf, cu_len_off, b_cu)

	// .debug_abbrev
	debug_abbrev_start := buf.len
	buf << u8(0x01) // abbrev code 1
	buf << u8(0x11) // DW_TAG_compile_unit
	buf << u8(0x01) // has children
	buf << u8(0x13) // DW_AT_language
	buf << u8(0x0b) // DW_FORM_data1
	buf << u8(0x03) // DW_AT_name
	buf << u8(0x08) // DW_FORM_string
	buf << u8(0x1b) // DW_AT_comp_dir
	buf << u8(0x08) // DW_FORM_string
	buf << u8(0x11) // DW_AT_low_pc
	buf << u8(0x01) // DW_FORM_addr
	buf << u8(0x12) // DW_AT_high_pc
	buf << u8(0x01) // DW_FORM_addr
	buf << u8(0x10) // DW_AT_stmt_list
	buf << u8(0x06) // DW_FORM_data4
	buf << u8(0x00) // end of attribute list
	buf << u8(0x00)
	buf << u8(0x02) // abbrev code 2
	buf << u8(0x2e) // DW_TAG_subprogram
	buf << u8(0x00) // no children
	buf << u8(0x03) // DW_AT_name
	buf << u8(0x08) // DW_FORM_string
	buf << u8(0x11) // DW_AT_low_pc
	buf << u8(0x01) // DW_FORM_addr
	buf << u8(0x12) // DW_AT_high_pc
	buf << u8(0x01) // DW_FORM_addr
	buf << u8(0x00) // end of attribute list
	buf << u8(0x00)
	buf << u8(0x00) // end of abbreviations

	// .debug_line
	debug_line_start := buf.len
	line_len_off := buf.len
	buf << u32_bytes(0) // unit_length (placeholder)
	buf << u16_bytes(2) // version
	hdr_len_off := buf.len
	buf << u32_bytes(0) // header_length (placeholder)
	buf << u8(1) // minimum_instruction_length
	buf << u8(1) // default_is_stmt
	buf << u8(0xFB) // line_base
	buf << u8(14) // line_range
	buf << u8(13) // opcode_base
	// Operand counts for standard opcodes 1..12.
	std_op_len := [u8(0), 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1]
	for len_val in std_op_len {
		buf << len_val
	}
	// dirs (empty)
	buf << u8(0)
	// files: distinct names in first-seen order; table indices are 1-based.
	mut files := []string{}
	for le in lines {
		if le.file !in files {
			files << le.file
		}
	}
	for f in files {
		buf << f.bytes()
		buf << u8(0)
		buf << u8(0) // dir index
		buf << u8(0) // mod time
		buf << u8(0) // file length
	}
	buf << u8(0) // end files
	// header_length spans from just after its own field to the first byte of
	// the line-number program, so it must be fixed up here, before any opcode
	// is emitted. Measuring it after the program would make consumers skip the
	// program entirely.
	hdr_len := u32(buf.len - (hdr_len_off + 4))
	mut b_hdr := []u8{len: 4}
	binary.little_endian_put_u32(mut b_hdr, hdr_len)
	write_at(mut buf, hdr_len_off, b_hdr)

	// line program
	mut prev_line := 1 // initial value of the `line` register
	mut prev_file_idx := 1 // initial value of the `file` register
	for le in lines {
		// DW_LNE_set_address
		buf << u8(0)
		buf << u8(1 + 8) // length: opcode byte + 8-byte address
		buf << u8(0x02)
		buf << u64_bytes(le.addr)
		// DW_LNS_set_file (only when the file changes)
		mut file_idx := 1
		for i, f in files {
			if f == le.file {
				file_idx = i + 1
				break
			}
		}
		if file_idx != prev_file_idx {
			buf << u8(0x04)
			append_uleb128(mut buf, u32(file_idx))
			prev_file_idx = file_idx
		}
		// DW_LNS_advance_line
		buf << u8(0x03)
		delta := i64(le.line) - i64(prev_line)
		append_sleb128(mut buf, delta)
		prev_line = le.line
		// DW_LNS_copy
		buf << u8(0x01)
	}
	// DW_LNE_end_sequence
	buf << u8(0)
	buf << u8(0x01)
	buf << u8(0x01)
	// unit_length excludes the 4-byte length field itself.
	line_len := u32(buf.len - (line_len_off + 4))
	mut b_line := []u8{len: 4}
	binary.little_endian_put_u32(mut b_line, line_len)
	write_at(mut buf, line_len_off, b_line)

	// Symbol table
	// Pre-compute each name's offset in .strtab (offset 0 is the empty string).
	mut str_offsets := []u32{len: symbols.len}
	mut cur_off := u32(1)
	for i, se in symbols {
		str_offsets[i] = cur_off
		cur_off += u32(se.symbol.len + 1)
	}
	symtab_off := buf.len
	// NULL symbol
	buf << u32_bytes(0) // st_name
	buf << u8(0) // st_info
	buf << u8(0) // st_other
	buf << u16_bytes(0) // st_shndx
	buf << u64_bytes(0) // st_value
	buf << u64_bytes(0) // st_size
	for i, se in symbols {
		buf << u32_bytes(str_offsets[i]) // st_name
		buf << u8(0x12) // st_info (STB_GLOBAL | STT_FUNC)
		buf << u8(0) // st_other
		buf << u16_bytes(1) // st_shndx (.text)
		buf << u64_bytes(se.addr) // st_value
		buf << u64_bytes(1) // st_size
	}
	symtab_sz := u32(buf.len - symtab_off)

	// String table
	strtab_off := buf.len
	buf << u8(0) // empty string
	for se in symbols {
		buf << se.symbol.bytes()
		buf << u8(0)
	}
	strtab_sz := u32(buf.len - strtab_off)

	// Section header string table; sh_names[i] is the name offset of section i.
	shstr_off := buf.len
	mut sh_names := []u32{len: 8}
	buf << u8(0) // null
	section_names := ['.text', '.debug_info', '.debug_abbrev', '.debug_line', '.symtab', '.strtab',
		'.shstrtab']
	for i, sname in section_names {
		sh_names[i + 1] = u32(buf.len - shstr_off)
		buf << sname.bytes()
		buf << u8(0)
	}
	shstr_sz := u32(buf.len - shstr_off)

	// Patch section header table offset
	shdrs_off := buf.len
	mut b_shoff := []u8{len: 8}
	binary.little_endian_put_u64(mut b_shoff, u64(shdrs_off))
	write_at(mut buf, shoff_off, b_shoff)

	// Section headers
	// NULL section
	for _ in 0 .. 64 {
		buf << u8(0)
	}
	// .text
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[1]
		typ:       1 // SHT_PROGBITS
		flags:     6 // SHF_ALLOC | SHF_EXECINSTR
		offset:    u64(text_off)
		size:      u64(text_sz)
		addralign: 16
	})
	// .debug_info
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[2]
		typ:       1 // SHT_PROGBITS
		offset:    u64(debug_info_start)
		size:      u64(debug_abbrev_start - debug_info_start)
		addralign: 1
	})
	// .debug_abbrev
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[3]
		typ:       1 // SHT_PROGBITS
		offset:    u64(debug_abbrev_start)
		size:      u64(debug_line_start - debug_abbrev_start)
		addralign: 1
	})
	// .debug_line
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[4]
		typ:       1 // SHT_PROGBITS
		offset:    u64(debug_line_start)
		size:      u64(symtab_off - debug_line_start)
		addralign: 1
	})
	// .symtab
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[5]
		typ:       2 // SHT_SYMTAB
		offset:    u64(symtab_off)
		size:      u64(symtab_sz)
		link:      6 // .strtab
		info:      1 // first non-local symbol
		addralign: 8
		entsize:   24
	})
	// .strtab
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[6]
		typ:       3 // SHT_STRTAB
		offset:    u64(strtab_off)
		size:      u64(strtab_sz)
		addralign: 1
	})
	// .shstrtab
	append_section_header(mut buf, SectionHeader{
		name:      sh_names[7]
		typ:       3 // SHT_STRTAB
		offset:    u64(shstr_off)
		size:      u64(shstr_sz)
		addralign: 1
	})
	return buf
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import dwarfwriter | |
| import os | |
// main builds a small sample (three line entries across two files, two
// symbols), generates the ELF image with dwarfwriter, prints its size and
// writes it to `dwarf.elf` in the current directory.
fn main() {
	line_table := [
		dwarfwriter.LineEntry{
			addr: 0x1000
			file: 'main.c'
			line: 42
		},
		dwarfwriter.LineEntry{
			addr: 0x2000
			file: 'main.c'
			line: 53
		},
		dwarfwriter.LineEntry{
			addr: 0x3000
			file: 'foo.c'
			line: 63
		},
	]
	sym_table := [
		dwarfwriter.SymEntry{
			addr:   0x1000
			symbol: 'main'
		},
		dwarfwriter.SymEntry{
			addr:   0x2000
			symbol: 'check'
		},
	]
	elf_buf := dwarfwriter.create_elf_with_dwarf(line_table, sym_table)
	println('Generated ELF buffer of size ${elf_buf.len}')
	// A write failure propagates out of main via `!`.
	os.write_bytes('dwarf.elf', elf_buf)!
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment