19h · February 11, 2026 19:35
diff --git a/analyze_v850_relocs.py b/analyze_v850_relocs.py
 #!/usr/bin/env python3
 """
 Analyze v850/RH850 relocations across all compiled object files.

 For each relocation in .text* sections:
  - Read the instruction bytes at the relocation offset
  - Determine the relocation type and its bit mask
  - Report per-relocation-type statistics and the exact mask to apply

 This directly tells us which instruction bits are position-dependent
 and must be masked out in Lumina signatures.
 """

 import subprocess
 import struct
 import sys
 import os
 from collections import defaultdict
 from pathlib import Path

 # Directory that analyze_all_objects() globs for compiled "*.o" files.
 OBJDIR = Path("/home/null/dev/gcc/test-programs/v850-out")
 # Absolute paths of the v850-elf binutils executables run via subprocess.
 READELF = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-readelf"
 OBJDUMP = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objdump"
 OBJCOPY = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objcopy"

 # ============================================================================
 # Relocation type definitions
 # Maps relocation type number -> (name, size_bytes, mask32, is_pc_relative, description)
 #
 # The "mask" here is: which bits of the instruction encode the relocated value.
 # These are the bits that CHANGE when the binary is linked at a different address.
 # For Lumina, these bits must be zeroed/masked out in the signature.
 #
 # Derived from binutils source: bfd/elf32-v850.c v850_elf_reloc() function
 # and the howto tables.
 # ============================================================================

 # Standard V850 relocations (types 0-51)
 # RH850 relocations (types 0x30+)
 # We handle both since our .o files use RH850 variant

 def compute_mask_le_bytes(mask32, nbytes):
    """Return the low *nbytes* bytes of *mask32*, least-significant byte first."""
    # Walk the mask in 8-bit steps; an nbytes of 0 (or less) yields b"".
    return bytes((mask32 >> shift) & 0xFF for shift in range(0, nbytes * 8, 8))

 # For each relocation type, define:
 #   (name, size_in_bytes, mask_as_32bit_le_int, is_pc_relative, description)
 # The mask represents which bits of the instruction word(s) contain the relocated value.
 # For 16-bit instructions, only the lower 16 bits of the mask matter.
 # For 32-bit instructions, the full mask applies.
 # For data relocations (ABS32, WORD), all bits are affected.

 # Relocation table keyed by the ELF relocation type number.
 # Each value is (name, size_in_bytes, mask32, is_pc_relative, description):
 #   - size_in_bytes: how many bytes are read at the relocation offset
 #     (0 means the relocation touches no instruction bits and is skipped)
 #   - mask32: set bits mark the relocated field; byte 0 of the little-endian
 #     instruction bytes corresponds to bits [7:0] of this value
 # Entries marked TODO carry a placeholder mask of 0.
 RELOC_INFO = {
    # === Standard V850 relocations ===
    0:  ("R_V850_NONE",       0, 0x00000000, False, "No relocation"),
    1:  ("R_V850_9_PCREL",    2, 0x0000f870, True,  "9-bit PC-relative branch (Format III bCC) - bits[15:11,6:4] of 16-bit insn"),
    2:  ("R_V850_22_PCREL",   4, 0xfffe003f, True,  "22-bit PC-relative (Format V jr/jarl) - bits[31:17] | bits[5:0]"),
    3:  ("R_V850_HI16_S",     2, 0x0000ffff, False, "High 16 bits (signed adjust) - all 16 bits of the halfword"),
    4:  ("R_V850_HI16",       2, 0x0000ffff, False, "High 16 bits - all 16 bits of the halfword"),
    5:  ("R_V850_LO16",       2, 0x0000ffff, False, "Low 16 bits - all 16 bits of the halfword"),
    6:  ("R_V850_ABS32",      4, 0xffffffff, False, "Absolute 32-bit address"),
    7:  ("R_V850_16",         2, 0x0000ffff, False, "16-bit value"),
    8:  ("R_V850_8",          1, 0x000000ff, False, "8-bit value"),
    9:  ("R_V850_SDA_16_16_OFFSET",  2, 0x0000ffff, False, "SDA 16-bit offset (ld.b/st.b/movea)"),
    10: ("R_V850_SDA_15_16_OFFSET",  2, 0x0000fffe, False, "SDA 15-bit offset (ld.w/st.w) - bits[15:1]"),
    11: ("R_V850_ZDA_16_16_OFFSET",  2, 0x0000ffff, False, "ZDA 16-bit offset"),
    12: ("R_V850_ZDA_15_16_OFFSET",  2, 0x0000fffe, False, "ZDA 15-bit offset"),
    13: ("R_V850_TDA_6_8_OFFSET",    2, 0x0000007e, False, "TDA 6-bit offset (sst.w/sld.w) - bits[6:1]"),
    14: ("R_V850_TDA_7_8_OFFSET",    2, 0x0000007f, False, "TDA 7-bit offset (sst.h/sld.h) - bits[6:0]"),
    15: ("R_V850_TDA_7_7_OFFSET",    2, 0x0000007f, False, "TDA 7-bit offset (sst.b/sld.b) - bits[6:0]"),
    16: ("R_V850_TDA_16_16_OFFSET",  2, 0x0000ffff, False, "TDA 16-bit offset"),
    17: ("R_V850_TDA_4_5_OFFSET",    2, 0x0000000f, False, "TDA 4-bit offset (sld.hu) - bits[3:0]"),
    18: ("R_V850_TDA_4_4_OFFSET",    2, 0x0000000f, False, "TDA 4-bit offset (sld.bu) - bits[3:0]"),
    19: ("R_V850_SDA_16_16_SPLIT_OFFSET", 4, 0xfffe0020, False, "SDA split 16-bit (ld.bu) - bits[31:17]|bit[5]"),
    20: ("R_V850_ZDA_16_16_SPLIT_OFFSET", 4, 0xfffe0020, False, "ZDA split 16-bit (ld.bu) - bits[31:17]|bit[5]"),
    21: ("R_V850_CALLT_6_7_OFFSET",  2, 0x0000003f, False, "CALLT 6-bit offset - bits[5:0]"),
    22: ("R_V850_CALLT_16_16_OFFSET",2, 0x0000ffff, False, "CALLT 16-bit offset"),
    23: ("R_V850_GNU_VTINHERIT",     0, 0x00000000, False, "C++ vtable inheritance (no bits)"),
    24: ("R_V850_GNU_VTENTRY",       0, 0x00000000, False, "C++ vtable entry (no bits)"),
    25: ("R_V850_LONGCALL",          0, 0x00000000, True,  "Linker relaxation hint (no bits)"),
    26: ("R_V850_LONGJUMP",          0, 0x00000000, True,  "Linker relaxation hint (no bits)"),
    27: ("R_V850_ALIGN",             0, 0x00000000, False, "Alignment hint (no bits)"),
    28: ("R_V850_REL32",             4, 0xffffffff, True,  "32-bit PC-relative"),
    29: ("R_V850_LO16_SPLIT_OFFSET", 4, 0xfffe0020, False, "LO16 split (ld.bu) - bits[31:17]|bit[5]"),
    30: ("R_V850_16_PCREL",          2, 0x0000fffe, True,  "16-bit PC-relative (loop)"),
    31: ("R_V850_17_PCREL",          4, 0xfffe0010, True,  "17-bit PC-relative branch - bits[31:17]|bit[4]"),
    32: ("R_V850_23",                4, 0xffff07f0, False, "23-bit offset - bits[31:16]|bits[10:4]"),
    33: ("R_V850_32_PCREL",          4, 0xfffffffe, True,  "32-bit PC-relative - bits[31:1]"),
    34: ("R_V850_32_ABS",            4, 0xfffffffe, False, "32-bit absolute branch - bits[31:1]"),
    35: ("R_V850_16_SPLIT_OFFSET",   4, 0xfffe0020, False, "16-bit split offset (same as SDA split)"),
    36: ("R_V850_16_S1",             2, 0x0000fffe, False, "16-bit shifted by 1 - bits[15:1]"),
    37: ("R_V850_LO16_S1",           2, 0x0000fffe, False, "LO16 shifted by 1 - bits[15:1]"),
    38: ("R_V850_CALLT_15_16_OFFSET",2, 0x0000fffe, False, "CALLT 15-bit offset - bits[15:1]"),
    39: ("R_V850_32_GOTPCREL",       4, 0xffffffff, True,  "32-bit GOT PC-relative"),
    40: ("R_V850_16_GOT",            4, 0x0000ffff, False, "16-bit GOT offset"),
    41: ("R_V850_32_GOT",            4, 0xffffffff, False, "32-bit GOT offset"),
    42: ("R_V850_22_PLT",            4, 0x07ffff80, True,  "22-bit PLT relative"),
    43: ("R_V850_32_PLT",            4, 0xffffffff, True,  "32-bit PLT relative"),

    # === RH850 / V800 relocations (type numbers 0x30+) ===
    # These are indexed as (type - 0x30) in v800_elf_howto_table
    0x30: ("R_V810_NONE",     0, 0x00000000, False, "No relocation"),
    0x31: ("R_V810_BYTE",     1, 0x000000ff, False, "8-bit absolute"),
    0x32: ("R_V810_HWORD",    2, 0x0000ffff, False, "16-bit absolute"),
    0x33: ("R_V810_WORD",     4, 0xffffffff, False, "32-bit absolute"),
    0x34: ("R_V810_WLO",      2, 0x0000ffff, False, "Low 16 bits (movea) - all 16 bits of halfword"),
    0x35: ("R_V810_WHI",      2, 0x0000ffff, False, "High 16 bits (movhi) - all 16 bits of halfword"),
    0x36: ("R_V810_WHI1",     2, 0x0000ffff, False, "High 16 bits signed (movhi) - all 16 bits of halfword"),
    0x37: ("R_V810_GPBYTE",   1, 0x000000ff, False, "GP-relative 8-bit"),
    0x38: ("R_V810_GPHWORD",  2, 0x0000ffff, False, "GP-relative 16-bit"),
    0x39: ("R_V810_GPWORD",   4, 0xffffffff, False, "GP-relative 32-bit"),
    0x3a: ("R_V810_GPWLO",    2, 0x0000ffff, False, "GP-relative low 16"),
    0x3b: ("R_V810_GPWHI",    2, 0x0000ffff, False, "GP-relative high 16"),
    0x3c: ("R_V810_GPWHI1",   2, 0x0000ffff, False, "GP-relative high 16 signed"),
    0x3d: ("R_V850_HWLO",     2, 0x0000fffe, False, "Half-word low (shifted) - bits[15:1]"),
    0x3f: ("R_V850_EP7BIT",   2, 0x0000007f, False, "EP 7-bit"),  # actually 1-byte field in howto but 16-bit insn
    0x40: ("R_V850_EPHBYTE",  2, 0x0000007f, False, "EP halfbyte"),
    0x41: ("R_V850_EPWBYTE",  2, 0x0000007e, False, "EP wordbyte"),
    0x42: ("R_V850_REGHWLO",  2, 0x0000fffe, False, "Register region half-word low"),
    0x44: ("R_V850_GPHWLO",   2, 0x0000fffe, False, "GP half-word low"),
    0x46: ("R_V850_PCR22",    4, 0xfffe003f, True,  "22-bit PC-relative (jr/jarl) - bits[31:17]|bits[5:0]"),
    0x47: ("R_V850_BLO",      4, 0xfffe0020, False, "24-bit LO split"),
    0x48: ("R_V850_EP4BIT",   2, 0x0000000f, False, "EP 4-bit"),
    0x49: ("R_V850_EP5BIT",   2, 0x0000000f, False, "EP 5-bit (shifted)"),  # bits[3:0], shifted by 1
    0x4a: ("R_V850_REGBLO",   4, 0xfffe0020, False, "Reg region 24-bit LO split"),
    0x4b: ("R_V850_GPBLO",    4, 0xfffe0020, False, "GP 24-bit LO split"),
    0x4c: ("R_V810_WLO_1",    2, 0x0000fffe, False, "Low 16 bits shifted (ld.w/st.w) - bits[15:1]"),
    0x4d: ("R_V810_GPWLO_1",  2, 0x0000fffe, False, "GP low 16 shifted"),
    0x4e: ("R_V850_BLO_1",    4, 0xfffe0020, False, "24-bit LO split shifted"),
    0x4f: ("R_V850_HWLO_1",   2, 0x0000fffe, False, "Half-word low shifted"),
    0x51: ("R_V850_GPBLO_1",  4, 0xfffe0020, False, "GP 24-bit LO split shifted"),
    0x52: ("R_V850_GPHWLO_1", 2, 0x0000fffe, False, "GP half-word low shifted"),
    0x54: ("R_V850_EPBLO",    4, 0xfffe0020, False, "EP 24-bit LO split"),
    0x55: ("R_V850_EPHWLO",   2, 0x0000fffe, False, "EP half-word low"),
    0x57: ("R_V850_EPWLO_N",  2, 0x0000fffe, False, "EP word low N"),
    0x58: ("R_V850_PC32",     4, 0xfffffffe, True,  "32-bit PC-relative"),
    0x59: ("R_V850_W23BIT",   4, 0xffff07f0, False, "23-bit word offset"),
    0x5a: ("R_V850_GPW23BIT", 4, 0xffff07f0, False, "GP 23-bit word offset"),
    0x5b: ("R_V850_EPW23BIT", 4, 0xffff07f0, False, "EP 23-bit word offset"),
    0x5c: ("R_V850_B23BIT",   4, 0xffff07f0, False, "23-bit byte offset"),
    0x5d: ("R_V850_GPB23BIT", 4, 0xffff07f0, False, "GP 23-bit byte offset"),
    0x5e: ("R_V850_EPB23BIT", 4, 0xffff07f0, False, "EP 23-bit byte offset"),
    0x5f: ("R_V850_PC16U",    2, 0x0000fffe, True,  "16-bit PC-relative unsigned"),
    0x60: ("R_V850_PC17",     4, 0xfffe0010, True,  "17-bit PC-relative branch"),
    0x61: ("R_V850_DW8",      4, 0x00000000, False, "8-bit double-word (TODO)"),  # complex
    0x62: ("R_V850_GPDW8",    4, 0x00000000, False, "GP 8-bit double-word (TODO)"),
    0x63: ("R_V850_EPDW8",    4, 0x00000000, False, "EP 8-bit double-word (TODO)"),
    0x64: ("R_V850_PC9",      2, 0x0000f870, True,  "9-bit PC-relative (bCC) - bits[15:11,6:4]"),
    0x65: ("R_V810_REGBYTE",  1, 0x000000ff, False, "Reg region 8-bit"),
    0x66: ("R_V810_REGHWORD", 2, 0x0000ffff, False, "Reg region 16-bit"),
    0x67: ("R_V810_REGWORD",  4, 0xffffffff, False, "Reg region 32-bit"),
    0x68: ("R_V810_REGWLO",   2, 0x0000ffff, False, "Reg region low 16"),
    0x69: ("R_V810_REGWHI",   2, 0x0000ffff, False, "Reg region high 16"),
    0x6a: ("R_V810_REGWHI1",  2, 0x0000ffff, False, "Reg region high 16 signed"),
    0x6b: ("R_V850_REGW23BIT",4, 0xffff07f0, False, "Reg region 23-bit word"),
    0x6c: ("R_V850_REGB23BIT",4, 0xffff07f0, False, "Reg region 23-bit byte"),
    0x6d: ("R_V850_REGDW8",   4, 0x00000000, False, "Reg region 8-bit double-word"),
    0x6e: ("R_V810_EPBYTE",   1, 0x000000ff, False, "EP 8-bit"),
    0x6f: ("R_V810_EPHWORD",  2, 0x0000ffff, False, "EP 16-bit"),
    0x70: ("R_V810_EPWORD",   4, 0xffffffff, False, "EP 32-bit"),
    0x71: ("R_V850_WLO23",    4, 0xffff07f0, False, "23-bit word LO"),  # same encoding as R_V850_23
    0x72: ("R_V850_WORD_E",   4, 0xffffffff, False, "32-bit word (extended)"),
    0x73: ("R_V850_REGWORD_E",4, 0xffffffff, False, "Reg region 32-bit (extended)"),
    0x74: ("R_V850_WORD",     4, 0xffffffff, False, "32-bit word"),
    0x75: ("R_V850_GPWORD",   4, 0xffffffff, False, "GP 32-bit word"),
    0x76: ("R_V850_REGWORD2", 4, 0xffffffff, False, "Reg region 32-bit (#2)"),
    0x77: ("R_V850_EPWORD2",  4, 0xffffffff, False, "EP 32-bit word (#2)"),
    0x78: ("R_V810_TPBYTE",   1, 0x000000ff, False, "TP 8-bit"),
    0x79: ("R_V810_TPHWORD",  2, 0x0000ffff, False, "TP 16-bit"),
    0x7a: ("R_V810_TPWORD",   4, 0xffffffff, False, "TP 32-bit"),
    0x7b: ("R_V810_TPWLO",    2, 0x0000ffff, False, "TP low 16"),
    0x7c: ("R_V810_TPWHI",    2, 0x0000ffff, False, "TP high 16"),
    0x7d: ("R_V810_TPWHI1",   2, 0x0000ffff, False, "TP high 16 signed"),
    0xa0: ("R_V810_ABS32",    4, 0xffffffff, False, "Absolute 32-bit"),
 }


 def parse_readelf_relocs(objfile):
    """Parse the .text* relocations printed by ``readelf -r`` for *objfile*.

    Returns a list of tuples
    (section_name, offset, reloc_type_num, reloc_type_name, sym_name, addend).
    section_name is the relocated section, i.e. the relocation section's
    name with its ".rela"/".rel" prefix removed. addend is signed: a
    trailing "+ N" gives N, a trailing "- N" gives -N, and lines without
    either give 0. Lines that do not look like relocation entries are skipped.
    """
    result = subprocess.run(
        [READELF, "-r", str(objfile)],
        capture_output=True, text=True
    )

    relocs = []
    current_section = None

    for line in result.stdout.splitlines():
        line = line.strip()
        if line.startswith("Relocation section '"):
            # Extract section name: Relocation section '.rela.text' at offset ...
            current_section = line.split("'")[1]  # e.g., '.rela.text'
            # Remove the .rela/.rel prefix to get the section being relocated
            if current_section.startswith(".rela."):
                current_section = "." + current_section[6:]
            elif current_section.startswith(".rel."):
                current_section = "." + current_section[5:]
            continue

        # Only care about entries that belong to a .text* section
        if not current_section or not current_section.startswith(".text"):
            continue

        # Parse relocation entries like:
        # 00000004  00000536 R_V810_WHI1       00000000   .rodata + 0
        parts = line.split()
        if len(parts) < 5:
            continue

        # Both leading columns must be hex; this also rejects the column
        # header line ("Offset Info Type ...") and any malformed row.
        try:
            offset = int(parts[0], 16)
            info = int(parts[1], 16)
        except ValueError:
            continue

        reloc_type_num = info & 0xFF  # Lower 8 bits for 32-bit ELF
        reloc_type_name = parts[2]
        sym_name = parts[4]

        # The addend is the last token, preceded by a standalone "+" or "-"
        # token; a "-" marks a negative addend.
        addend = 0
        if parts[-2] in ("+", "-"):
            try:
                addend = int(parts[-1], 16)
            except ValueError:
                addend = 0
            else:
                if parts[-2] == "-":
                    addend = -addend

        relocs.append((current_section, offset, reloc_type_num, reloc_type_name, sym_name, addend))

    return relocs


 def get_section_bytes(objfile, section_name):
    """Extract the raw bytes of *section_name* from *objfile* using objcopy.

    Runs ``objcopy -O binary -j <section>`` into a temporary file and returns
    that file's contents as bytes. Returns None when objcopy exits with a
    non-zero status. The temporary file is removed in all cases.
    """
    import tempfile

    # Reserve a path for objcopy to write to; the handle is closed right away
    # and the file is kept (delete=False) until the finally block below.
    with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
        tmpname = tmp.name

    try:
        # objcopy to extract section
        result = subprocess.run(
            [OBJCOPY, "-O", "binary", "-j", section_name, str(objfile), tmpname],
            capture_output=True, text=True
        )
        if result.returncode != 0:
            return None

        with open(tmpname, "rb") as f:
            return f.read()
    finally:
        try:
            os.unlink(tmpname)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not hide the
            # result, but KeyboardInterrupt/SystemExit are no longer swallowed.
            pass


 def get_section_bytes_via_readelf(objfile, section_name):
    """Read a section directly from the ELF file using ``readelf -S`` output.

    Scans the section-header listing for a row that has *section_name* as a
    whole whitespace-separated field. The third and fourth fields after the
    name are parsed as the hex file offset and size (this assumes the
    "[Nr] Name Type Addr Off Size ..." column layout), and that byte range of
    *objfile* is returned. Returns None if no row yields a usable offset/size.
    """
    listing = subprocess.run(
        [READELF, "-S", str(objfile)],
        capture_output=True, text=True
    )

    for row in listing.stdout.splitlines():
        if section_name not in row:
            continue

        fields = row.split()
        if section_name not in fields:
            # Only a substring hit (e.g. ".text" inside ".rela.text").
            continue

        name_pos = fields.index(section_name)
        try:
            # Fields after the name: Type, Addr, Off, Size
            file_off = int(fields[name_pos + 3], 16)
            length = int(fields[name_pos + 4], 16)

            with open(objfile, "rb") as fh:
                fh.seek(file_off)
                return fh.read(length)
        except (IndexError, ValueError):
            # Row too short or not hex; fall through to the next line.
            continue

    return None


 def disassemble_at(objfile, section_name, offset, nbytes=4):
    """Return objdump's disassembly text for [offset, offset + nbytes) of a section."""
    window_start = "--start-address=0x%x" % offset
    window_stop = "--stop-address=0x%x" % (offset + nbytes)
    argv = [OBJDUMP, "-d", "-j", section_name, window_start, window_stop, str(objfile)]
    completed = subprocess.run(argv, capture_output=True, text=True)
    return completed.stdout


 def analyze_all_objects():
    """Analyze every "*.o" file in OBJDIR and print a relocation/mask report.

    For each .text* relocation the relocation type is counted and up to five
    example byte sequences per type are kept. Three sections are printed: a
    per-type table, detailed examples per type, and the Lumina mask summary.
    Relocation types missing from RELOC_INFO are shown with "?" placeholders.

    Exits with status 1 if OBJDIR contains no object files.

    Returns (reloc_type_counts, sorted_types): a dict of type number -> count
    and the list of (type number, count) pairs sorted by descending count.
    """
    ofiles = sorted(OBJDIR.glob("*.o"))
    if not ofiles:
        print("ERROR: No .o files found in", OBJDIR)
        sys.exit(1)

    print(f"Analyzing {len(ofiles)} object files...")
    print("=" * 80)

    (reloc_type_counts, reloc_type_names,
     reloc_type_examples, total_text_relocs) = _collect_reloc_stats(ofiles)

    print(f"\nTotal .text relocations across {len(ofiles)} files: {total_text_relocs}")
    print()

    # Sort by count descending
    sorted_types = sorted(reloc_type_counts.items(), key=lambda x: -x[1])

    _print_type_table(sorted_types, reloc_type_names)
    _print_type_examples(sorted_types, reloc_type_names, reloc_type_examples)
    _print_lumina_summary(sorted_types, reloc_type_names)

    return reloc_type_counts, sorted_types


 def _collect_reloc_stats(ofiles):
    """Count .text relocations per type and keep up to five examples of each.

    Returns (counts, names, examples, total) where counts maps type number to
    occurrences, names maps type number to the name readelf printed, and
    examples maps type number to a list of
    (file name, section, offset, insn_bytes, symbol, addend) tuples.
    """
    max_examples = 5
    counts = defaultdict(int)
    names = {}
    examples = defaultdict(list)
    total = 0

    for ofile in ofiles:
        # Section contents are extracted at most once per object file.
        section_cache = {}

        for section, offset, rtype, rtypename, symname, addend in parse_readelf_relocs(ofile):
            total += 1
            counts[rtype] += 1
            names[rtype] = rtypename

            if section not in section_cache:
                section_cache[section] = get_section_bytes(ofile, section)
            sec_bytes = section_cache[section]
            if sec_bytes is None:
                continue

            info = RELOC_INFO.get(rtype)
            # Unknown types: grab 4 bytes so there is something to inspect.
            nbytes = 4 if info is None else info[1]
            if nbytes == 0:
                continue  # no-op relocation, touches no instruction bits

            if offset + nbytes <= len(sec_bytes) and len(examples[rtype]) < max_examples:
                examples[rtype].append(
                    (ofile.name, section, offset, sec_bytes[offset:offset + nbytes], symname, addend)
                )

    return counts, names, examples, total


 def _print_type_table(sorted_types, reloc_type_names):
    """Print one summary row per encountered relocation type."""
    print("=" * 100)
    print(f"{'Type#':>6} {'Name':<35} {'Count':>6} {'Size':>4} {'Mask (hex)':>12} {'PCrel':>5} {'Description'}")
    print("=" * 100)

    for rtype, count in sorted_types:
        info = RELOC_INFO.get(rtype)
        if info:
            name, nbytes, mask, pcrel, desc = info
            size_str = str(nbytes)
            mask_str = f"0x{mask:08x}"
            pcrel_str = "Y" if pcrel else "N"
        else:
            # Nothing is known about this type: show "?" in every column
            # rather than deriving Y/N from a placeholder value.
            name = reloc_type_names.get(rtype, f"UNKNOWN_{rtype:#x}")
            size_str = "?"
            mask_str = "?"
            pcrel_str = "?"
            desc = "Unknown relocation type"

        print(f"  {rtype:#04x} {name:<35} {count:>6} {size_str:>4}B {mask_str:>12} {pcrel_str:>5}  {desc}")

    print("=" * 100)


 def _format_masked_bytes(insn_bytes, mask):
    """Render bytes with relocated bits hidden.

    A fully masked byte becomes "**", an untouched byte is shown as hex, and a
    partially masked byte shows its unmasked bits followed by "+".
    """
    cells = []
    for i, value in enumerate(insn_bytes):
        byte_mask = (mask >> (i * 8)) & 0xFF
        if byte_mask == 0xFF:
            cells.append("**")
        elif byte_mask == 0x00:
            cells.append(f"{value:02x}")
        else:
            cells.append(f"{value & ~byte_mask:02x}+")
    return " ".join(cells)


 def _print_type_examples(sorted_types, reloc_type_names, reloc_type_examples):
    """Print the collected example bytes for each relocation type."""
    print("\n\nDETAILED EXAMPLES PER RELOCATION TYPE:")
    print("=" * 100)

    for rtype, count in sorted_types:
        info = RELOC_INFO.get(rtype)
        if info:
            name, nbytes, mask, pcrel, _desc = info
            mask_line = f"    Mask: 0x{mask:08x} ({nbytes}B), PC-relative: {pcrel}"
        else:
            name = reloc_type_names.get(rtype, f"UNKNOWN_{rtype:#x}")
            nbytes = 0
            mask = 0
            # Do not claim a zero mask for a type that was never looked up.
            mask_line = "    Mask: ? (?B), PC-relative: ?"

        examples = reloc_type_examples[rtype]
        if not examples:
            continue

        print(f"\n--- {name} ({rtype:#04x}) - {count} occurrences ---")
        print(mask_line)

        for fname, section, offset, insn_bytes, symname, addend in examples:
            hex_bytes = " ".join(f"{b:02x}" for b in insn_bytes)
            masked_str = _format_masked_bytes(insn_bytes, mask) if nbytes > 0 else ""
            # Signed format so a negative addend prints as "-0x4", not "+-0x4".
            addend_str = f"{addend:+#x}" if addend else ""
            print(f"    {fname}:{section}+{offset:#06x}  bytes=[{hex_bytes}]  masked=[{masked_str}]  sym={symname}{addend_str}")


 def _print_lumina_summary(sorted_types, reloc_type_names):
    """Print the per-type little-endian byte masks for the types encountered."""
    print("\n\n")
    print("=" * 100)
    print("LUMINA SIGNATURE MASK SUMMARY")
    print("=" * 100)
    print()
    print("For Lumina v850 support, the following instruction bytes must be")
    print("masked (zeroed) at each relocation offset:")
    print()
    print(f"{'Reloc Type':<40} {'Size':>4} {'LE Byte Mask':<20} {'Notes'}")
    print("-" * 100)

    # Only print types that were actually encountered
    for rtype, count in sorted_types:
        info = RELOC_INFO.get(rtype)
        if not info:
            print(f"  UNKNOWN type {rtype:#04x} ({reloc_type_names.get(rtype, '?')}) - {count} occurrences - NEEDS INVESTIGATION")
            continue

        name, nbytes, mask, pcrel, desc = info
        if nbytes == 0:
            continue  # skip no-op relocs

        # Byte-level mask, lowest-addressed byte first
        byte_mask_str = " ".join(f"{(mask >> (i * 8)) & 0xFF:02x}" for i in range(nbytes))

        pcrel_note = " [PC-rel]" if pcrel else ""
        print(f"  {name:<38} {nbytes:>4}B  [{byte_mask_str}]{'':>{18-len(byte_mask_str)}} {desc}{pcrel_note}")

    print("-" * 100)
    print()
    print("INTERPRETATION: At each relocation offset, mask out (zero) the bits")
    print("indicated by the byte mask above. For example, for R_V850_PCR22:")
    print("  Bytes at offset: [XX XX XX XX] (little-endian 32-bit)")
    print("  Mask:            [3f 00 fe ff] = 0xfffe003f")
    print("  Keep:  bits that are 0 in the mask (opcode, register fields)")
    print("  Clear: bits that are 1 in the mask (relocated operand)")


 if __name__ == "__main__":
    analyze_all_objects()
diff --git a/empirical_diff.py b/empirical_diff.py
 #!/usr/bin/env python3
 """
 Empirical verification: Link v850 object files at different base addresses,
 extract .text sections, and binary-diff to find which bytes actually change.
 Cross-reference with relocation data to verify masks.
 """

 import subprocess
 import struct
 import sys
 import os
 import tempfile
 from collections import defaultdict
 from pathlib import Path

 # Directory that main() globs for "*.o" input files.
 OBJDIR = Path("/home/null/dev/gcc/test-programs/v850-out")
 # Scratch directory created by main() and passed to link_at_base() for its
 # linker scripts, linked ELFs and extracted .text dumps.
 LINKDIR = Path("/home/null/dev/gcc/test-programs/v850-linked")
 # Absolute paths of the v850-elf binutils executables run via subprocess.
 LD = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-ld"
 OBJCOPY = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objcopy"
 OBJDUMP = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objdump"
 READELF = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-readelf"

 # Base addresses at which main() links every object file; the resulting
 # .text images are compared against each other.
 BASES = [0x00000000, 0x00100000, 0x10000000]

 # Linker script template, filled in by link_at_base() via str.format().
 # "{base:#010x}" becomes the start address of the output; doubled braces
 # ("{{" / "}}") are str.format escapes for the literal braces ld expects.
 LDSCRIPT_TEMPLATE = """
 OUTPUT_FORMAT("elf32-v850-rh850", "elf32-v850-rh850", "elf32-v850-rh850")
 OUTPUT_ARCH(v850:rh850)
 ENTRY(_main)
 SECTIONS {{
    . = {base:#010x};
    .text : {{ *(.text .text.* .text.startup .text.startup.*) }}
    . = ALIGN(4);
    .rodata : {{ *(.rodata .rodata.*) }}
    . = ALIGN(4);
    .data : {{ *(.data .data.*) }}
    . = ALIGN(4);
    .bss : {{ *(.bss .bss.* COMMON) }}
    /DISCARD/ : {{ *(.debug_* .comment .note.* .eh_frame .gcc_except_table*) }}
 }}
 """


 def link_at_base(ofile, base, outdir):
    """Link *ofile* at address *base* and dump the linked .text section.

    Writes a linker script generated from LDSCRIPT_TEMPLATE into *outdir*,
    links the object with unresolved symbols ignored, and extracts ".text"
    as a raw binary with objcopy.

    Returns the Path of the extracted ".text.bin" file (the caller is
    expected to delete it), or None if no ELF was produced or the extracted
    section is missing or empty. The linker script and ELF are removed on
    every path, and an empty dump is removed before returning None.
    """
    stem = ofile.stem
    ldscript = outdir / f"{stem}_base{base:#x}.ld"
    elf = outdir / f"{stem}_base{base:#x}.elf"
    textbin = outdir / f"{stem}_base{base:#x}.text.bin"

    # Success is judged by the output files existing, so clear anything left
    # over from an earlier run; otherwise stale output could mask a failure.
    elf.unlink(missing_ok=True)
    textbin.unlink(missing_ok=True)

    try:
        # Write linker script
        with open(ldscript, "w") as f:
            f.write(LDSCRIPT_TEMPLATE.format(base=base))

        # Link. The exit status is not checked: with --noinhibit-exec the
        # presence of the output file is what decides success.
        subprocess.run(
            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
             "-o", str(elf), str(ofile)],
            capture_output=True, text=True
        )
        if not elf.exists():
            return None

        # Extract .text
        subprocess.run(
            [OBJCOPY, "-O", "binary", "-j", ".text", str(elf), str(textbin)],
            capture_output=True, text=True
        )
        if not textbin.exists() or textbin.stat().st_size == 0:
            textbin.unlink(missing_ok=True)
            return None

        return textbin
    finally:
        # Intermediate files are never needed by the caller.
        ldscript.unlink(missing_ok=True)
        elf.unlink(missing_ok=True)


 def parse_readelf_relocs(objfile):
    """Parse the .text* relocations printed by ``readelf -r`` for *objfile*.

    Returns a list of (offset, reloc_type_num, reloc_type_name, sym) tuples,
    in the order readelf prints them. Only entries belonging to relocation
    sections whose target section name starts with ".text" are included;
    lines that do not look like relocation entries are skipped.
    """
    result = subprocess.run(
        [READELF, "-r", str(objfile)],
        capture_output=True, text=True
    )

    relocs = []
    current_section = None

    for line in result.stdout.splitlines():
        line = line.strip()
        if line.startswith("Relocation section '"):
            # Header like: Relocation section '.rela.text' at offset ...
            # Strip the .rela/.rel prefix to get the section being relocated.
            current_section = line.split("'")[1]
            if current_section.startswith(".rela."):
                current_section = "." + current_section[6:]
            elif current_section.startswith(".rel."):
                current_section = "." + current_section[5:]
            continue

        if not current_section or not current_section.startswith(".text"):
            continue

        parts = line.split()
        if len(parts) < 5:
            continue

        # Both leading columns must be hex; this also rejects the column
        # header line and any malformed row instead of raising.
        try:
            offset = int(parts[0], 16)
            info = int(parts[1], 16)
        except ValueError:
            continue

        reloc_type_num = info & 0xFF  # low 8 bits of the Info column
        reloc_type_name = parts[2]
        sym_name = parts[4]

        relocs.append((offset, reloc_type_num, reloc_type_name, sym_name))

    return relocs


 def get_section_offsets(objfile):
    """Placeholder for mapping .text subsections into the combined .text layout.

    The intended result is a dict of section_name -> offset_in_combined_text,
    needed because linking merges all .text.* sections into .text while
    relocation offsets stay relative to their own section.

    Not implemented: *objfile* is ignored and None is returned.
    """
    # Design notes kept from the original stub:
    #  - simplest route: link at base 0 and read the linker map, or inspect
    #    objdump output of the linked ELF;
    #  - a linear, order-preserving merge of the sections could be assumed;
    #  - for now the script compares raw .text bytes of ELFs linked at
    #    different bases, so this mapping is not needed yet.
    return None


 def diff_bytes(bytes0, bytes1):
    """Return (offset, byte0, byte1) for every position where the inputs differ.

    Only the common prefix length is compared; trailing bytes of the longer
    input are ignored.
    """
    return [
        (pos, left, right)
        for pos, (left, right) in enumerate(zip(bytes0, bytes1))
        if left != right
    ]


 def compute_diff_mask(bytes_list):
    """Return a per-byte mask of the bits that differ from the first array.

    *bytes_list* holds byte arrays that are expected to have equal length.
    Each array after the first is XORed against the first and the results
    are ORed together, so a 1 bit marks a position that changed in at least
    one array. Returns None when fewer than two arrays are given.
    """
    if not bytes_list or len(bytes_list) < 2:
        return None

    reference = bytes_list[0]
    changed = bytearray(len(reference))

    for candidate in bytes_list[1:]:
        for pos, ref_byte in enumerate(reference):
            changed[pos] |= ref_byte ^ candidate[pos]

    return bytes(changed)


 def main():
    LINKDIR.mkdir(parents=True, exist_ok=True)
    
    ofiles = sorted(OBJDIR.glob("*.o"))
    print(f"Processing {len(ofiles)} object files...")
    print(f"Base addresses: {[hex(b) for b in BASES]}")
    print()
    
    total_diff_bytes = 0
    total_text_bytes = 0
    
    # Per-relocation-type verification
    # For each reloc at an offset, check if the diff mask matches the expected mask
    verified_types = defaultdict(lambda: {"match": 0, "mismatch": 0, "examples": []})
    
    # Reloc type info (subset - the ones we actually see)
    EXPECTED_MASKS = {
        # type_num: (name, size_bytes, le_mask_int)
        0x36: ("R_V810_WHI1",   2, 0x0000ffff),
        0x34: ("R_V810_WLO",    2, 0x0000ffff),
        0x46: ("R_V850_PCR22",  4, 0xfffe003f),
        0x4c: ("R_V810_WLO_1",  2, 0x0000fffe),
        0x47: ("R_V850_BLO",    4, 0xfffe0020),
        # Standard V850 types (in case any appear)
        1:    ("R_V850_9_PCREL", 2, 0x0000f870),
        2:    ("R_V850_22_PCREL", 4, 0xfffe003f),  # same bit pattern as PCR22 in code
        3:    ("R_V850_HI16_S", 2, 0x0000ffff),
        5:    ("R_V850_LO16",   2, 0x0000ffff),
        6:    ("R_V850_ABS32",  4, 0xffffffff),
    }
    
    for ofile in ofiles:
        # Link at all base addresses
        text_bins = {}
        for base in BASES:
            binpath = link_at_base(ofile, base, LINKDIR)
            if binpath:
                with open(binpath, "rb") as f:
                    text_bins[base] = f.read()
                binpath.unlink(missing_ok=True)
        
        if len(text_bins) < 2:
            print(f"  SKIP {ofile.name}: could not link at enough bases")
            continue
        
        # All should be same size
        sizes = set(len(v) for v in text_bins.values())
        if len(sizes) != 1:
            print(f"  WARN {ofile.name}: different .text sizes: {sizes}")
            continue
        
        text_size = sizes.pop()
        total_text_bytes += text_size
        
        # Compute diff mask across all bases
        bytes_list = [text_bins[b] for b in sorted(text_bins.keys())]
        diff_mask = compute_diff_mask(bytes_list)
        
        # Count changed bytes
        changed_bytes = sum(1 for b in diff_mask if b != 0)
        total_diff_bytes += changed_bytes
        
        if changed_bytes == 0:
            # No relocations applied (or all PC-relative within same section)
            continue
        
        # Get relocation info from the .o file
        relocs = parse_readelf_relocs(ofile)
        
        # For each relocation, check if the diff mask matches
        # Note: reloc offsets in the .o are relative to each section.
        # When linked, multiple .text sections get concatenated.
        # We need to figure out the combined offset.
        
        # Get the section layout from objdump of a linked ELF to map section offsets
        # Actually, let's just link one more time and check objdump
        # Simpler: use the diff mask directly and check byte-by-byte
        
        # Report diff positions
        diff_positions = [(i, diff_mask[i]) for i in range(len(diff_mask)) if diff_mask[i] != 0]
        
        if len(diff_positions) > 0:
            # Group consecutive diffs into ranges
            ranges = []
            start = diff_positions[0][0]
            end = start
            for pos, mask_byte in diff_positions[1:]:
                if pos == end + 1:
                    end = pos
                else:
                    ranges.append((start, end))
                    start = pos
                    end = pos
            ranges.append((start, end))
            
            # Show bytes from base0 at diff positions
            base0_bytes = bytes_list[0]
            base1_bytes = bytes_list[1]
            
            print(f"\n{ofile.name}: {text_size} bytes, {changed_bytes} bytes differ ({changed_bytes*100/text_size:.1f}%)")
            print(f"  Changed ranges: {len(ranges)}")
            
            if len(ranges) <= 20:
                for start, end in ranges:
                    nbytes = end - start + 1
                    b0 = " ".join(f"{base0_bytes[i]:02x}" for i in range(start, end+1))
                    b1 = " ".join(f"{base1_bytes[i]:02x}" for i in range(start, end+1))
                    dm = " ".join(f"{diff_mask[i]:02x}" for i in range(start, end+1))
                    print(f"    offset {start:#06x}-{end:#06x} ({nbytes}B): base0=[{b0}] base1=[{b1}] mask=[{dm}]")
            else:
                print(f"  (too many ranges to show individually)")
    
    print("\n" + "=" * 80)
    print(f"SUMMARY: {total_text_bytes} total .text bytes across all files")
    print(f"         {total_diff_bytes} bytes changed ({total_diff_bytes*100/total_text_bytes:.1f}% of total)")
    print()
    
    # Now do the detailed cross-reference:
    # Link each file at base0 and base1, get relocations, and verify each relocation's mask
    print("\n" + "=" * 80)
    print("CROSS-REFERENCE: Verifying relocation masks empirically")
    print("=" * 80)
    
    # For this we need to know the exact offset mapping between the .o relocations
    # and the combined linked .text. Let's do this properly by examining the linked ELF.
    
    verified_count = 0
    mismatch_count = 0
    
    for ofile in ofiles:
        # Link at base 0 and base 0x10000000 (far enough to see changes in HI16)
        text_bins = {}
        elf_paths = {}
        
        for base in [0x00000000, 0x10000000]:
            stem = ofile.stem
            ldscript = LINKDIR / f"_verify_{stem}_{base:#x}.ld"
            elf = LINKDIR / f"_verify_{stem}_{base:#x}.elf"
            
            with open(ldscript, "w") as f:
                f.write(LDSCRIPT_TEMPLATE.format(base=base))
            
            result = subprocess.run(
                [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
                 "-o", str(elf), str(ofile)],
                capture_output=True, text=True
            )
            
            if elf.exists():
                elf_paths[base] = elf
                textbin = LINKDIR / f"_verify_{stem}_{base:#x}.text.bin"
                subprocess.run(
                    [OBJCOPY, "-O", "binary", "-j", ".text", str(elf), str(textbin)],
                    capture_output=True, text=True
                )
                if textbin.exists():
                    with open(textbin, "rb") as f:
                        text_bins[base] = f.read()
                    textbin.unlink(missing_ok=True)
            
            ldscript.unlink(missing_ok=True)
        
        if len(text_bins) < 2:
            for p in elf_paths.values():
                p.unlink(missing_ok=True)
            continue
        
        base0_bytes = text_bins[0x00000000]
        base1_bytes = text_bins[0x10000000]
        
        if len(base0_bytes) != len(base1_bytes):
            for p in elf_paths.values():
                p.unlink(missing_ok=True)
            continue
        
        # Compute per-byte XOR mask
        xor_mask = bytearray(len(base0_bytes))
        for i in range(len(base0_bytes)):
            xor_mask[i] = base0_bytes[i] ^ base1_bytes[i]
        
        # Get section layout from the linked ELF to map reloc offsets
        # Parse objdump -h to get section positions
        if 0x00000000 in elf_paths:
            result = subprocess.run(
                [OBJDUMP, "-h", str(elf_paths[0x00000000])],
                capture_output=True, text=True
            )
            
            # Parse section VMA and file offsets
            # We need to understand how .text subsections are laid out
            # Since we told the linker to put all .text* into .text,
            # the final .text is the concatenation.
            
            # Get the .text VMA
            text_vma = None
            text_size_linked = None
            for line in result.stdout.splitlines():
                parts = line.split()
                if len(parts) >= 6 and parts[1] == ".text":
                    text_size_linked = int(parts[2], 16)
                    text_vma = int(parts[3], 16)
                    break
        
        # Now get relocations from the ORIGINAL .o file
        # The tricky part: reloc offsets in the .o file are per-section
        # (.text, .text.startup, etc.) but in the linked ELF they're combined.
        # We need to know the offset of each input section in the combined .text.
        
        # Use -M to get the map, or use objdump to find function starts
        # Simplest: use readelf -S on the .o to get section sizes, assume they're
        # concatenated in the order they appear.
        
        result = subprocess.run(
            [READELF, "-S", str(ofile)],
            capture_output=True, text=True
        )
        
        # Parse section sizes
        text_sections_ordered = []  # (section_name, size)
        for line in result.stdout.splitlines():
            # Typical format:
            # [ 1] .text             PROGBITS        00000000 000034 000254 00  AX  0   0  2
            # [ 2] .rela.text        RELA            ...
            parts = line.split()
            for i, p in enumerate(parts):
                if p.startswith(".text") and not p.startswith(".text.") and i+1 < len(parts):
                    # Check it's a PROGBITS section
                    if "PROGBITS" in line:
                        sec_name = p
                        # Find size field - it's the 3rd hex number after section name
                        # Format: Name Type Addr Off Size ...
                        idx = parts.index(p)
                        try:
                            # Could be tricky with column alignment, try a different approach
                            pass
                        except:
                            pass
        
        # Actually, let's do it more robustly by looking at readelf -S --wide
        result = subprocess.run(
            [READELF, "-S", "--wide", str(ofile)],
            capture_output=True, text=True
        )
        
        text_sections = {}  # name -> size
        for line in result.stdout.splitlines():
            line = line.strip()
            if "PROGBITS" not in line:
                continue
            # Parse: [Nr] Name Type Addr Off Size ...
            # Example: [ 1] .text PROGBITS 00000000 000034 000254 00 AX 0 0 2
            parts = line.split()
            sec_name = None
            for i, p in enumerate(parts):
                if p.startswith(".text"):
                    sec_name = p
                    # The fields after Name are: Type Addr Off Size
                    # Find the PROGBITS index
                    pb_idx = parts.index("PROGBITS")
                    try:
                        # After PROGBITS: Addr, Off, Size
                        size = int(parts[pb_idx + 3], 16)
                        text_sections[sec_name] = size
                    except (IndexError, ValueError):
                        pass
                    break
        
        # Get relocation data per section
        result = subprocess.run(
            [READELF, "-r", str(ofile)],
            capture_output=True, text=True
        )
        
        relocs_by_section = defaultdict(list)
        current_section = None
        
        for line in result.stdout.splitlines():
            line = line.strip()
            if line.startswith("Relocation section '"):
                current_section = line.split("'")[1]
                if current_section.startswith(".rela."):
                    current_section = "." + current_section[6:]
                elif current_section.startswith(".rel."):
                    current_section = "." + current_section[5:]
                continue
            
            if not current_section or not current_section.startswith(".text"):
                continue
            
            parts = line.split()
            if len(parts) < 5:
                continue
            
            try:
                offset = int(parts[0], 16)
            except ValueError:
                continue
            
            info = int(parts[1], 16)
            reloc_type_num = info & 0xFF
            reloc_type_name = parts[2]
            
            relocs_by_section[current_section].append((offset, reloc_type_num, reloc_type_name))
        
        # Now compute the combined offset for each section
        # The linker script says: *(.text .text.* .text.startup .text.startup.*)
        # The order depends on the linker. Let's determine it from the linked ELF.
        
        # Use nm on the linked ELF to find function addresses
        if 0x00000000 in elf_paths:
            result = subprocess.run(
                [OBJDUMP, "-d", str(elf_paths[0x00000000])],
                capture_output=True, text=True
            )
            
            # Find the start of each function to determine section offsets
            # For a simpler approach: the sections are placed in the order they
            # appear in the wildcard pattern. Since we use *(.text .text.*),
            # .text comes first, then .text.startup, etc.
            
            # But actually, the order within *(.text .text.*) depends on input order.
            # Let's just verify by checking which bytes at reloc offsets differ.
            
            # The safest approach: for each relocation, check at offset ± some range
            # in the combined .text whether the diff matches.
            # 
            # OR: use the linked ELF at base0 with objdump to see what's at each address,
            #     and match function names to find section offsets.
        
        # Simpler approach: just compute the section offset mapping
        # by looking at the linked ELF's symbol table
        # We know function addresses from nm, and function starts in the .o from readelf
        
        # Actually, the SIMPLEST reliable approach:
        # Link with -Map to get a linker map file
        mapfile = LINKDIR / f"_verify_{ofile.stem}_map.txt"
        ldscript = LINKDIR / f"_verify_{ofile.stem}_map.ld"
        elf = LINKDIR / f"_verify_{ofile.stem}_map.elf"
        
        with open(ldscript, "w") as f:
            f.write(LDSCRIPT_TEMPLATE.format(base=0))
        
        result = subprocess.run(
            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
             "-Map", str(mapfile), "-o", str(elf), str(ofile)],
            capture_output=True, text=True
        )
        
        # Parse the map file to find section placements
        section_offsets = {}  # input_section_name -> offset in combined .text
        
        if mapfile.exists():
            map_content = mapfile.read_text()
            # Look for lines like:
            # .text           0x0000000000000000      0x254 /path/to/file.o(.text)
            # .text.startup   0x0000000000000254       0x20 /path/to/file.o(.text.startup)
            in_text = False
            for line in map_content.splitlines():
                if line.startswith(".text") and "0x" in line:
                    parts = line.split()
                    if len(parts) >= 3 and "(" in line:
                        # Extract the input section name from parentheses
                        paren_start = line.index("(")
                        paren_end = line.index(")")
                        input_sec = line[paren_start+1:paren_end]
                        
                        # Extract VMA
                        addr_str = parts[1]
                        try:
                            vma = int(addr_str, 16)
                            section_offsets[input_sec] = vma  # offset from base=0
                        except ValueError:
                            pass
            
            mapfile.unlink(missing_ok=True)
        
        ldscript.unlink(missing_ok=True)
        elf.unlink(missing_ok=True)
        
        # Now verify each relocation
        for sec_name, reloc_list in relocs_by_section.items():
            base_offset = section_offsets.get(sec_name, None)
            if base_offset is None:
                # Try without leading dot variations
                continue
            
            for offset, rtype, rtypename in reloc_list:
                combined_offset = base_offset + offset
                
                info = EXPECTED_MASKS.get(rtype)
                if info is None:
                    continue
                
                name, nbytes, expected_mask = info
                
                if combined_offset + nbytes > len(base0_bytes):
                    continue
                
                # Get the actual XOR at this location
                actual_xor = 0
                for i in range(nbytes):
                    actual_xor |= xor_mask[combined_offset + i] << (i * 8)
                
                # The actual XOR should be a SUBSET of the expected mask
                # (some bits might not change if the specific relocated value 
                # happens to have those bits the same at both addresses)
                if (actual_xor & ~expected_mask) == 0:
                    verified_types[rtype]["match"] += 1
                    verified_count += 1
                else:
                    verified_types[rtype]["mismatch"] += 1
                    mismatch_count += 1
                    if len(verified_types[rtype]["examples"]) < 3:
                        verified_types[rtype]["examples"].append(
                            (ofile.name, combined_offset, actual_xor, expected_mask)
                        )
        
        # Cleanup ELF files
        for p in elf_paths.values():
            p.unlink(missing_ok=True)
    
    # Print verification results
    print(f"\nVerified {verified_count} relocations, {mismatch_count} mismatches")
    print()
    
    for rtype in sorted(verified_types.keys()):
        data = verified_types[rtype]
        info = EXPECTED_MASKS.get(rtype, (f"type_{rtype:#x}", 0, 0))
        name = info[0]
        expected_mask = info[2]
        
        status = "OK" if data["mismatch"] == 0 else "MISMATCH"
        print(f"  {name:<25} mask={expected_mask:#010x}  match={data['match']:>5}  mismatch={data['mismatch']:>5}  [{status}]")
        
        for fname, offset, actual, expected in data["examples"]:
            extra_bits = actual & ~expected
            print(f"    MISMATCH in {fname} at offset {offset:#06x}: actual_xor={actual:#010x} expected_mask={expected:#010x} extra={extra_bits:#010x}")
    
    print("\nDone.")


 # Run the analysis only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
diff --git a/relo.md b/relo.md
diff --git a/verify_masks.py b/verify_masks.py
 #!/usr/bin/env python3
 """
 Cross-reference v850 relocation masks with empirical binary diffs.

 For each .o file:
 1. Link at base 0x0 and 0x10000000
 2. Extract combined .text binary from each
 3. XOR to find changed bits
 4. Parse linker map to get section-to-combined-offset mapping
 5. Parse relocations from .o file
 6. At each relocation offset, compare empirical XOR with expected mask
 """

 import subprocess
 import struct
 import sys
 import os
 import tempfile
 import re
 from collections import defaultdict
 from pathlib import Path

 # Directory that main() globs for "*.o" object files to verify.
 OBJDIR = Path("/home/null/dev/gcc/test-programs/v850-out")
 # Scratch directory (created by main()) for the generated linker scripts,
 # linked ELFs, extracted .text dumps and linker map files.
 WORKDIR = Path("/tmp/v850_verify")
 # Absolute paths to the v850-elf binutils executables run through subprocess.
 LD = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-ld"
 OBJCOPY = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objcopy"
 OBJDUMP = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objdump"
 READELF = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-readelf"

 # Linker script rendered with str.format(base=...): the location counter is
 # set to `base` and all .text* input sections are gathered into one output
 # .text section. Doubled braces ({{ }}) become literal braces after formatting.
 LDSCRIPT_TEMPLATE = """OUTPUT_FORMAT("elf32-v850-rh850", "elf32-v850-rh850", "elf32-v850-rh850")
 OUTPUT_ARCH(v850:rh850)
 ENTRY(_main)
 SECTIONS {{
    . = {base:#010x};
    .text : {{ *(.text .text.* .text.startup .text.startup.*) }}
    . = ALIGN(4);
    .rodata : {{ *(.rodata .rodata.*) }}
    . = ALIGN(4);
    .data : {{ *(.data .data.*) }}
    . = ALIGN(4);
    .bss : {{ *(.bss .bss.* COMMON) }}
    /DISCARD/ : {{ *(.debug_* .comment .note.* .eh_frame .gcc_except_table*) }}
 }}"""

 # Expected masks from binutils source, keyed by reloc type number
 # (the low byte of r_info, as extracted by parse_relocations()).
 # Each value is (name, size_bytes, mask_le_32bit):
 #   size_bytes    - number of bytes read at the relocation offset
 #   mask_le_32bit - bits allowed to differ between the two link bases, compared
 #                   against the XOR of those bytes assembled little-endian
 EXPECTED = {
    0x36: ("R_V810_WHI1",    2, 0x0000ffff),
    0x34: ("R_V810_WLO",     2, 0x0000ffff),
    0x46: ("R_V850_PCR22",   4, 0xfffe003f),
    0x4c: ("R_V810_WLO_1",   2, 0x0000fffe),
    0x47: ("R_V850_BLO",     4, 0xfffe0020),
    # In case standard v850 types appear
    1:    ("R_V850_9_PCREL",     2, 0x0000f870),
    2:    ("R_V850_22_PCREL",    4, 0xfffe003f),
    3:    ("R_V850_HI16_S",      2, 0x0000ffff),
    5:    ("R_V850_LO16",        2, 0x0000ffff),
    6:    ("R_V850_ABS32",       4, 0xffffffff),
    0x33: ("R_V810_WORD",        4, 0xffffffff),
 }


 def _text_offsets_from_map(map_text, base):
    """Map each non-empty .text* input section in a GNU ld map to its offset from base.

    Handles both layouts ld emits for an input-section entry:

        .text          0x00000000      0x252 /path/to/file.o

    and, when the section name is too long for the name column, the wrapped form

        .text.startup.some_long_name
                       0x00000252       0x20 /path/to/file.o
    """
    entry_re = re.compile(r'(\.\S+)\s+(0x[0-9a-f]+)\s+(0x[0-9a-f]+)\s+\S+')
    wrapped_re = re.compile(r'(0x[0-9a-f]+)\s+(0x[0-9a-f]+)\s+\S+')
    bare_name_re = re.compile(r'\.\S+')

    offsets = {}
    pending_name = None  # section name seen alone on the previous line
    for raw_line in map_text.splitlines():
        line = raw_line.strip()
        m = entry_re.match(line)
        if m:
            sec_name, vma_s, size_s = m.groups()
        else:
            m = wrapped_re.match(line) if pending_name else None
            if not m:
                # A line holding only ".name" may be the first half of a
                # wrapped entry; anything else cancels a pending name.
                pending_name = line if bare_name_re.fullmatch(line) else None
                continue
            sec_name = pending_name
            vma_s, size_s = m.groups()
        pending_name = None

        if sec_name.startswith(".text") and int(size_s, 16) > 0:
            # Offset within combined .text = VMA - base
            offsets[sec_name] = int(vma_s, 16) - base
    return offsets


 def link_and_extract(ofile, base, workdir):
    """Link .o at given base, return (.text bytes, section_offset_map).

    Args:
        ofile: Path of the object file to link.
        base: Address assigned to the start of the output .text section.
        workdir: Existing directory used for the temporary linker script,
            ELF, .text dump and map file; all of them are removed again.

    Returns:
        A tuple (text_bytes, section_offsets). text_bytes is the raw content
        of the linked .text section, or None when no non-empty .text could be
        extracted. section_offsets maps each non-empty .text* input section
        name to its offset from base, and is empty when no map was written.
    """
    stem = ofile.stem
    ldscript = workdir / f"{stem}_{base:#x}.ld"
    elf = workdir / f"{stem}_{base:#x}.elf"
    textbin = workdir / f"{stem}_{base:#x}.text.bin"
    mapfile = workdir / f"{stem}_{base:#x}.map"

    text_bytes = None
    section_offsets = {}  # input_section_name -> VMA offset relative to base

    # Leftovers from an interrupted run must not be mistaken for fresh output.
    for stale in (elf, textbin, mapfile):
        stale.unlink(missing_ok=True)

    try:
        ldscript.write_text(LDSCRIPT_TEMPLATE.format(base=base))

        subprocess.run(
            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
             "-Map", str(mapfile), "-o", str(elf), str(ofile)],
            capture_output=True, text=True
        )

        if elf.exists():
            subprocess.run(
                [OBJCOPY, "-O", "binary", "-j", ".text", str(elf), str(textbin)],
                capture_output=True, text=True
            )
            if textbin.exists() and textbin.stat().st_size > 0:
                text_bytes = textbin.read_bytes()

        if mapfile.exists():
            section_offsets = _text_offsets_from_map(mapfile.read_text(), base)
    finally:
        # Remove every temporary artefact, including an empty .text dump.
        for path in (ldscript, elf, textbin, mapfile):
            path.unlink(missing_ok=True)

    return text_bytes, section_offsets


 def parse_relocations(ofile):
    """Collect the relocations `readelf -r` reports against .text* sections.

    Returns a defaultdict mapping the target section name (the relocation
    section name with its ".rela"/".rel" prefix removed) to a list of
    (offset, type_num, type_name) tuples, where type_num is the low byte of
    the relocation's info field.
    """
    readelf = subprocess.run(
        [READELF, "-r", str(ofile)],
        capture_output=True, text=True
    )

    by_section = defaultdict(list)
    target = None  # section the relocation table being read applies to

    for raw in readelf.stdout.splitlines():
        entry = raw.strip()

        # Table header: switch to the section this table relocates.
        if entry.startswith("Relocation section '"):
            target = entry.split("'")[1]
            for prefix in (".rela.", ".rel."):
                if target.startswith(prefix):
                    target = "." + target[len(prefix):]
                    break
            continue

        # Only tables that apply to code sections are of interest.
        if not (target and target.startswith(".text")):
            continue

        fields = entry.split()
        if len(fields) < 3:
            continue

        # Column headers and other non-entry lines fail the hex parse.
        try:
            offset, info = int(fields[0], 16), int(fields[1], 16)
        except ValueError:
            continue

        by_section[target].append((offset, info & 0xFF, fields[2]))

    return by_section


 def main():
    """Check the EXPECTED relocation masks against two links of every object file.

    Each "*.o" in OBJDIR is linked at base 0x0 and at base 0x10000000. The two
    .text images are XORed, and at every relocation offset the changed bits
    are compared with the expected mask for that relocation type. Results are
    printed as a per-type table, followed by anything that could not be
    checked, the mismatch details and a final verdict.

    A "VERIFIED" verdict is printed only when at least one relocation showed a
    change inside its mask and none showed a change outside it.
    """
    WORKDIR.mkdir(parents=True, exist_ok=True)

    ofiles = sorted(OBJDIR.glob("*.o"))
    print(f"Verifying masks for {len(ofiles)} object files...")
    print("Bases: 0x0 vs 0x10000000")
    print()

    # Stats
    type_stats = defaultdict(lambda: {"match": 0, "partial": 0, "mismatch": 0, "zero_xor": 0})
    mismatch_details = []

    # Everything that could not be checked is recorded so that a clean result
    # is never reported on the back of silently dropped input.
    skipped_files = []                    # (file name, reason)
    unmapped_sections = defaultdict(int)  # section name -> relocations dropped
    unknown_types = defaultdict(int)      # (type number, type name) -> relocations dropped
    out_of_range = 0

    for ofile in ofiles:
        # Link at two bases; input-section offsets are taken from the base-0 map.
        text0, offsets0 = link_and_extract(ofile, 0x00000000, WORKDIR)
        text1, _ = link_and_extract(ofile, 0x10000000, WORKDIR)

        if text0 is None or text1 is None:
            skipped_files.append((ofile.name, "link or .text extraction failed"))
            continue
        if len(text0) != len(text1):
            skipped_files.append((ofile.name, ".text size differs between bases"))
            continue

        # XOR mask: a set bit marks a bit that differs between the two links
        xor_mask = bytes(a ^ b for a, b in zip(text0, text1))

        # Get relocations
        relocs = parse_relocations(ofile)

        for sec_name, reloc_list in relocs.items():
            sec_offset = offsets0.get(sec_name)
            if sec_offset is None:
                unmapped_sections[sec_name] += len(reloc_list)
                continue

            for offset, rtype, rtypename in reloc_list:
                if rtype not in EXPECTED:
                    unknown_types[(rtype, rtypename)] += 1
                    continue

                _, nbytes, expected_mask = EXPECTED[rtype]
                combined_offset = sec_offset + offset

                if combined_offset < 0 or combined_offset + nbytes > len(xor_mask):
                    out_of_range += 1
                    continue

                # Read actual XOR at this location (little-endian)
                actual_xor = int.from_bytes(
                    xor_mask[combined_offset:combined_offset + nbytes], "little"
                )

                if actual_xor == 0:
                    # Might be a PC-relative reloc within the same section
                    # (both bases shift by same amount, so the PC-relative offset
                    # stays the same), or the value happens to be zero.
                    type_stats[rtype]["zero_xor"] += 1
                elif (actual_xor & ~expected_mask) == 0:
                    # All changed bits are within the expected mask - MATCH
                    type_stats[rtype]["match"] += 1
                else:
                    # Some bits changed outside the expected mask - MISMATCH
                    type_stats[rtype]["mismatch"] += 1
                    if len(mismatch_details) < 20:
                        mismatch_details.append(
                            (ofile.name, sec_name, offset, combined_offset,
                             rtype, rtypename, actual_xor, expected_mask,
                             actual_xor & ~expected_mask)
                        )

    # Print results
    print("=" * 110)
    print(f"{'Type':<25} {'Expected Mask':>12} {'Match':>7} {'Zero XOR':>10} {'Mismatch':>10} {'Status'}")
    print("=" * 110)

    total_match = 0
    total_zero = 0
    total_mismatch = 0

    for rtype in sorted(type_stats):
        stats = type_stats[rtype]
        name, _, mask = EXPECTED[rtype]

        m = stats["match"]
        z = stats["zero_xor"]
        mm = stats["mismatch"]
        total_match += m
        total_zero += z
        total_mismatch += mm

        status = "OK" if mm == 0 else "*** MISMATCH ***"
        print(f"  {name:<23} {mask:#010x}  {m:>7} {z:>10} {mm:>10}  {status}")

    print("=" * 110)
    print(f"  {'TOTAL':<23} {'':>12} {total_match:>7} {total_zero:>10} {total_mismatch:>10}")
    print()

    if total_zero > 0:
        print(f"Note: {total_zero} relocations had zero XOR (no visible change between bases).")
        print("  This is expected for PC-relative relocations (e.g., R_V850_PCR22) when")
        print("  the target is in the same section - the PC-relative offset doesn't change.")
        print("  Also expected for undefined symbols (resolve to 0 at both bases).")
        print()

    if skipped_files or unmapped_sections or unknown_types or out_of_range:
        print("NOT CHECKED:")
        for fname, reason in skipped_files:
            print(f"  {fname}: {reason}")
        for sec, count in sorted(unmapped_sections.items()):
            print(f"  {count} relocation(s) in {sec}: section not found in linker map")
        for (rtype, rtname), count in sorted(unknown_types.items()):
            print(f"  {count} relocation(s) of type {rtname} ({rtype:#x}): no expected mask defined")
        if out_of_range:
            print(f"  {out_of_range} relocation(s) fall outside the extracted .text image")
        print()

    if mismatch_details:
        print("MISMATCH DETAILS:")
        print("-" * 110)
        for (fname, sec, sec_off, comb_off, rtype, rtname, actual, expected, extra) in mismatch_details:
            print(f"  {fname} {sec}+{sec_off:#06x} (combined:{comb_off:#06x}) {rtname}")
            print(f"    actual_xor={actual:#010x}  expected_mask={expected:#010x}  extra_bits={extra:#010x}")
        print()

    if total_mismatch:
        print(f"WARNING: {total_mismatch} mismatches found. The masks may need adjustment.")
    elif total_match == 0:
        # Nothing changed anywhere, so the run carries no evidence either way.
        print("RESULT: INCONCLUSIVE. No relocation produced a visible change between bases,")
        print("  so no mask could be verified.")
    else:
        print("RESULT: All relocation masks VERIFIED. No bits changed outside expected masks.")
        print()
        print("The following masks are confirmed correct for Lumina v850 signature generation:")
        print()
        seen = set()
        unexercised = []
        for rtype in sorted(type_stats):
            name, nbytes, mask = EXPECTED[rtype]
            if name in seen:
                continue
            seen.add(name)
            if type_stats[rtype]["match"] == 0:
                # Only zero-XOR observations: the mask was never put to the test.
                unexercised.append(name)
                continue
            byte_masks = " ".join(f"{(mask >> (i*8)) & 0xFF:02x}" for i in range(nbytes))
            print(f"  {name:<25} {nbytes}B  mask=0x{mask:08x}  LE bytes=[{byte_masks}]")
        if unexercised:
            print()
            print("Seen only with zero XOR (mask not exercised by this run):")
            for name in unexercised:
                print(f"  {name}")


 # Run the verification only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Analyze v850/RH850 relocations across all compiled object files.

	For each relocation in .text* sections:
	- Read the instruction bytes at the relocation offset
	- Determine the relocation type and its bit mask
	- Report per-relocation-type statistics and the exact mask to apply

	This directly tells us which instruction bits are position-dependent
	and must be masked out in Lumina signatures.
	"""

	import subprocess
	import struct
	import sys
	import os
	from collections import defaultdict
	from pathlib import Path

	# Location of the compiled v850 object files (presumably the analysis input;
	# the code that reads it is outside this excerpt).
	OBJDIR = Path("/home/null/dev/gcc/test-programs/v850-out")
	# Absolute paths to the v850-elf binutils executables.
	READELF = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-readelf"
	OBJDUMP = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objdump"
	OBJCOPY = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objcopy"

	# ============================================================================
	# Relocation type definitions
	# Maps relocation type number -> (name, size_bytes, mask_as_32bit_le_int, is_pc_relative, description)
	#
	# The "mask" here is: which bits of the instruction encode the relocated value.
	# These are the bits that CHANGE when the binary is linked at a different address.
	# For Lumina, these bits must be zeroed/masked out in the signature.
	#
	# Derived from binutils source: bfd/elf32-v850.c v850_elf_reloc() function
	# and the howto tables.
	# ============================================================================

	# Standard V850 relocations (types 0-51)
	# RH850 relocations (types 0x30+)
	# We handle both since our .o files use RH850 variant

	def compute_mask_le_bytes(mask32, nbytes):
	    """Convert a 32-bit mask to little-endian byte array of nbytes.

	    Byte i of the result holds bits 8*i .. 8*i+7 of mask32, so the least
	    significant byte comes first. Bits above the requested width are
	    dropped, and nbytes <= 0 yields an empty bytes object.
	    """
	    return bytes((mask32 >> (i * 8)) & 0xFF for i in range(nbytes))

	# For each relocation type, define:
	# (name, size_in_bytes, mask_as_32bit_le_int, is_pc_relative, description)
	# The mask represents which bits of the instruction word(s) contain the relocated value.
	# For 16-bit instructions, only the lower 16 bits of the mask matter.
	# For 32-bit instructions, the full mask applies.
	# For data relocations (ABS32, WORD), all bits are affected.

	RELOC_INFO = {
	# === Standard V850 relocations ===
	0: ("R_V850_NONE", 0, 0x00000000, False, "No relocation"),
	1: ("R_V850_9_PCREL", 2, 0x0000f870, True, "9-bit PC-relative branch (Format III bCC) - bits[11:8,6:4] of 16-bit insn"),
	2: ("R_V850_22_PCREL", 4, 0xfffe003f, True, "22-bit PC-relative (Format V jr/jarl) - bits[31:17] \| bits[5:0]"),
	3: ("R_V850_HI16_S", 2, 0x0000ffff, False, "High 16 bits (signed adjust) - all 16 bits of the halfword"),
	4: ("R_V850_HI16", 2, 0x0000ffff, False, "High 16 bits - all 16 bits of the halfword"),
	5: ("R_V850_LO16", 2, 0x0000ffff, False, "Low 16 bits - all 16 bits of the halfword"),
	6: ("R_V850_ABS32", 4, 0xffffffff, False, "Absolute 32-bit address"),
	7: ("R_V850_16", 2, 0x0000ffff, False, "16-bit value"),
	8: ("R_V850_8", 1, 0x000000ff, False, "8-bit value"),
	9: ("R_V850_SDA_16_16_OFFSET", 2, 0x0000ffff, False, "SDA 16-bit offset (ld.b/st.b/movea)"),
	10: ("R_V850_SDA_15_16_OFFSET", 2, 0x0000fffe, False, "SDA 15-bit offset (ld.w/st.w) - bits[15:1]"),
	11: ("R_V850_ZDA_16_16_OFFSET", 2, 0x0000ffff, False, "ZDA 16-bit offset"),
	12: ("R_V850_ZDA_15_16_OFFSET", 2, 0x0000fffe, False, "ZDA 15-bit offset"),
	13: ("R_V850_TDA_6_8_OFFSET", 2, 0x0000007e, False, "TDA 6-bit offset (sst.w/sld.w) - bits[6:1]"),
	14: ("R_V850_TDA_7_8_OFFSET", 2, 0x0000007f, False, "TDA 7-bit offset (sst.h/sld.h) - bits[6:0]"),
	15: ("R_V850_TDA_7_7_OFFSET", 2, 0x0000007f, False, "TDA 7-bit offset (sst.b/sld.b) - bits[6:0]"),
	16: ("R_V850_TDA_16_16_OFFSET", 2, 0x0000ffff, False, "TDA 16-bit offset"),
	17: ("R_V850_TDA_4_5_OFFSET", 2, 0x0000000f, False, "TDA 4-bit offset (sld.hu) - bits[3:0]"),
	18: ("R_V850_TDA_4_4_OFFSET", 2, 0x0000000f, False, "TDA 4-bit offset (sld.bu) - bits[3:0]"),
	19: ("R_V850_SDA_16_16_SPLIT_OFFSET", 4, 0xfffe0020, False, "SDA split 16-bit (ld.bu) - bits[31:17]\|bit[5]"),
	20: ("R_V850_ZDA_16_16_SPLIT_OFFSET", 4, 0xfffe0020, False, "ZDA split 16-bit (ld.bu) - bits[31:17]\|bit[5]"),
	21: ("R_V850_CALLT_6_7_OFFSET", 2, 0x0000003f, False, "CALLT 6-bit offset - bits[5:0]"),
	22: ("R_V850_CALLT_16_16_OFFSET",2, 0x0000ffff, False, "CALLT 16-bit offset"),
	23: ("R_V850_GNU_VTINHERIT", 0, 0x00000000, False, "C++ vtable inheritance (no bits)"),
	24: ("R_V850_GNU_VTENTRY", 0, 0x00000000, False, "C++ vtable entry (no bits)"),
	25: ("R_V850_LONGCALL", 0, 0x00000000, True, "Linker relaxation hint (no bits)"),
	26: ("R_V850_LONGJUMP", 0, 0x00000000, True, "Linker relaxation hint (no bits)"),
	27: ("R_V850_ALIGN", 0, 0x00000000, False, "Alignment hint (no bits)"),
	28: ("R_V850_REL32", 4, 0xffffffff, True, "32-bit PC-relative"),
	29: ("R_V850_LO16_SPLIT_OFFSET", 4, 0xfffe0020, False, "LO16 split (ld.bu) - bits[31:17]\|bit[5]"),
	30: ("R_V850_16_PCREL", 2, 0x0000fffe, True, "16-bit PC-relative (loop)"),
	31: ("R_V850_17_PCREL", 4, 0xfffe0010, True, "17-bit PC-relative branch - bits[31:17]\|bit[4]"),
	32: ("R_V850_23", 4, 0xffff07f0, False, "23-bit offset - bits[31:16]\|bits[10:4]"),
	33: ("R_V850_32_PCREL", 4, 0xfffffffe, True, "32-bit PC-relative - bits[31:1]"),
	34: ("R_V850_32_ABS", 4, 0xfffffffe, False, "32-bit absolute branch - bits[31:1]"),
	35: ("R_V850_16_SPLIT_OFFSET", 4, 0xfffe0020, False, "16-bit split offset (same as SDA split)"),
	36: ("R_V850_16_S1", 2, 0x0000fffe, False, "16-bit shifted by 1 - bits[15:1]"),
	37: ("R_V850_LO16_S1", 2, 0x0000fffe, False, "LO16 shifted by 1 - bits[15:1]"),
	38: ("R_V850_CALLT_15_16_OFFSET",2, 0x0000fffe, False, "CALLT 15-bit offset - bits[15:1]"),
	39: ("R_V850_32_GOTPCREL", 4, 0xffffffff, True, "32-bit GOT PC-relative"),
	40: ("R_V850_16_GOT", 4, 0x0000ffff, False, "16-bit GOT offset"),
	41: ("R_V850_32_GOT", 4, 0xffffffff, False, "32-bit GOT offset"),
	42: ("R_V850_22_PLT", 4, 0x07ffff80, True, "22-bit PLT relative"),
	43: ("R_V850_32_PLT", 4, 0xffffffff, True, "32-bit PLT relative"),

	# === RH850 / V800 relocations (type numbers 0x30+) ===
	# These are indexed as (type - 0x30) in v800_elf_howto_table
	0x30: ("R_V810_NONE", 0, 0x00000000, False, "No relocation"),
	0x31: ("R_V810_BYTE", 1, 0x000000ff, False, "8-bit absolute"),
	0x32: ("R_V810_HWORD", 2, 0x0000ffff, False, "16-bit absolute"),
	0x33: ("R_V810_WORD", 4, 0xffffffff, False, "32-bit absolute"),
	0x34: ("R_V810_WLO", 2, 0x0000ffff, False, "Low 16 bits (movea) - all 16 bits of halfword"),
	0x35: ("R_V810_WHI", 2, 0x0000ffff, False, "High 16 bits (movhi) - all 16 bits of halfword"),
	0x36: ("R_V810_WHI1", 2, 0x0000ffff, False, "High 16 bits signed (movhi) - all 16 bits of halfword"),
	0x37: ("R_V810_GPBYTE", 1, 0x000000ff, False, "GP-relative 8-bit"),
	0x38: ("R_V810_GPHWORD", 2, 0x0000ffff, False, "GP-relative 16-bit"),
	0x39: ("R_V810_GPWORD", 4, 0xffffffff, False, "GP-relative 32-bit"),
	0x3a: ("R_V810_GPWLO", 2, 0x0000ffff, False, "GP-relative low 16"),
	0x3b: ("R_V810_GPWHI", 2, 0x0000ffff, False, "GP-relative high 16"),
	0x3c: ("R_V810_GPWHI1", 2, 0x0000ffff, False, "GP-relative high 16 signed"),
	0x3d: ("R_V850_HWLO", 2, 0x0000fffe, False, "Half-word low (shifted) - bits[15:1]"),
	0x3f: ("R_V850_EP7BIT", 2, 0x0000007f, False, "EP 7-bit"), # actually 1-byte field in howto but 16-bit insn
	0x40: ("R_V850_EPHBYTE", 2, 0x0000007f, False, "EP halfbyte"),
	0x41: ("R_V850_EPWBYTE", 2, 0x0000007e, False, "EP wordbyte"),
	0x42: ("R_V850_REGHWLO", 2, 0x0000fffe, False, "Register region half-word low"),
	0x44: ("R_V850_GPHWLO", 2, 0x0000fffe, False, "GP half-word low"),
	0x46: ("R_V850_PCR22", 4, 0xfffe003f, True, "22-bit PC-relative (jr/jarl) - bits[31:17]\|bits[5:0]"),
	0x47: ("R_V850_BLO", 4, 0xfffe0020, False, "24-bit LO split"),
	0x48: ("R_V850_EP4BIT", 2, 0x0000000f, False, "EP 4-bit"),
	0x49: ("R_V850_EP5BIT", 2, 0x0000000f, False, "EP 5-bit (shifted)"), # bits[3:0], shifted by 1
	0x4a: ("R_V850_REGBLO", 4, 0xfffe0020, False, "Reg region 24-bit LO split"),
	0x4b: ("R_V850_GPBLO", 4, 0xfffe0020, False, "GP 24-bit LO split"),
	0x4c: ("R_V810_WLO_1", 2, 0x0000fffe, False, "Low 16 bits shifted (ld.w/st.w) - bits[15:1]"),
	0x4d: ("R_V810_GPWLO_1", 2, 0x0000fffe, False, "GP low 16 shifted"),
	0x4e: ("R_V850_BLO_1", 4, 0xfffe0020, False, "24-bit LO split shifted"),
	0x4f: ("R_V850_HWLO_1", 2, 0x0000fffe, False, "Half-word low shifted"),
	0x51: ("R_V850_GPBLO_1", 4, 0xfffe0020, False, "GP 24-bit LO split shifted"),
	0x52: ("R_V850_GPHWLO_1", 2, 0x0000fffe, False, "GP half-word low shifted"),
	0x54: ("R_V850_EPBLO", 4, 0xfffe0020, False, "EP 24-bit LO split"),
	0x55: ("R_V850_EPHWLO", 2, 0x0000fffe, False, "EP half-word low"),
	0x57: ("R_V850_EPWLO_N", 2, 0x0000fffe, False, "EP word low N"),
	0x58: ("R_V850_PC32", 4, 0xfffffffe, True, "32-bit PC-relative"),
	0x59: ("R_V850_W23BIT", 4, 0xffff07f0, False, "23-bit word offset"),
	0x5a: ("R_V850_GPW23BIT", 4, 0xffff07f0, False, "GP 23-bit word offset"),
	0x5b: ("R_V850_EPW23BIT", 4, 0xffff07f0, False, "EP 23-bit word offset"),
	0x5c: ("R_V850_B23BIT", 4, 0xffff07f0, False, "23-bit byte offset"),
	0x5d: ("R_V850_GPB23BIT", 4, 0xffff07f0, False, "GP 23-bit byte offset"),
	0x5e: ("R_V850_EPB23BIT", 4, 0xffff07f0, False, "EP 23-bit byte offset"),
	0x5f: ("R_V850_PC16U", 2, 0x0000fffe, True, "16-bit PC-relative unsigned"),
	0x60: ("R_V850_PC17", 4, 0xfffe0010, True, "17-bit PC-relative branch"),
	0x61: ("R_V850_DW8", 4, 0x00000000, False, "8-bit double-word (TODO)"), # complex
	0x62: ("R_V850_GPDW8", 4, 0x00000000, False, "GP 8-bit double-word (TODO)"),
	0x63: ("R_V850_EPDW8", 4, 0x00000000, False, "EP 8-bit double-word (TODO)"),
	0x64: ("R_V850_PC9", 2, 0x0000f870, True, "9-bit PC-relative (bCC) - bits[15:11,6:4]"),
	0x65: ("R_V810_REGBYTE", 1, 0x000000ff, False, "Reg region 8-bit"),
	0x66: ("R_V810_REGHWORD", 2, 0x0000ffff, False, "Reg region 16-bit"),
	0x67: ("R_V810_REGWORD", 4, 0xffffffff, False, "Reg region 32-bit"),
	0x68: ("R_V810_REGWLO", 2, 0x0000ffff, False, "Reg region low 16"),
	0x69: ("R_V810_REGWHI", 2, 0x0000ffff, False, "Reg region high 16"),
	0x6a: ("R_V810_REGWHI1", 2, 0x0000ffff, False, "Reg region high 16 signed"),
	0x6b: ("R_V850_REGW23BIT",4, 0xffff07f0, False, "Reg region 23-bit word"),
	0x6c: ("R_V850_REGB23BIT",4, 0xffff07f0, False, "Reg region 23-bit byte"),
	0x6d: ("R_V850_REGDW8", 4, 0x00000000, False, "Reg region 8-bit double-word"),
	0x6e: ("R_V810_EPBYTE", 1, 0x000000ff, False, "EP 8-bit"),
	0x6f: ("R_V810_EPHWORD", 2, 0x0000ffff, False, "EP 16-bit"),
	0x70: ("R_V810_EPWORD", 4, 0xffffffff, False, "EP 32-bit"),
	0x71: ("R_V850_WLO23", 4, 0xffff07f0, False, "23-bit word LO"), # same encoding as R_V850_23
	0x72: ("R_V850_WORD_E", 4, 0xffffffff, False, "32-bit word (extended)"),
	0x73: ("R_V850_REGWORD_E",4, 0xffffffff, False, "Reg region 32-bit (extended)"),
	0x74: ("R_V850_WORD", 4, 0xffffffff, False, "32-bit word"),
	0x75: ("R_V850_GPWORD", 4, 0xffffffff, False, "GP 32-bit word"),
	0x76: ("R_V850_REGWORD2", 4, 0xffffffff, False, "Reg region 32-bit (#2)"),
	0x77: ("R_V850_EPWORD2", 4, 0xffffffff, False, "EP 32-bit word (#2)"),
	0x78: ("R_V810_TPBYTE", 1, 0x000000ff, False, "TP 8-bit"),
	0x79: ("R_V810_TPHWORD", 2, 0x0000ffff, False, "TP 16-bit"),
	0x7a: ("R_V810_TPWORD", 4, 0xffffffff, False, "TP 32-bit"),
	0x7b: ("R_V810_TPWLO", 2, 0x0000ffff, False, "TP low 16"),
	0x7c: ("R_V810_TPWHI", 2, 0x0000ffff, False, "TP high 16"),
	0x7d: ("R_V810_TPWHI1", 2, 0x0000ffff, False, "TP high 16 signed"),
	0xa0: ("R_V810_ABS32", 4, 0xffffffff, False, "Absolute 32-bit"),
	}


	def parse_readelf_relocs(objfile):
	    """Parse the relocations that apply to ``.text*`` sections of *objfile*.

	    Runs ``readelf -r`` on the object file and scans its textual output.

	    Returns:
	        A list of ``(section_name, offset, reloc_type_num, reloc_type_name,
	        sym_name, addend)`` tuples.  ``section_name`` is the section being
	        relocated (``.rela.text`` is reported as ``.text``).  ``addend`` is
	        0 when the entry carries no parsable addend.
	    """
	    result = subprocess.run(
	        [READELF, "-r", str(objfile)],
	        capture_output=True, text=True
	    )

	    relocs = []
	    current_section = None

	    for line in result.stdout.splitlines():
	        line = line.strip()
	        if line.startswith("Relocation section '"):
	            # Header: Relocation section '.rela.text' at offset ...
	            current_section = line.split("'")[1]
	            # Drop the .rela/.rel prefix to recover the relocated section.
	            if current_section.startswith(".rela."):
	                current_section = "." + current_section[6:]
	            elif current_section.startswith(".rel."):
	                current_section = "." + current_section[5:]
	            continue

	        # Only relocations against .text* sections are of interest.
	        if not current_section or not current_section.startswith(".text"):
	            continue

	        # Entries look like:
	        #   00000004 00000536 R_V810_WHI1 00000000 .rodata + 0
	        parts = line.split()
	        if len(parts) < 5:
	            continue

	        try:
	            offset = int(parts[0], 16)
	            info = int(parts[1], 16)
	        except ValueError:
	            # Column-header line or anything else that is not an entry.
	            continue

	        reloc_type_num = info & 0xFF  # Lower 8 bits for 32-bit ELF
	        reloc_type_name = parts[2]
	        sym_name = parts[4]

	        # readelf prints the addend as "<sym> + <hex>" or, for negative
	        # values, "<sym> - <hex>"; honour the sign instead of dropping it.
	        addend = 0
	        if parts[-2] in ("+", "-"):
	            try:
	                addend = int(parts[-1], 16)
	            except ValueError:
	                addend = 0
	            else:
	                if parts[-2] == "-":
	                    addend = -addend
	        elif "+" in line:
	            # Fallback for output where the '+' is not space separated.
	            try:
	                addend = int(line.split("+")[-1].strip(), 16)
	            except ValueError:
	                pass

	        relocs.append((current_section, offset, reloc_type_num, reloc_type_name, sym_name, addend))

	    return relocs


	def get_section_bytes(objfile, section_name):
	    """Extract the raw bytes of one section by running ``objcopy -O binary``.

	    Returns:
	        The section contents as ``bytes``, or ``None`` when objcopy exits
	        with a non-zero status.
	    """
	    import tempfile

	    # objcopy needs an output path; create the file, close it right away and
	    # remove it ourselves once the contents have been read back.
	    with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
	        tmpname = tmp.name

	    try:
	        result = subprocess.run(
	            [OBJCOPY, "-O", "binary", "-j", section_name, str(objfile), tmpname],
	            capture_output=True, text=True
	        )
	        if result.returncode != 0:
	            return None

	        with open(tmpname, "rb") as f:
	            return f.read()
	    finally:
	        try:
	            os.unlink(tmpname)
	        except OSError:
	            # Best-effort cleanup: a temp file that cannot be removed must
	            # not mask the result (or the exception) from the code above.
	            pass


	def get_section_bytes_via_readelf(objfile, section_name):
	    """Alternative extractor: locate the section with ``readelf -S`` and read it.

	    The file offset and size are taken from the section-header listing and
	    the bytes are then read straight from *objfile*.

	    Returns:
	        The section contents as ``bytes``, or ``None`` when no header line
	        names *section_name* exactly or its offset/size cannot be parsed.
	    """
	    # --wide keeps long section names intact; the default listing truncates
	    # them, which would defeat the exact-name match below.
	    result = subprocess.run(
	        [READELF, "-S", "--wide", str(objfile)],
	        capture_output=True, text=True
	    )

	    for line in result.stdout.splitlines():
	        # Header line layout: [Nr] Name Type Addr Off Size ...
	        parts = line.split()
	        if section_name not in parts:
	            continue

	        idx = parts.index(section_name)
	        try:
	            # Fields after the name: Type, Addr, Off, Size
	            offset = int(parts[idx + 3], 16)
	            size = int(parts[idx + 4], 16)
	        except (IndexError, ValueError):
	            continue

	        with open(objfile, "rb") as f:
	            f.seek(offset)
	            return f.read(size)

	    return None


	def disassemble_at(objfile, section_name, offset, nbytes=4):
	    """Return objdump's disassembly text for ``[offset, offset + nbytes)`` of a section."""
	    start_opt = "--start-address=0x%x" % offset
	    stop_opt = "--stop-address=0x%x" % (offset + nbytes)
	    command = [OBJDUMP, "-d", "-j", section_name, start_opt, stop_opt, str(objfile)]
	    proc = subprocess.run(command, capture_output=True, text=True)
	    return proc.stdout


	def analyze_all_objects():
	    """Scan every ``*.o`` file in OBJDIR and report its ``.text*`` relocations.

	    Each relocation returned by ``parse_readelf_relocs`` is counted by type
	    and, for up to five relocations per type, the bytes found at the
	    relocation offset are kept as examples.  Three reports are printed: a
	    per-type statistics table, the collected examples with the masked bytes
	    highlighted, and a byte-mask summary for Lumina signatures.

	    Calls ``sys.exit(1)`` when OBJDIR contains no object files.

	    Returns:
	        ``(reloc_type_counts, sorted_types)``: the mapping from relocation
	        type number to occurrence count, and the same data as a list of
	        ``(type, count)`` pairs sorted by descending count.
	    """
	    ofiles = sorted(OBJDIR.glob("*.o"))
	    if not ofiles:
	        print("ERROR: No .o files found in", OBJDIR)
	        sys.exit(1)

	    print(f"Analyzing {len(ofiles)} object files...")
	    print("=" * 80)

	    # Statistics
	    reloc_type_counts = defaultdict(int)     # type_num -> count
	    reloc_type_names = {}                    # type_num -> name printed by readelf
	    reloc_type_examples = defaultdict(list)  # type_num -> [(file, section, offset, insn_bytes, sym, addend)]
	    total_text_relocs = 0

	    for ofile in ofiles:
	        relocs = parse_readelf_relocs(ofile)

	        # Each section is extracted at most once per object file.
	        section_cache = {}

	        for section, offset, rtype, rtypename, symname, addend in relocs:
	            total_text_relocs += 1
	            reloc_type_counts[rtype] += 1
	            reloc_type_names[rtype] = rtypename

	            if section not in section_cache:
	                section_cache[section] = get_section_bytes(ofile, section)

	            sec_bytes = section_cache[section]
	            if sec_bytes is None:
	                continue

	            # Types missing from RELOC_INFO are sampled as 4 bytes.
	            info = RELOC_INFO.get(rtype)
	            nbytes = 4 if info is None else info[1]
	            if nbytes == 0:
	                continue  # no-op relocation

	            if offset + nbytes <= len(sec_bytes):
	                insn_bytes = sec_bytes[offset:offset + nbytes]

	                # Only keep a few examples per type
	                if len(reloc_type_examples[rtype]) < 5:
	                    reloc_type_examples[rtype].append(
	                        (ofile.name, section, offset, insn_bytes, symname, addend)
	                    )

	    # Print results
	    print(f"\nTotal .text relocations across {len(ofiles)} files: {total_text_relocs}")
	    print()

	    # Sort by count descending
	    sorted_types = sorted(reloc_type_counts.items(), key=lambda x: -x[1])

	    print("=" * 100)
	    print(f"{'Type#':>6} {'Name':<35} {'Count':>6} {'Size':>4} {'Mask (hex)':>12} {'PCrel':>5} {'Description'}")
	    print("=" * 100)

	    for rtype, count in sorted_types:
	        info = RELOC_INFO.get(rtype)
	        if info:
	            name, nbytes, mask, pcrel, desc = info
	            size_str = str(nbytes)
	            mask_str = f"0x{mask:08x}"
	            pcrel_str = "Y" if pcrel else "N"
	        else:
	            # Nothing is known about this type, so every column is "?"
	            # (a truthiness test on the placeholder would report "Y").
	            name = reloc_type_names.get(rtype, f"UNKNOWN_{rtype:#x}")
	            size_str = "?"
	            mask_str = "?"
	            pcrel_str = "?"
	            desc = "Unknown relocation type"

	        print(f" {rtype:#04x} {name:<35} {count:>6} {size_str:>4}B {mask_str:>12} {pcrel_str:>5} {desc}")

	    print("=" * 100)

	    # Print examples for each type
	    print("\n\nDETAILED EXAMPLES PER RELOCATION TYPE:")
	    print("=" * 100)

	    for rtype, count in sorted_types:
	        info = RELOC_INFO.get(rtype)
	        if info:
	            name, nbytes, mask, pcrel, desc = info
	        else:
	            name = reloc_type_names.get(rtype, f"UNKNOWN_{rtype:#x}")
	            nbytes = 0
	            mask = 0
	            pcrel = False
	            desc = "Unknown"

	        examples = reloc_type_examples[rtype]
	        if not examples:
	            continue

	        print(f"\n--- {name} ({rtype:#04x}) - {count} occurrences ---")
	        print(f" Mask: 0x{mask:08x} ({nbytes}B), PC-relative: {pcrel}")

	        for fname, section, offset, insn_bytes, symname, addend in examples:
	            hex_bytes = " ".join(f"{b:02x}" for b in insn_bytes)

	            # Show which bytes would be masked: "**" = whole byte masked,
	            # "xx+" = partly masked (shown with the relocated bits cleared),
	            # plain hex = untouched.
	            if nbytes > 0:
	                masked = []
	                for i, byte in enumerate(insn_bytes):
	                    byte_mask = (mask >> (i * 8)) & 0xFF
	                    if byte_mask == 0xFF:
	                        masked.append("**")
	                    elif byte_mask == 0x00:
	                        masked.append(f"{byte:02x}")
	                    else:
	                        masked.append(f"{byte & ~byte_mask:02x}+")
	                masked_str = " ".join(masked)
	            else:
	                masked_str = ""

	            addend_str = f"+{addend:#x}" if addend else ""
	            print(f" {fname}:{section}+{offset:#06x} bytes=[{hex_bytes}] masked=[{masked_str}] sym={symname}{addend_str}")

	    # =====================================================================
	    # LUMINA MASK SUMMARY
	    # =====================================================================
	    print("\n\n")
	    print("=" * 100)
	    print("LUMINA SIGNATURE MASK SUMMARY")
	    print("=" * 100)
	    print()
	    print("For Lumina v850 support, the following instruction bytes must be")
	    print("masked (zeroed) at each relocation offset:")
	    print()
	    print(f"{'Reloc Type':<40} {'Size':>4} {'LE Byte Mask':<20} {'Notes'}")
	    print("-" * 100)

	    # Only print types that were actually encountered
	    for rtype, count in sorted_types:
	        info = RELOC_INFO.get(rtype)
	        if not info:
	            print(f" UNKNOWN type {rtype:#04x} ({reloc_type_names.get(rtype, '?')}) - {count} occurrences - NEEDS INVESTIGATION")
	            continue

	        name, nbytes, mask, pcrel, desc = info
	        if nbytes == 0:
	            continue  # skip no-op relocs

	        # Byte-level mask, least significant byte first
	        byte_mask_str = " ".join(
	            f"{(mask >> (i * 8)) & 0xFF:02x}" for i in range(nbytes)
	        )

	        pcrel_note = " [PC-rel]" if pcrel else ""
	        print(f" {name:<38} {nbytes:>4}B [{byte_mask_str}]{'':>{18-len(byte_mask_str)}} {desc}{pcrel_note}")

	    print("-" * 100)
	    print()
	    print("INTERPRETATION: At each relocation offset, mask out (zero) the bits")
	    print("indicated by the byte mask above. For example, for R_V850_PCR22:")
	    print(" Bytes at offset: [XX XX XX XX] (little-endian 32-bit)")
	    print(" Mask: [3f 00 fe ff] = 0xfffe003f")
	    print(" Keep: bits that are 0 in the mask (opcode, register fields)")
	    print(" Clear: bits that are 1 in the mask (relocated operand)")

	    return reloc_type_counts, sorted_types


	# Run the relocation analysis only when executed as a script, not on import.
	if __name__ == "__main__":
	    analyze_all_objects()
	#!/usr/bin/env python3
	"""
	Empirical verification: Link v850 object files at different base addresses,
	extract .text sections, and binary-diff to find which bytes actually change.
	Cross-reference with relocation data to verify masks.
	"""

	import subprocess
	import struct
	import sys
	import os
	import tempfile
	from collections import defaultdict
	from pathlib import Path

	OBJDIR = Path("/home/null/dev/gcc/test-programs/v850-out")
	LINKDIR = Path("/home/null/dev/gcc/test-programs/v850-linked")
	LD = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-ld"
	OBJCOPY = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objcopy"
	OBJDUMP = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objdump"
	READELF = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-readelf"

	# Base addresses each object is linked at; differing bytes expose relocated bits.
	BASES = [0x00000000, 0x00100000, 0x10000000]

	# Linker script template.  It is filled in with str.format(base=...), so
	# literal braces are doubled.  Every input-section statement needs the "*"
	# file-name wildcard in front of the parenthesised section patterns.
	LDSCRIPT_TEMPLATE = """
	OUTPUT_FORMAT("elf32-v850-rh850", "elf32-v850-rh850", "elf32-v850-rh850")
	OUTPUT_ARCH(v850:rh850)
	ENTRY(_main)
	SECTIONS {{
	. = {base:#010x};
	.text : {{ *(.text .text.* .text.startup .text.startup.*) }}
	. = ALIGN(4);
	.rodata : {{ *(.rodata .rodata.*) }}
	. = ALIGN(4);
	.data : {{ *(.data .data.*) }}
	. = ALIGN(4);
	.bss : {{ *(.bss .bss.* COMMON) }}
	/DISCARD/ : {{ *(.debug_* .comment .note.* .eh_frame .gcc_except_table*) }}
	}}
	"""


	def link_at_base(ofile, base, outdir):
	    """Link *ofile* with its sections starting at *base* and dump ``.text``.

	    A linker script is generated from LDSCRIPT_TEMPLATE, the object is linked
	    with unresolved symbols ignored, and the ``.text`` output section is
	    extracted as a raw binary.

	    Args:
	        ofile: ``Path`` of the object file to link.
	        base: Start address written into the linker script.
	        outdir: ``Path`` of the directory that receives the generated files.

	    Returns:
	        ``Path`` of the extracted ``.text`` binary (left in *outdir* for the
	        caller to read and delete), or ``None`` when no ELF was produced or
	        the extracted ``.text`` is missing or empty.
	    """
	    stem = ofile.stem

	    ldscript = outdir / f"{stem}_base{base:#x}.ld"
	    elf = outdir / f"{stem}_base{base:#x}.elf"
	    textbin = outdir / f"{stem}_base{base:#x}.text.bin"

	    # Drop leftovers of an earlier run so the exists() checks below can only
	    # be satisfied by files produced by this invocation.
	    elf.unlink(missing_ok=True)
	    textbin.unlink(missing_ok=True)

	    try:
	        # Write linker script
	        with open(ldscript, "w") as f:
	            f.write(LDSCRIPT_TEMPLATE.format(base=base))

	        # Link; success is judged by whether the ELF appears, not by the
	        # exit status, because --noinhibit-exec is in effect.
	        subprocess.run(
	            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
	             "-o", str(elf), str(ofile)],
	            capture_output=True, text=True
	        )
	        if not elf.exists():
	            return None

	        # Extract .text
	        subprocess.run(
	            [OBJCOPY, "-O", "binary", "-j", ".text", str(elf), str(textbin)],
	            capture_output=True, text=True
	        )
	        if not textbin.exists() or textbin.stat().st_size == 0:
	            textbin.unlink(missing_ok=True)
	            return None

	        return textbin
	    finally:
	        # Intermediate files are removed on every path, including failures.
	        ldscript.unlink(missing_ok=True)
	        elf.unlink(missing_ok=True)


	def parse_readelf_relocs(objfile):
	    """Collect the ``.text*`` relocation entries listed by ``readelf -r``.

	    Returns a list of ``(offset, reloc_type_num, reloc_type_name, sym)``
	    tuples, one per entry found under a relocation section whose target
	    section name starts with ``.text``.
	    """
	    proc = subprocess.run(
	        [READELF, "-r", str(objfile)],
	        capture_output=True, text=True
	    )

	    entries = []
	    target = None  # section the current relocation table applies to

	    for raw in proc.stdout.splitlines():
	        text = raw.strip()

	        if text.startswith("Relocation section '"):
	            target = text.split("'")[1]
	            # ".rela.text" / ".rel.text" -> ".text"
	            for prefix in (".rela.", ".rel."):
	                if target.startswith(prefix):
	                    target = "." + target[len(prefix):]
	                    break
	            continue

	        if not (target and target.startswith(".text")):
	            continue

	        fields = text.split()
	        if len(fields) < 5:
	            continue

	        # Lines whose first column is not hex (e.g. the header) are skipped.
	        try:
	            where = int(fields[0], 16)
	        except ValueError:
	            continue

	        type_num = int(fields[1], 16) & 0xFF
	        entries.append((where, type_num, fields[2], fields[4]))

	    return entries


	def get_section_offsets(objfile):
	    """Placeholder for mapping ``.text`` subsections to combined-``.text`` offsets.

	    When linking, all ``.text.*`` input sections are merged into ``.text``,
	    so per-section relocation offsets would need such a mapping
	    (section_name -> offset_in_combined_text).

	    Not implemented: *objfile* is ignored and ``None`` is returned.
	    """
	    # Design notes kept from the original stub:
	    #  - one option is to link at base=0 and read the linker map;
	    #  - another is to inspect objdump output of the linked ELF;
	    #  - the approach actually taken is to diff the raw .text bytes of
	    #    ELFs linked at different bases, so no mapping is computed here.
	    return None


	def diff_bytes(bytes0, bytes1):
	    """List the positions where two byte sequences disagree.

	    Only the common prefix (up to the shorter length) is compared.
	    Returns ``(offset, byte0, byte1)`` tuples in ascending offset order.
	    """
	    return [
	        (pos, left, right)
	        for pos, (left, right) in enumerate(zip(bytes0, bytes1))
	        if left != right
	    ]


	def compute_diff_mask(bytes_list):
	    """Compute which bits differ between the first byte string and any other.

	    Args:
	        bytes_list: Byte strings of equal length; the first is the reference.

	    Returns:
	        ``bytes`` of the reference's length in which a 1 bit marks a bit that
	        differs from the reference in at least one other entry, or ``None``
	        when fewer than two byte strings are given.

	    Raises:
	        IndexError: If a later entry is shorter than the reference.
	    """
	    if not bytes_list or len(bytes_list) < 2:
	        return None

	    ref = bytes_list[0]
	    mask = bytearray(len(ref))

	    for other in bytes_list[1:]:
	        for i, ref_byte in enumerate(ref):
	            # XOR exposes the differing bits; OR accumulates them across inputs.
	            mask[i] |= ref_byte ^ other[i]

	    return bytes(mask)


	def main():
	    """Link every object at several bases, diff ``.text`` and check reloc masks.

	    Phase 1 links each ``*.o`` in OBJDIR at every address in BASES, computes
	    which ``.text`` bytes differ between the links and prints the changed
	    byte ranges per file plus an overall summary.

	    Phase 2 links each object at 0x00000000 and 0x10000000, locates every
	    ``.text*`` relocation inside the linked ``.text`` with the help of a
	    linker map, and checks that the bits that changed at that location are a
	    subset of the mask expected for the relocation type.
	    """
	    LINKDIR.mkdir(parents=True, exist_ok=True)

	    ofiles = sorted(OBJDIR.glob("*.o"))
	    print(f"Processing {len(ofiles)} object files...")
	    print(f"Base addresses: {[hex(b) for b in BASES]}")
	    print()

	    total_diff_bytes = 0
	    total_text_bytes = 0

	    # ------------------------------------------------------------------
	    # Phase 1: byte-level diff across all base addresses
	    # ------------------------------------------------------------------
	    for ofile in ofiles:
	        text_bins = {}
	        for base in BASES:
	            binpath = link_at_base(ofile, base, LINKDIR)
	            if binpath:
	                with open(binpath, "rb") as f:
	                    text_bins[base] = f.read()
	                binpath.unlink(missing_ok=True)

	        if len(text_bins) < 2:
	            print(f" SKIP {ofile.name}: could not link at enough bases")
	            continue

	        # All links should yield a .text of the same size
	        sizes = set(len(v) for v in text_bins.values())
	        if len(sizes) != 1:
	            print(f" WARN {ofile.name}: different .text sizes: {sizes}")
	            continue

	        text_size = sizes.pop()
	        total_text_bytes += text_size

	        # Diff mask across all bases, lowest base first
	        bytes_list = [text_bins[b] for b in sorted(text_bins)]
	        diff_mask = compute_diff_mask(bytes_list)

	        changed_bytes = sum(1 for b in diff_mask if b != 0)
	        total_diff_bytes += changed_bytes

	        if changed_bytes == 0:
	            # No relocations applied (or all PC-relative within same section)
	            continue

	        ranges = _changed_ranges(diff_mask)
	        base0_bytes = bytes_list[0]
	        base1_bytes = bytes_list[1]

	        print(f"\n{ofile.name}: {text_size} bytes, {changed_bytes} bytes differ ({changed_bytes*100/text_size:.1f}%)")
	        print(f" Changed ranges: {len(ranges)}")

	        if len(ranges) <= 20:
	            for start, end in ranges:
	                nbytes = end - start + 1
	                b0 = " ".join(f"{x:02x}" for x in base0_bytes[start:end + 1])
	                b1 = " ".join(f"{x:02x}" for x in base1_bytes[start:end + 1])
	                dm = " ".join(f"{x:02x}" for x in diff_mask[start:end + 1])
	                print(f" offset {start:#06x}-{end:#06x} ({nbytes}B): base0=[{b0}] base1=[{b1}] mask=[{dm}]")
	        else:
	            print(f" (too many ranges to show individually)")

	    # Guard the percentage: nothing may have linked at all.
	    percent_changed = total_diff_bytes * 100 / total_text_bytes if total_text_bytes else 0.0

	    print("\n" + "=" * 80)
	    print(f"SUMMARY: {total_text_bytes} total .text bytes across all files")
	    print(f" {total_diff_bytes} bytes changed ({percent_changed:.1f}% of total)")
	    print()

	    # ------------------------------------------------------------------
	    # Phase 2: verify each relocation's mask against the observed diff
	    # ------------------------------------------------------------------
	    print("\n" + "=" * 80)
	    print("CROSS-REFERENCE: Verifying relocation masks empirically")
	    print("=" * 80)

	    # Reloc type info (subset - the ones we actually see)
	    EXPECTED_MASKS = {
	        # type_num: (name, size_bytes, le_mask_int)
	        0x36: ("R_V810_WHI1", 2, 0x0000ffff),
	        0x34: ("R_V810_WLO", 2, 0x0000ffff),
	        0x46: ("R_V850_PCR22", 4, 0xfffe003f),
	        0x4c: ("R_V810_WLO_1", 2, 0x0000fffe),
	        0x47: ("R_V850_BLO", 4, 0xfffe0020),
	        # Standard V850 types (in case any appear)
	        1: ("R_V850_9_PCREL", 2, 0x0000f870),
	        2: ("R_V850_22_PCREL", 4, 0xfffe003f),  # same bit pattern as PCR22 in code
	        3: ("R_V850_HI16_S", 2, 0x0000ffff),
	        5: ("R_V850_LO16", 2, 0x0000ffff),
	        6: ("R_V850_ABS32", 4, 0xffffffff),
	    }

	    verified_types = defaultdict(lambda: {"match": 0, "mismatch": 0, "examples": []})
	    verified_count = 0
	    mismatch_count = 0

	    for ofile in ofiles:
	        # Base 0x10000000 is far enough from 0 to change HI16 fields too.
	        base0_bytes = _link_text_for_verify(ofile, 0x00000000)
	        base1_bytes = _link_text_for_verify(ofile, 0x10000000)
	        if base0_bytes is None or base1_bytes is None:
	            continue
	        if len(base0_bytes) != len(base1_bytes):
	            continue

	        # Per-byte XOR of the two links
	        xor_mask = bytes(a ^ b for a, b in zip(base0_bytes, base1_bytes))

	        relocs_by_section = _text_relocs_by_section(ofile)
	        if not relocs_by_section:
	            continue

	        # Reloc offsets in the .o are per input section; the map tells where
	        # each input section landed inside the combined .text.
	        section_offsets = _text_section_offsets(ofile)

	        for sec_name, reloc_list in relocs_by_section.items():
	            base_offset = section_offsets.get(sec_name)
	            if base_offset is None:
	                continue

	            for offset, rtype, _rtypename in reloc_list:
	                info = EXPECTED_MASKS.get(rtype)
	                if info is None:
	                    continue
	                _name, nbytes, expected_mask = info

	                combined_offset = base_offset + offset
	                if combined_offset + nbytes > len(base0_bytes):
	                    continue

	                # Changed bits at this location as a little-endian integer
	                actual_xor = int.from_bytes(
	                    xor_mask[combined_offset:combined_offset + nbytes], "little"
	                )

	                # The actual XOR should be a SUBSET of the expected mask
	                # (some bits might not change if the relocated value happens
	                # to have those bits the same at both addresses)
	                if (actual_xor & ~expected_mask) == 0:
	                    verified_types[rtype]["match"] += 1
	                    verified_count += 1
	                else:
	                    verified_types[rtype]["mismatch"] += 1
	                    mismatch_count += 1
	                    if len(verified_types[rtype]["examples"]) < 3:
	                        verified_types[rtype]["examples"].append(
	                            (ofile.name, combined_offset, actual_xor, expected_mask)
	                        )

	    # Print verification results
	    print(f"\nVerified {verified_count} relocations, {mismatch_count} mismatches")
	    print()

	    for rtype in sorted(verified_types.keys()):
	        data = verified_types[rtype]
	        info = EXPECTED_MASKS.get(rtype, (f"type_{rtype:#x}", 0, 0))
	        name = info[0]
	        expected_mask = info[2]

	        status = "OK" if data["mismatch"] == 0 else "MISMATCH"
	        print(f" {name:<25} mask={expected_mask:#010x} match={data['match']:>5} mismatch={data['mismatch']:>5} [{status}]")

	        for fname, offset, actual, expected in data["examples"]:
	            extra_bits = actual & ~expected
	            print(f" MISMATCH in {fname} at offset {offset:#06x}: actual_xor={actual:#010x} expected_mask={expected:#010x} extra={extra_bits:#010x}")

	    print("\nDone.")


	def _changed_ranges(diff_mask):
	    """Group the offsets of non-zero mask bytes into inclusive (start, end) runs."""
	    ranges = []
	    start = end = None
	    for pos, mask_byte in enumerate(diff_mask):
	        if mask_byte == 0:
	            continue
	        if start is None:
	            start = end = pos
	        elif pos == end + 1:
	            end = pos
	        else:
	            ranges.append((start, end))
	            start = end = pos
	    if start is not None:
	        ranges.append((start, end))
	    return ranges


	def _link_text_for_verify(ofile, base):
	    """Link *ofile* at *base* and return the linked ``.text`` bytes, or ``None``."""
	    stem = ofile.stem
	    ldscript = LINKDIR / f"_verify_{stem}_{base:#x}.ld"
	    elf = LINKDIR / f"_verify_{stem}_{base:#x}.elf"
	    textbin = LINKDIR / f"_verify_{stem}_{base:#x}.text.bin"

	    try:
	        with open(ldscript, "w") as f:
	            f.write(LDSCRIPT_TEMPLATE.format(base=base))

	        subprocess.run(
	            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
	             "-o", str(elf), str(ofile)],
	            capture_output=True, text=True
	        )
	        if not elf.exists():
	            return None

	        subprocess.run(
	            [OBJCOPY, "-O", "binary", "-j", ".text", str(elf), str(textbin)],
	            capture_output=True, text=True
	        )
	        if not textbin.exists():
	            return None
	        return textbin.read_bytes()
	    finally:
	        for path in (ldscript, elf, textbin):
	            path.unlink(missing_ok=True)


	def _text_relocs_by_section(ofile):
	    """Return ``{section: [(offset, type_num, type_name), ...]}`` for ``.text*`` relocs."""
	    result = subprocess.run(
	        [READELF, "-r", str(ofile)],
	        capture_output=True, text=True
	    )

	    relocs_by_section = defaultdict(list)
	    current_section = None

	    for line in result.stdout.splitlines():
	        line = line.strip()
	        if line.startswith("Relocation section '"):
	            current_section = line.split("'")[1]
	            if current_section.startswith(".rela."):
	                current_section = "." + current_section[6:]
	            elif current_section.startswith(".rel."):
	                current_section = "." + current_section[5:]
	            continue

	        if not current_section or not current_section.startswith(".text"):
	            continue

	        parts = line.split()
	        if len(parts) < 5:
	            continue

	        try:
	            offset = int(parts[0], 16)
	            info = int(parts[1], 16)
	        except ValueError:
	            # Column-header line or other non-entry text.
	            continue

	        relocs_by_section[current_section].append((offset, info & 0xFF, parts[2]))

	    return relocs_by_section


	def _text_section_offsets(ofile):
	    """Link *ofile* at base 0 with ``-Map`` and return ``{input_section: offset}``."""
	    mapfile = LINKDIR / f"_verify_{ofile.stem}_map.txt"
	    ldscript = LINKDIR / f"_verify_{ofile.stem}_map.ld"
	    elf = LINKDIR / f"_verify_{ofile.stem}_map.elf"

	    try:
	        with open(ldscript, "w") as f:
	            f.write(LDSCRIPT_TEMPLATE.format(base=0))

	        subprocess.run(
	            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
	             "-Map", str(mapfile), "-o", str(elf), str(ofile)],
	            capture_output=True, text=True
	        )
	        if not mapfile.exists():
	            return {}
	        return _parse_map_text_offsets(mapfile.read_text())
	    finally:
	        for path in (mapfile, ldscript, elf):
	            path.unlink(missing_ok=True)


	def _parse_map_text_offsets(map_text):
	    """Extract ``{input_section_name: address}`` for ``.text*`` sections from a map.

	    GNU ld lists each input section on an indented line of the form
	    ``NAME ADDRESS SIZE FILE`` and moves ``ADDRESS SIZE FILE`` to the next
	    line when NAME is long.  A ``file.o(.text.foo)`` suffix, if present, is
	    honoured as the section name as well.  With the script linked at base 0
	    the address equals the offset inside the combined ``.text``.
	    """
	    offsets = {}
	    wrapped_name = None  # long section name printed alone on the previous line

	    for line in map_text.splitlines():
	        parts = line.split()
	        pending, wrapped_name = wrapped_name, None
	        if not parts:
	            continue

	        if parts[0].startswith(".text"):
	            if len(parts) == 1:
	                wrapped_name = parts[0]
	                continue
	            name, fields = parts[0], parts[1:]
	        elif pending is not None and parts[0].startswith("0x"):
	            name, fields = pending, parts
	        else:
	            continue

	        # Optional explicit "(.text...)" suffix naming the input section.
	        paren_name = None
	        open_paren = line.find("(")
	        close_paren = line.find(")", open_paren + 1)
	        if open_paren != -1 and close_paren != -1:
	            inner = line[open_paren + 1:close_paren]
	            if inner.startswith(".text"):
	                paren_name = inner

	        # The output-section line carries only address and size (no file);
	        # it must not be mistaken for an input section.
	        if paren_name is None and len(fields) < 3:
	            continue

	        try:
	            offsets[paren_name or name] = int(fields[0], 16)
	        except ValueError:
	            pass

	    return offsets


	# Script entry point: run main() only when executed directly, not on import.
	if __name__ == "__main__":
	    main()
#	Relocation Name	Value	Description
0	`R_V850_NONE`	0	No relocation
1	`R_V850_9_PCREL`	1	9-bit PC-relative branch (conditional branch short)
2	`R_V850_22_PCREL`	2	22-bit PC-relative branch (jr/jarl)
3	`R_V850_HI16_S`	3	High 16 bits of address, adjusted (movhi)
4	`R_V850_HI16`	4	High 16 bits of address, unadjusted
5	`R_V850_LO16`	5	Low 16 bits of address (movea, addi)
6	`R_V850_ABS32`	6	32-bit absolute address (.long)
7	`R_V850_16`	7	16-bit value
8	`R_V850_8`	8	8-bit value
9	`R_V850_SDA_16_16_OFFSET`	9	SDA 16-bit offset (from gp) for ld/st
10	`R_V850_SDA_15_16_OFFSET`	10	SDA 15-bit offset (from gp), word-aligned
11	`R_V850_ZDA_16_16_OFFSET`	11	ZDA 16-bit offset (from r0) for ld/st
12	`R_V850_ZDA_15_16_OFFSET`	12	ZDA 15-bit offset (from r0), word-aligned
13	`R_V850_TDA_6_8_OFFSET`	13	TDA 6-bit unsigned offset for sld.w/sst.w (ep-relative, <<2)
14	`R_V850_TDA_7_8_OFFSET`	14	TDA 7-bit unsigned offset for sld.h/sst.h (ep-relative, <<1)
15	`R_V850_TDA_7_7_OFFSET`	15	TDA 7-bit unsigned offset for sld.b/sst.b (ep-relative)
16	`R_V850_TDA_16_16_OFFSET`	16	TDA 16-bit offset (from ep) for ld/st
17	`R_V850_TDA_4_5_OFFSET`	17	TDA 4-bit unsigned offset for sld.hu (ep-relative, <<1)
18	`R_V850_TDA_4_4_OFFSET`	18	TDA 4-bit unsigned offset for sld.bu (ep-relative)
19	`R_V850_SDA_16_16_SPLIT_OFFSET`	19	SDA 16-bit offset, split (for v850e ld.bu/st.b etc.)
20	`R_V850_ZDA_16_16_SPLIT_OFFSET`	20	ZDA 16-bit offset, split
21	`R_V850_CALLT_6_7_OFFSET`	21	CALLT table 6-bit offset (<<1)
22	`R_V850_CALLT_16_16_OFFSET`	22	CALLT 16-bit offset
23	`R_V850_GNU_VTINHERIT`	23	C++ vtable hierarchy
24	`R_V850_GNU_VTENTRY`	24	C++ vtable member
25	`R_V850_LONGCALL`	25	Relaxation hint: long call
26	`R_V850_LONGJUMP`	26	Relaxation hint: long jump
27	`R_V850_ALIGN`	27	Alignment marker for relaxation
28	`R_V850_LO16_SPLIT_OFFSET`	28	Low 16-bit split displacement (v850e2)
29	`R_V850_16_PCREL`	29	16-bit PC-relative (v850e2 conditional branches)
30	`R_V850_17_PCREL`	30	17-bit PC-relative (v850e2 conditional branches, <<1)
31	`R_V850_23`	31	23-bit field (v850e2)
32	`R_V850_32_PCREL`	32	32-bit PC-relative
33	`R_V850_32_ABS`	33	32-bit absolute (alias/alternate)
34	`R_V850_16_SPLIT_OFFSET`	34	16-bit split displacement
35	`R_V850_16_S1`	35	16-bit signed, shifted left 1
36	`R_V850_LO16_S1`	36	Low 16-bit, shifted left 1
37	`R_V850_CALLT_15_16_OFFSET`	37	CALLT 15-bit offset
38	`R_V850_32_GOTPCREL`	38	32-bit GOT-relative PC-relative
39	`R_V850_16_GOT`	39	16-bit GOT offset
40	`R_V850_32_GOT`	40	32-bit GOT offset
41	`R_V850_22_PLT`	41	22-bit PC-relative PLT
42	`R_V850_32_PLT`	42	32-bit PLT
43	`R_V850_COPY`	43	Dynamic: copy
44	`R_V850_GLOB_DAT`	44	Dynamic: global data
45	`R_V850_JMP_SLOT`	45	Dynamic: jump slot
46	`R_V850_RELATIVE`	46	Dynamic: relative
47	`R_V850_16_GOTOFF`	47	16-bit GOT-relative offset
48	`R_V850_32_GOTOFF`	48	32-bit GOT-relative offset
49	`R_V850_CODE`	49	Marks code section
50	`R_V850_DATA`	50	Marks data section
Relocation	rightshift	bitsize	bitpos	overflow	dst_mask	Notes
`R_V850_NONE`	0	0	0	dont	0x00000000	No relocation
`R_V850_9_PCREL`	0	9	0	signed	0x0000f870	Bits [15:11],[6:4] of 16-bit insn (disp bit 0 implicit); PC-relative
`R_V850_22_PCREL`	0	22	0	signed	0xfffe003f	disp[21:16] in bits [5:0], disp[15:1] in bits [31:17] of 32-bit insn (disp bit 0 implicit); PC-relative
`R_V850_HI16_S`	0	16	16	dont	0xffff0000	Upper half-word of 32-bit insn
`R_V850_HI16`	0	16	16	dont	0xffff0000	Upper half-word of 32-bit insn
`R_V850_LO16`	0	16	16	dont	0xffff0000	Upper half-word of 32-bit insn (immediate is in high 16 bits of word)
`R_V850_ABS32`	0	32	0	dont	0xffffffff	Full 32-bit word
`R_V850_16`	0	16	0	dont	0x0000ffff	16-bit value
`R_V850_8`	0	8	0	dont	0x000000ff	8-bit value
`R_V850_SDA_16_16_OFFSET`	0	16	16	dont	0xffff0000	16-bit disp in upper half of 32-bit insn
`R_V850_SDA_15_16_OFFSET`	1	15	17	dont	0xfffe0000	15-bit disp (bit0 implicit 0), bits [31:17]
`R_V850_ZDA_16_16_OFFSET`	0	16	16	dont	0xffff0000	Same format as SDA but from r0
`R_V850_ZDA_15_16_OFFSET`	1	15	17	dont	0xfffe0000	Same format as SDA_15
`R_V850_TDA_6_8_OFFSET`	2	6	1	dont	0x0000007e	6-bit field bits [6:1] of 16-bit insn (sld.w/sst.w; implicit <<2)
`R_V850_TDA_7_8_OFFSET`	1	7	0	dont	0x0000007f	7-bit field bits [6:0] of 16-bit insn (sld.h/sst.h; implicit <<1)
`R_V850_TDA_7_7_OFFSET`	0	7	0	dont	0x0000007f	7-bit field bits [6:0] of 16-bit insn (sld.b/sst.b)
`R_V850_TDA_16_16_OFFSET`	0	16	16	dont	0xffff0000	16-bit disp in upper half of 32-bit insn (from ep)
`R_V850_TDA_4_5_OFFSET`	1	4	0	dont	0x0000000f	4-bit field bits [3:0] (sld.hu; implicit <<1)
`R_V850_TDA_4_4_OFFSET`	0	4	0	dont	0x0000000f	4-bit field bits [3:0] (sld.bu)
`R_V850_SDA_16_16_SPLIT_OFFSET`	0	16	0	dont	0xfffe0020	Split: bits [15:1] in [31:17], bit[0] in [5]
`R_V850_ZDA_16_16_SPLIT_OFFSET`	0	16	0	dont	0xfffe0020	Same split format for ZDA
`R_V850_CALLT_6_7_OFFSET`	1	6	0	dont	0x0000003f	6-bit field (callt instruction, implicit <<1)
`R_V850_CALLT_16_16_OFFSET`	0	16	16	dont	0xffff0000	16-bit CALLT displacement
`R_V850_GNU_VTINHERIT`	0	0	0	dont	0x00000000	C++ ABI
`R_V850_GNU_VTENTRY`	0	0	0	dont	0x00000000	C++ ABI
`R_V850_LONGCALL`	0	32	0	dont	0xffffffff	Relaxation marker
`R_V850_LONGJUMP`	0	32	0	dont	0xffffffff	Relaxation marker
`R_V850_ALIGN`	0	32	0	dont	0xffffffff	Relaxation alignment
`R_V850_LO16_SPLIT_OFFSET`	0	16	0	dont	0xfffe0020	Split low-16 for v850e2 format
`R_V850_16_PCREL`	0	16	0	signed	0x0000fffe	16-bit PC-relative
`R_V850_17_PCREL`	0	17	0	signed	0x0000fffe	17-bit PC-rel (bit0 implicit)
`R_V850_23`	0	23	0	dont	0xffff007f	23-bit field split across instruction
`R_V850_32_PCREL`	0	32	0	signed	0xfffffffe	32-bit PC-relative
`R_V850_32_ABS`	0	32	0	dont	0xffffffff	32-bit absolute
`R_V850_16_SPLIT_OFFSET`	0	16	0	dont	0xfffe0020	16-bit split displacement
`R_V850_16_S1`	1	16	0	dont	0x0000fffe	16-bit shifted left 1
`R_V850_LO16_S1`	1	16	16	dont	0xfffe0000	Low 16 bits shifted left 1, in upper half-word
`R_V850_CALLT_15_16_OFFSET`	1	15	17	dont	0xfffe0000	15-bit CALLT offset
Assembly Pattern	Relocation(s) Used	Instruction Format
`movhi hi(%sym), r0, r2`	`R_V850_HI16_S`	Format VI, 32-bit
`movea lo(%sym), r1, r2`	`R_V850_LO16`	Format VI, 32-bit
`mov hilo(%sym), r1`	`R_V850_32_ABS` (or HI16_S+LO16 pair)	48-bit (v850E+)
`jr %target`	`R_V850_22_PCREL`	Format V, 32-bit
`jarl %target, r31`	`R_V850_22_PCREL`	Format V, 32-bit
`br %target`	`R_V850_9_PCREL`	Format III, 16-bit
`bCC %target` (short)	`R_V850_9_PCREL`	Format III, 16-bit
`bCC %target` (v850e3v5, long)	`R_V850_17_PCREL`	32-bit extended
`addi sdaoff(%sym), gp, r2`	`R_V850_SDA_16_16_OFFSET`	Format VI, 32-bit
`movea sdaoff(%sym), gp, r2`	`R_V850_SDA_16_16_OFFSET`	Format VI, 32-bit
`ld.w sdaoff(%sym)[gp], r2`	`R_V850_SDA_15_16_OFFSET`	Format VII, 32-bit
`ld.b zdaoff(%sym)[r0], r2`	`R_V850_ZDA_16_16_OFFSET`	Format VII, 32-bit
`ld.w zdaoff(%sym)[r0], r2`	`R_V850_ZDA_15_16_OFFSET`	Format VII, 32-bit
`sld.w tdaoff(%sym)[ep], r2`	`R_V850_TDA_6_8_OFFSET`	Format IV, 16-bit
`sld.h tdaoff(%sym)[ep], r2`	`R_V850_TDA_7_8_OFFSET`	Format IV, 16-bit
`sld.b tdaoff(%sym)[ep], r2`	`R_V850_TDA_7_7_OFFSET`	Format IV, 16-bit
`sld.hu tdaoff(%sym)[ep], r2`	`R_V850_TDA_4_5_OFFSET`	Format IV, 16-bit (v850E)
`sld.bu tdaoff(%sym)[ep], r2`	`R_V850_TDA_4_4_OFFSET`	Format IV, 16-bit (v850E)
`ld.bu zdaoff(%sym)[r0], r2`	`R_V850_ZDA_16_16_SPLIT_OFFSET`	v850E split fmt, 32-bit
`callt ctoff(%sym)`	`R_V850_CALLT_6_7_OFFSET`	16-bit
`.long %sym`	`R_V850_ABS32`	Data, 32-bit
	#!/usr/bin/env python3
	"""
	Cross-reference v850 relocation masks with empirical binary diffs.

	For each .o file:
	1. Link at base 0x0 and 0x10000000
	2. Extract combined .text binary from each
	3. XOR to find changed bits
	4. Parse linker map to get section-to-combined-offset mapping
	5. Parse relocations from .o file
	6. At each relocation offset, compare empirical XOR with expected mask
	"""

	import subprocess
	import struct
	import sys
	import os
	import tempfile
	import re
	from collections import defaultdict
	from pathlib import Path

	# Directory that is scanned for the compiled object files (*.o) to verify.
	OBJDIR = Path("/home/null/dev/gcc/test-programs/v850-out")
	# Scratch directory for the temporary linker scripts, ELF images, raw
	# .text dumps and linker maps produced while verifying.
	WORKDIR = Path("/tmp/v850_verify")
	# Absolute paths to the v850-elf binutils executables.
	LD = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-ld"
	OBJCOPY = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objcopy"
	OBJDUMP = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-objdump"
	READELF = "/models/dev/v850-toolchain/opt/v850-gcc/bin/v850-elf-readelf"

	# Linker script template.  ``{base}`` is substituted through str.format(),
	# which is why literal braces are doubled.  Every input-section list needs
	# a leading ``*`` file wildcard (``*(.text ...)``); without it ld rejects
	# the script.  The ``.*`` suffix patterns pull in per-function/per-object
	# subsections such as ``.text.foo``.
	LDSCRIPT_TEMPLATE = """OUTPUT_FORMAT("elf32-v850-rh850", "elf32-v850-rh850", "elf32-v850-rh850")
	OUTPUT_ARCH(v850:rh850)
	ENTRY(_main)
	SECTIONS {{
	. = {base:#010x};
	.text : {{ *(.text .text.* .text.startup .text.startup.*) }}
	. = ALIGN(4);
	.rodata : {{ *(.rodata .rodata.*) }}
	. = ALIGN(4);
	.data : {{ *(.data .data.*) }}
	. = ALIGN(4);
	.bss : {{ *(.bss .bss.* COMMON) }}
	/DISCARD/ : {{ *(.debug_* .comment .note.* .eh_frame .gcc_except_table*) }}
	}}"""

	# Reference masks taken from the binutils sources.
	# Key:   relocation type number as reported in the ELF r_info field.
	# Value: (relocation name, field size in bytes, mask as a little-endian
	#        32-bit integer).
	EXPECTED = {
	    # Standard v850 relocation numbers, in case they show up.
	    0x01: ("R_V850_9_PCREL", 2, 0x0000F870),
	    0x02: ("R_V850_22_PCREL", 4, 0xFFFE003F),
	    0x03: ("R_V850_HI16_S", 2, 0x0000FFFF),
	    0x05: ("R_V850_LO16", 2, 0x0000FFFF),
	    0x06: ("R_V850_ABS32", 4, 0xFFFFFFFF),
	    # RH850 relocation numbers.
	    0x33: ("R_V810_WORD", 4, 0xFFFFFFFF),
	    0x34: ("R_V810_WLO", 2, 0x0000FFFF),
	    0x36: ("R_V810_WHI1", 2, 0x0000FFFF),
	    0x46: ("R_V850_PCR22", 4, 0xFFFE003F),
	    0x47: ("R_V850_BLO", 4, 0xFFFE0020),
	    0x4C: ("R_V810_WLO_1", 2, 0x0000FFFE),
	}


	# "<section> <address> <size> <file>" on a single map line.
	_MAP_ENTRY_RE = re.compile(r'(\.\S+)\s+(0x[0-9a-f]+)\s+(0x[0-9a-f]+)\s+\S+')
	# A section name alone on a line: ld wraps long input-section names and
	# prints the address/size/file on the following line.
	_MAP_BARE_NAME_RE = re.compile(r'\.\S+$')
	# "<address> <size> <file>" continuation of a wrapped entry.
	_MAP_CONTINUATION_RE = re.compile(r'(0x[0-9a-f]+)\s+(0x[0-9a-f]+)\s+\S+')


	def _text_section_offsets(map_text, base):
	    """Return {input .text* section name: VMA - base} parsed from an ld map."""
	    offsets = {}
	    wrapped_name = None
	    for raw in map_text.splitlines():
	        line = raw.strip()
	        # A wrapped name is only valid for the line immediately after it.
	        pending, wrapped_name = wrapped_name, None

	        entry = _MAP_ENTRY_RE.match(line)
	        if entry:
	            sec_name, vma_str, size_str = entry.groups()
	        else:
	            cont = _MAP_CONTINUATION_RE.match(line) if pending else None
	            if cont is None:
	                if _MAP_BARE_NAME_RE.match(line):
	                    wrapped_name = line
	                continue
	            sec_name = pending
	            vma_str, size_str = cont.groups()

	        # Zero-sized sections occupy no bytes, so they carry no relocations
	        # worth mapping.
	        if sec_name.startswith(".text") and int(size_str, 16) > 0:
	            offsets[sec_name] = int(vma_str, 16) - base
	    return offsets


	def link_and_extract(ofile, base, workdir):
	    """Link one object file at ``base`` and extract its combined ``.text``.

	    A linker script is generated from ``LDSCRIPT_TEMPLATE``, the object is
	    linked with unresolved symbols ignored, and the output ``.text``
	    section is dumped as a raw binary.  The linker map is parsed to learn
	    where each input ``.text*`` section landed.

	    Args:
	        ofile: Path of the object file to link.
	        base: Start address assigned to the output image.
	        workdir: Existing directory used for the scratch files.

	    Returns:
	        ``(text_bytes, section_offsets)``.  ``text_bytes`` is the raw
	        ``.text`` contents, or ``None`` when linking or extraction produced
	        nothing.  ``section_offsets`` maps input section names to their
	        offset from ``base``.

	    All scratch files are removed before returning, including when a tool
	    fails or an exception propagates.
	    """
	    stem = ofile.stem
	    ldscript = workdir / f"{stem}_{base:#x}.ld"
	    elf = workdir / f"{stem}_{base:#x}.elf"
	    textbin = workdir / f"{stem}_{base:#x}.text.bin"
	    mapfile = workdir / f"{stem}_{base:#x}.map"

	    text_bytes = None
	    section_offsets = {}  # input_section_name -> VMA offset relative to base

	    try:
	        ldscript.write_text(LDSCRIPT_TEMPLATE.format(base=base))

	        # Linker failures are tolerated on purpose: a missing ELF simply
	        # yields text_bytes=None and the caller skips the file.
	        subprocess.run(
	            [LD, "-T", str(ldscript), "--unresolved-symbols=ignore-all", "--noinhibit-exec",
	             "-Map", str(mapfile), "-o", str(elf), str(ofile)],
	            capture_output=True, text=True
	        )

	        if elf.exists():
	            subprocess.run(
	                [OBJCOPY, "-O", "binary", "-j", ".text", str(elf), str(textbin)],
	                capture_output=True, text=True
	            )
	            if textbin.exists() and textbin.stat().st_size > 0:
	                text_bytes = textbin.read_bytes()

	        if mapfile.exists():
	            section_offsets = _text_section_offsets(mapfile.read_text(), base)
	    finally:
	        # Always clean up, so a failed or empty link cannot leave stale
	        # files behind for the next run.
	        for scratch in (ldscript, elf, textbin, mapfile):
	            scratch.unlink(missing_ok=True)

	    return text_bytes, section_offsets


	def parse_relocations(ofile):
	    """Collect the relocations that apply to ``.text*`` sections of ``ofile``.

	    Runs ``readelf -r`` on the object file and scans its textual output.

	    Returns:
	        A dict mapping the target section name (the relocation section name
	        with its ``.rela``/``.rel`` prefix removed) to a list of
	        ``(offset, type_number, type_name)`` tuples.
	    """
	    proc = subprocess.run(
	        [READELF, "-r", str(ofile)],
	        capture_output=True, text=True
	    )

	    by_section = defaultdict(list)
	    target = None

	    for raw in proc.stdout.splitlines():
	        entry = raw.strip()

	        # Header line: remember which section the following entries patch.
	        if entry.startswith("Relocation section '"):
	            target = entry.split("'")[1]
	            for prefix in (".rela.", ".rel."):
	                if target.startswith(prefix):
	                    target = "." + target[len(prefix):]
	                    break
	            continue

	        # Only relocations against code sections are of interest.
	        if not (target and target.startswith(".text")):
	            continue

	        fields = entry.split()
	        if len(fields) < 3:
	            continue

	        # Column headers and other non-entry lines fail the hex parse.
	        try:
	            offset = int(fields[0], 16)
	            r_info = int(fields[1], 16)
	        except ValueError:
	            continue

	        # The low byte of r_info is the relocation type number.
	        by_section[target].append((offset, r_info & 0xFF, fields[2]))

	    return by_section


	def main():
	    """Cross-check the expected relocation masks against empirical link diffs.

	    Every ``*.o`` file in ``OBJDIR`` is linked at base 0x0 and at base
	    0x10000000.  The two ``.text`` images are XORed, and at each relocation
	    site whose type is listed in ``EXPECTED`` the changed bits are compared
	    with the expected mask:

	    * no bit changed                        -> counted as "zero_xor"
	    * all changed bits inside the mask      -> counted as "match"
	    * some changed bit outside the mask     -> counted as "mismatch"

	    Per-type statistics, the first 20 mismatches and a final verdict are
	    printed to stdout.  Object files that cannot be linked at both bases,
	    or whose two images differ in length, are skipped silently.
	    """
	    WORKDIR.mkdir(parents=True, exist_ok=True)

	    ofiles = sorted(OBJDIR.glob("*.o"))
	    print(f"Verifying masks for {len(ofiles)} object files...")
	    print("Bases: 0x0 vs 0x10000000")
	    print()

	    # Stats
	    type_stats = defaultdict(lambda: {"match": 0, "mismatch": 0, "zero_xor": 0})
	    mismatch_details = []

	    for ofile in ofiles:
	        # Link at two bases.  The section layout relative to the base is
	        # taken from the first link only.
	        text0, offsets0 = link_and_extract(ofile, 0x00000000, WORKDIR)
	        text1, _ = link_and_extract(ofile, 0x10000000, WORKDIR)

	        if text0 is None or text1 is None:
	            continue
	        if len(text0) != len(text1):
	            continue

	        # Bytes that differ between the two links
	        xor_mask = bytes(a ^ b for a, b in zip(text0, text1))

	        # Get relocations
	        relocs = parse_relocations(ofile)

	        for sec_name, reloc_list in relocs.items():
	            sec_offset = offsets0.get(sec_name)
	            if sec_offset is None:
	                # Section not found in the linker map - nothing to compare
	                continue

	            for offset, rtype, rtypename in reloc_list:
	                if rtype not in EXPECTED:
	                    continue

	                name, nbytes, expected_mask = EXPECTED[rtype]
	                combined_offset = sec_offset + offset

	                if combined_offset + nbytes > len(xor_mask):
	                    continue

	                # Changed bits at this site, as a little-endian integer
	                actual_xor = int.from_bytes(
	                    xor_mask[combined_offset:combined_offset + nbytes], "little"
	                )

	                if actual_xor == 0:
	                    # Might be a PC-relative reloc within the same section
	                    # (both bases shift by same amount, so the PC-relative offset
	                    # stays the same), or the value happens to be zero.
	                    type_stats[rtype]["zero_xor"] += 1
	                elif (actual_xor & ~expected_mask) == 0:
	                    # All changed bits are within the expected mask - MATCH
	                    type_stats[rtype]["match"] += 1
	                else:
	                    # Some bits changed outside the expected mask - MISMATCH
	                    type_stats[rtype]["mismatch"] += 1
	                    if len(mismatch_details) < 20:
	                        mismatch_details.append(
	                            (ofile.name, sec_name, offset, combined_offset,
	                             rtype, rtypename, actual_xor, expected_mask,
	                             actual_xor & ~expected_mask)
	                        )

	    # Print results
	    print("=" * 110)
	    print(f"{'Type':<25} {'Expected Mask':>12} {'Match':>7} {'Zero XOR':>10} {'Mismatch':>10} {'Status'}")
	    print("=" * 110)

	    total_match = 0
	    total_zero = 0
	    total_mismatch = 0

	    for rtype in sorted(type_stats.keys()):
	        stats = type_stats[rtype]
	        info = EXPECTED[rtype]
	        name = info[0]
	        mask = info[2]

	        m = stats["match"]
	        z = stats["zero_xor"]
	        mm = stats["mismatch"]
	        total_match += m
	        total_zero += z
	        total_mismatch += mm

	        status = "OK" if mm == 0 else "* MISMATCH *"
	        print(f" {name:<23} {mask:#010x} {m:>7} {z:>10} {mm:>10} {status}")

	    print("=" * 110)
	    print(f" {'TOTAL':<23} {'':>12} {total_match:>7} {total_zero:>10} {total_mismatch:>10}")
	    print()

	    if total_zero > 0:
	        print(f"Note: {total_zero} relocations had zero XOR (no visible change between bases).")
	        print(" This is expected for PC-relative relocations (e.g., R_V850_PCR22) when")
	        print(" the target is in the same section - the PC-relative offset doesn't change.")
	        print(" Also expected for undefined symbols (resolve to 0 at both bases).")
	        print()

	    if mismatch_details:
	        print("MISMATCH DETAILS:")
	        print("-" * 110)
	        for (fname, sec, sec_off, comb_off, rtype, rtname, actual, expected, extra) in mismatch_details:
	            print(f" {fname} {sec}+{sec_off:#06x} (combined:{comb_off:#06x}) {rtname}")
	            print(f" actual_xor={actual:#010x} expected_mask={expected:#010x} extra_bits={extra:#010x}")
	        print()

	    if total_mismatch == 0:
	        print("RESULT: All relocation masks VERIFIED. No bits changed outside expected masks.")
	        print()
	        print("The following masks are confirmed correct for Lumina v850 signature generation:")
	        print()
	        # Several type numbers may share one name; list each name once.
	        seen = set()
	        for rtype in sorted(type_stats.keys()):
	            info = EXPECTED[rtype]
	            name, nbytes, mask = info
	            if name in seen:
	                continue
	            seen.add(name)
	            byte_masks = " ".join(f"{(mask >> (i*8)) & 0xFF:02x}" for i in range(nbytes))
	            print(f" {name:<25} {nbytes}B mask=0x{mask:08x} LE bytes=[{byte_masks}]")
	    else:
	        print(f"WARNING: {total_mismatch} mismatches found. The masks may need adjustment.")


	# Script entry point: run main() only when executed directly, not on import.
	if __name__ == "__main__":
	    main()